From 3347fa0928210d96aaa2bd6cd5a8391d5e630873 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:32 -0400
Subject: workqueue: make workqueue available early during boot

Workqueue is currently initialized in an early init call; however,
there are cases where early boot code has to be split and reordered to
come after workqueue initialization or the same code path which makes
use of workqueues is used both before workqueue initailization and
after.  The latter cases have to gate workqueue usages with
keventd_up() tests, which is nasty and easy to get wrong.

Workqueue usages have become widespread and it'd be a lot more
convenient if it can be used very early from boot.  This patch splits
workqueue initialization into two steps.  workqueue_init_early() which
sets up the basic data structures so that workqueues can be created
and work items queued, and workqueue_init() which actually brings up
workqueues online and starts executing queued work items.  The former
step can be done very early during boot once memory allocation,
cpumasks and idr are initialized.  The latter right after kthreads
become available.

This allows work item queueing and canceling from very early boot
which is what most of these use cases want.

* As systemd_wq being initialized doesn't indicate that workqueue is
  fully online anymore, update keventd_up() to test wq_online instead.
  The follow-up patches will get rid of all its usages and the
  function itself.

* Flushing doesn't make sense before workqueue is fully initialized.
  The flush functions trigger WARN and return immediately before fully
  online.

* Work items are never in-flight before fully online.  Canceling can
  always succeed by skipping the flush step.

* Some code paths can no longer assume to be called with irq enabled
  as irq is disabled during early boot.  Use irqsave/restore
  operations instead.

v2: Watchdog init, which requires timer to be running, moved from
    workqueue_init_early() to workqueue_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/CA+55aFx0vPuMuxn00rBSM192n-Du5uxy+4AvKa0SBSOVJeuCGg@mail.gmail.com
---
 include/linux/workqueue.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 26cc1df280d6..91d416f9c0a7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -358,6 +358,8 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
+extern bool wq_online;
+
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
@@ -594,7 +596,7 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork,
  */
 static inline bool keventd_up(void)
 {
-	return system_wq != NULL;
+	return wq_online;
 }
 
 #ifndef CONFIG_SMP
@@ -631,4 +633,7 @@ int workqueue_online_cpu(unsigned int cpu);
 int workqueue_offline_cpu(unsigned int cpu);
 #endif
 
+int __init workqueue_init_early(void);
+int __init workqueue_init(void);
+
 #endif
-- 
cgit v1.2.3


From 863b710b664bdcb90c0c682ee24adb368f497a5b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:34 -0400
Subject: workqueue: remove keventd_up()

keventd_up() no longer has in-kernel users.  Remove it and make
wq_online static.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 10 ----------
 kernel/workqueue.c        |  2 +-
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 91d416f9c0a7..56417133c672 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -358,8 +358,6 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-extern bool wq_online;
-
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
@@ -591,14 +589,6 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork,
 	return queue_delayed_work(system_wq, dwork, delay);
 }
 
-/**
- * keventd_up - is workqueue initialized yet?
- */
-static inline bool keventd_up(void)
-{
-	return wq_online;
-}
-
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 15d0811c9e91..ad0cd439223b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,7 +290,7 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
-bool wq_online;				/* can kworkers be created yet? */
+static bool wq_online;			/* can kworkers be created yet? */
 
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
-- 
cgit v1.2.3


From 43ece27e70b2c756e45306791955507f0533e248 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 23 Sep 2016 17:19:41 +0200
Subject: iio:trigger: Add helper function to verify that a trigger belongs to
 the same device

Some triggers can only be attached to the IIO device that corresponds to
the same physical device. Currently each driver that requires this
implements its own trigger validation function.

Introduce a new helper function called iio_trigger_validate_own_device()
that can be used to do this check. Having a common implementation avoids
code duplication and unnecessary boiler-plate code.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-trigger.c | 21 +++++++++++++++++++++
 include/linux/iio/trigger.h        |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index e1e104845e38..978729f6d7c4 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -717,6 +717,27 @@ bool iio_trigger_using_own(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL(iio_trigger_using_own);
 
+/**
+ * iio_trigger_validate_own_device - Check if a trigger and IIO device belong to
+ *  the same device
+ * @trig: The IIO trigger to check
+ * @indio_dev: the IIO device to check
+ *
+ * This function can be used as the validate_device callback for triggers that
+ * can only be attached to their own device.
+ *
+ * Return: 0 if both the trigger and the IIO device belong to the same
+ * device, -EINVAL otherwise.
+ */
+int iio_trigger_validate_own_device(struct iio_trigger *trig,
+	struct iio_dev *indio_dev)
+{
+	if (indio_dev->dev.parent != trig->dev.parent)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(iio_trigger_validate_own_device);
+
 void iio_device_register_trigger_consumer(struct iio_dev *indio_dev)
 {
 	indio_dev->groups[indio_dev->groupcounter++] =
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 4f1154f7a33c..ea08302f2d7b 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -170,6 +170,8 @@ void iio_trigger_free(struct iio_trigger *trig);
  */
 bool iio_trigger_using_own(struct iio_dev *indio_dev);
 
+int iio_trigger_validate_own_device(struct iio_trigger *trig,
+				     struct iio_dev *indio_dev);
 
 #else
 struct iio_trigger;
-- 
cgit v1.2.3


From 0023e67dd8951737588b8af0469446df3ec52afe Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@gmail.com>
Date: Fri, 23 Sep 2016 23:04:07 -0700
Subject: iio: inkern: add iio_read_channel_offset helper

Allow access to underlying channel IIO_CHAN_INFO_OFFSET from a consumer.

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/inkern.c         | 39 ++++++++++++++++++++++++++-------------
 include/linux/iio/consumer.h | 13 +++++++++++++
 2 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index c4757e6367e7..29df11572858 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -658,6 +658,31 @@ err_unlock:
 }
 EXPORT_SYMBOL_GPL(iio_convert_raw_to_processed);
 
+static int iio_read_channel_attribute(struct iio_channel *chan,
+				      int *val, int *val2,
+				      enum iio_chan_info_enum attribute)
+{
+	int ret;
+
+	mutex_lock(&chan->indio_dev->info_exist_lock);
+	if (chan->indio_dev->info == NULL) {
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	ret = iio_channel_read(chan, val, val2, attribute);
+err_unlock:
+	mutex_unlock(&chan->indio_dev->info_exist_lock);
+
+	return ret;
+}
+
+int iio_read_channel_offset(struct iio_channel *chan, int *val, int *val2)
+{
+	return iio_read_channel_attribute(chan, val, val2, IIO_CHAN_INFO_OFFSET);
+}
+EXPORT_SYMBOL_GPL(iio_read_channel_offset);
+
 int iio_read_channel_processed(struct iio_channel *chan, int *val)
 {
 	int ret;
@@ -687,19 +712,7 @@ EXPORT_SYMBOL_GPL(iio_read_channel_processed);
 
 int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2)
 {
-	int ret;
-
-	mutex_lock(&chan->indio_dev->info_exist_lock);
-	if (chan->indio_dev->info == NULL) {
-		ret = -ENODEV;
-		goto err_unlock;
-	}
-
-	ret = iio_channel_read(chan, val, val2, IIO_CHAN_INFO_SCALE);
-err_unlock:
-	mutex_unlock(&chan->indio_dev->info_exist_lock);
-
-	return ret;
+	return iio_read_channel_attribute(chan, val, val2, IIO_CHAN_INFO_SCALE);
 }
 EXPORT_SYMBOL_GPL(iio_read_channel_scale);
 
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 9edccfba1ffb..638157234357 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -235,6 +235,19 @@ int iio_write_channel_raw(struct iio_channel *chan, int val);
 int iio_get_channel_type(struct iio_channel *channel,
 			 enum iio_chan_type *type);
 
+/**
+ * iio_read_channel_offset() - read the offset value for a channel
+ * @chan:		The channel being queried.
+ * @val:		First part of value read back.
+ * @val2:		Second part of value read back.
+ *
+ * Note returns a description of what is in val and val2, such
+ * as IIO_VAL_INT_PLUS_MICRO telling us we have a value of val
+ * + val2/1e6
+ */
+int iio_read_channel_offset(struct iio_channel *chan, int *val,
+			   int *val2);
+
 /**
  * iio_read_channel_scale() - read the scale value for a channel
  * @chan:		The channel being queried.
-- 
cgit v1.2.3


From a9a0d64a8b7af406f03b660cbad948cfd34ed2b0 Mon Sep 17 00:00:00 2001
From: Bhumika Goyal <bhumirks@gmail.com>
Date: Sat, 1 Oct 2016 15:27:18 +0530
Subject: iio: Declare event_attrs field of iio_info structure as const

The event_attrs field of iio_info structure is only initialized once
whenever an object of iio_info is created. After that this field
is never modified again anywhere in the kernel. So, declare event_attrs
field of iio_info as a const struct attribute_group.
Checked for occurences throughout the kernel using grep and
coccinelle.

Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index b4a0679e4a49..4591d8ea41bd 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -381,7 +381,7 @@ struct iio_dev;
  **/
 struct iio_info {
 	struct module			*driver_module;
-	struct attribute_group		*event_attrs;
+	const struct attribute_group	*event_attrs;
 	const struct attribute_group	*attrs;
 
 	int (*read_raw)(struct iio_dev *indio_dev,
-- 
cgit v1.2.3


From f3b0deea89039373f0d22eafd1ff65a36e957266 Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Mon, 26 Sep 2016 20:20:16 -0400
Subject: include: linux: iio: add IIO_ATTR_{RO, WO, RW} and
 IIO_DEVICE_ATTR_{RO, WO, RW} macros

Add new macros: IIO_ATTR_RO, IIO_ATTR_WO, IIO_ATTR_RW,
IIO_DEVICE_ATTR_RO, IIO_DEVICE_ATTR_WO and IIO_DEVICE_ATTR_RW to reduce
the amount of boiler plate code that is needed for creating new
attributes. This mimics the *_RO, *_WO, and *_RW macros that are found
in include/linux/device.h and include/linux/sysfs.h.

Signed-off-by: Brian Masney <masneyb@onstation.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/sysfs.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h
index 9cd8f747212f..ce9426c507fd 100644
--- a/include/linux/iio/sysfs.h
+++ b/include/linux/iio/sysfs.h
@@ -55,10 +55,34 @@ struct iio_const_attr {
 	{ .dev_attr = __ATTR(_name, _mode, _show, _store),	\
 	  .address = _addr }
 
+#define IIO_ATTR_RO(_name, _addr)       \
+	{ .dev_attr = __ATTR_RO(_name), \
+	  .address = _addr }
+
+#define IIO_ATTR_WO(_name, _addr)       \
+	{ .dev_attr = __ATTR_WO(_name), \
+	  .address = _addr }
+
+#define IIO_ATTR_RW(_name, _addr)       \
+	{ .dev_attr = __ATTR_RW(_name), \
+	  .address = _addr }
+
 #define IIO_DEVICE_ATTR(_name, _mode, _show, _store, _addr)	\
 	struct iio_dev_attr iio_dev_attr_##_name		\
 	= IIO_ATTR(_name, _mode, _show, _store, _addr)
 
+#define IIO_DEVICE_ATTR_RO(_name, _addr)                       \
+	struct iio_dev_attr iio_dev_attr_##_name                \
+	= IIO_ATTR_RO(_name, _addr)
+
+#define IIO_DEVICE_ATTR_WO(_name, _addr)                       \
+	struct iio_dev_attr iio_dev_attr_##_name                \
+	= IIO_ATTR_WO(_name, _addr)
+
+#define IIO_DEVICE_ATTR_RW(_name, _addr)                                   \
+	struct iio_dev_attr iio_dev_attr_##_name                            \
+	= IIO_ATTR_RW(_name, _addr)
+
 #define IIO_DEVICE_ATTR_NAMED(_vname, _name, _mode, _show, _store, _addr) \
 	struct iio_dev_attr iio_dev_attr_##_vname			\
 	= IIO_ATTR(_name, _mode, _show, _store, _addr)
-- 
cgit v1.2.3


From 3d42de25d290fdfe604835d1b389845b8cba5bff Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 4 Oct 2016 20:34:35 -0400
Subject: x86/fpu, kvm: Remove KVM vcpu->fpu_counter

With the removal of the lazy FPU code, this field is no longer used.
Get rid of it.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-7-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kvm/x86.c       | 4 +---
 include/linux/kvm_host.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59d7761fd6df..2c7e775d7295 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7348,10 +7348,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->guest_fpu_loaded) {
-		vcpu->fpu_counter = 0;
+	if (!vcpu->guest_fpu_loaded)
 		return;
-	}
 
 	vcpu->guest_fpu_loaded = 0;
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9c28b4d4c90b..4e6905cd1e8e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -224,7 +224,6 @@ struct kvm_vcpu {
 
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
-	unsigned char fpu_counter;
 	struct swait_queue_head wq;
 	struct pid *pid;
 	int sigset_active;
-- 
cgit v1.2.3


From a13e831fcaa7e8af0387aef629d1835cf39c59f0 Mon Sep 17 00:00:00 2001
From: Eva Rachel Retuya <eraretuya@gmail.com>
Date: Wed, 5 Oct 2016 11:06:21 +0800
Subject: staging: iio: ad7192: implement IIO_CHAN_INFO_SAMP_FREQ

This driver predates the availability of IIO_CHAN_INFO_SAMP_FREQ
attribute wherein usage has some advantages like it can be accessed by
in-kernel consumers as well as reduces the code size.

Therefore, use IIO_CHAN_INFO_SAMP_FREQ to implement the
sampling_frequency attribute instead of using IIO_DEV_ATTR_SAMP_FREQ()
macro.

Move code from the functions associated with IIO_DEV_ATTR_SAMP_FREQ()
into respective read and write hooks with the mask set to
IIO_CHAN_INFO_SAMP_FREQ.

Signed-off-by: Eva Rachel Retuya <eraretuya@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/staging/iio/adc/ad7192.c       | 84 ++++++++++++----------------------
 include/linux/iio/adc/ad_sigma_delta.h |  1 +
 2 files changed, 30 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index 1cf6b79801a9..bfa12ceb1e1f 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -322,57 +322,6 @@ out:
 	return ret;
 }
 
-static ssize_t ad7192_read_frequency(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct ad7192_state *st = iio_priv(indio_dev);
-
-	return sprintf(buf, "%d\n", st->mclk /
-			(st->f_order * 1024 * AD7192_MODE_RATE(st->mode)));
-}
-
-static ssize_t ad7192_write_frequency(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf,
-				      size_t len)
-{
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct ad7192_state *st = iio_priv(indio_dev);
-	unsigned long lval;
-	int div, ret;
-
-	ret = kstrtoul(buf, 10, &lval);
-	if (ret)
-		return ret;
-	if (lval == 0)
-		return -EINVAL;
-
-	ret = iio_device_claim_direct_mode(indio_dev);
-	if (ret)
-		return ret;
-
-	div = st->mclk / (lval * st->f_order * 1024);
-	if (div < 1 || div > 1023) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	st->mode &= ~AD7192_MODE_RATE(-1);
-	st->mode |= AD7192_MODE_RATE(div);
-	ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode);
-
-out:
-	iio_device_release_direct_mode(indio_dev);
-
-	return ret ? ret : len;
-}
-
-static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO,
-		ad7192_read_frequency,
-		ad7192_write_frequency);
-
 static ssize_t
 ad7192_show_scale_available(struct device *dev,
 			    struct device_attribute *attr, char *buf)
@@ -471,7 +420,6 @@ static IIO_DEVICE_ATTR(ac_excitation_en, S_IRUGO | S_IWUSR,
 		       AD7192_REG_MODE);
 
 static struct attribute *ad7192_attributes[] = {
-	&iio_dev_attr_sampling_frequency.dev_attr.attr,
 	&iio_dev_attr_in_v_m_v_scale_available.dev_attr.attr,
 	&iio_dev_attr_in_voltage_scale_available.dev_attr.attr,
 	&iio_dev_attr_bridge_switch_en.dev_attr.attr,
@@ -484,7 +432,6 @@ static const struct attribute_group ad7192_attribute_group = {
 };
 
 static struct attribute *ad7195_attributes[] = {
-	&iio_dev_attr_sampling_frequency.dev_attr.attr,
 	&iio_dev_attr_in_v_m_v_scale_available.dev_attr.attr,
 	&iio_dev_attr_in_voltage_scale_available.dev_attr.attr,
 	&iio_dev_attr_bridge_switch_en.dev_attr.attr,
@@ -536,6 +483,10 @@ static int ad7192_read_raw(struct iio_dev *indio_dev,
 		if (chan->type == IIO_TEMP)
 			*val -= 273 * ad7192_get_temp_scale(unipolar);
 		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		*val = st->mclk /
+			(st->f_order * 1024 * AD7192_MODE_RATE(st->mode));
+		return IIO_VAL_INT;
 	}
 
 	return -EINVAL;
@@ -548,7 +499,7 @@ static int ad7192_write_raw(struct iio_dev *indio_dev,
 			    long mask)
 {
 	struct ad7192_state *st = iio_priv(indio_dev);
-	int ret, i;
+	int ret, i, div;
 	unsigned int tmp;
 
 	ret = iio_device_claim_direct_mode(indio_dev);
@@ -572,6 +523,22 @@ static int ad7192_write_raw(struct iio_dev *indio_dev,
 				break;
 			}
 		break;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		if (!val) {
+			ret = -EINVAL;
+			break;
+		}
+
+		div = st->mclk / (val * st->f_order * 1024);
+		if (div < 1 || div > 1023) {
+			ret = -EINVAL;
+			break;
+		}
+
+		st->mode &= ~AD7192_MODE_RATE(-1);
+		st->mode |= AD7192_MODE_RATE(div);
+		ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode);
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -585,7 +552,14 @@ static int ad7192_write_raw_get_fmt(struct iio_dev *indio_dev,
 				    struct iio_chan_spec const *chan,
 				    long mask)
 {
-	return IIO_VAL_INT_PLUS_NANO;
+	switch (mask) {
+	case IIO_CHAN_INFO_SCALE:
+		return IIO_VAL_INT_PLUS_NANO;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		return IIO_VAL_INT;
+	default:
+		return -EINVAL;
+	}
 }
 
 static const struct iio_info ad7192_info = {
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index e7fdec4db9da..5ba430cc9a87 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -136,6 +136,7 @@ int ad_sd_validate_trigger(struct iio_dev *indio_dev, struct iio_trigger *trig);
 		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
 			BIT(IIO_CHAN_INFO_OFFSET), \
 		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \
+		.info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \
 		.scan_index = (_si), \
 		.scan_type = { \
 			.sign = 'u', \
-- 
cgit v1.2.3


From 4be0542073a33cc063b6a8f8fb367536e234e7aa Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 29 Aug 2016 08:08:29 +0100
Subject: dma-buf: Introduce fence_get_rcu_safe()

This variant of fence_get_rcu() takes an RCU protected pointer to a
fence and carefully returns a reference to the fence ensuring that it is
not reallocated as it does. This is required when mixing fences and
SLAB_DESTROY_BY_RCU - although it serves a more pedagogical function atm

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20160829070834.22296-6-chris@chris-wilson.co.uk
---
 include/linux/fence.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 51 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fence.h b/include/linux/fence.h
index 0d763053f97a..c9c5ba98c302 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -182,6 +182,16 @@ void fence_init(struct fence *fence, const struct fence_ops *ops,
 void fence_release(struct kref *kref);
 void fence_free(struct fence *fence);
 
+/**
+ * fence_put - decreases refcount of the fence
+ * @fence:	[in]	fence to reduce refcount of
+ */
+static inline void fence_put(struct fence *fence)
+{
+	if (fence)
+		kref_put(&fence->refcount, fence_release);
+}
+
 /**
  * fence_get - increases refcount of the fence
  * @fence:	[in]	fence to increase refcount of
@@ -210,13 +220,49 @@ static inline struct fence *fence_get_rcu(struct fence *fence)
 }
 
 /**
- * fence_put - decreases refcount of the fence
- * @fence:	[in]	fence to reduce refcount of
+ * fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
+ * @fence:	[in]	pointer to fence to increase refcount of
+ *
+ * Function returns NULL if no refcount could be obtained, or the fence.
+ * This function handles acquiring a reference to a fence that may be
+ * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * so long as the caller is using RCU on the pointer to the fence.
+ *
+ * An alternative mechanism is to employ a seqlock to protect a bunch of
+ * fences, such as used by struct reservation_object. When using a seqlock,
+ * the seqlock must be taken before and checked after a reference to the
+ * fence is acquired (as shown here).
+ *
+ * The caller is required to hold the RCU read lock.
  */
-static inline void fence_put(struct fence *fence)
+static inline struct fence *fence_get_rcu_safe(struct fence * __rcu *fencep)
 {
-	if (fence)
-		kref_put(&fence->refcount, fence_release);
+	do {
+		struct fence *fence;
+
+		fence = rcu_dereference(*fencep);
+		if (!fence || !fence_get_rcu(fence))
+			return NULL;
+
+		/* The atomic_inc_not_zero() inside fence_get_rcu()
+		 * provides a full memory barrier upon success (such as now).
+		 * This is paired with the write barrier from assigning
+		 * to the __rcu protected fence pointer so that if that
+		 * pointer still matches the current fence, we know we
+		 * have successfully acquire a reference to it. If it no
+		 * longer matches, we are holding a reference to some other
+		 * reallocated pointer. This is possible if the allocator
+		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * fence remains valid for the RCU grace period, but it
+		 * may be reallocated. When using such allocators, we are
+		 * responsible for ensuring the reference we get is to
+		 * the right fence, as below.
+		 */
+		if (fence == rcu_access_pointer(*fencep))
+			return rcu_pointer_handoff(fence);
+
+		fence_put(fence);
+	} while (1);
 }
 
 int fence_signal(struct fence *fence);
-- 
cgit v1.2.3


From 61e84623ace35ce48975e8f90bbbac7557c43d61 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 7 Oct 2016 22:04:33 -0400
Subject: net: centralize net_device min/max MTU checking

While looking into an MTU issue with sfc, I started noticing that almost
every NIC driver with an ndo_change_mtu function implemented almost
exactly the same range checks, and in many cases, that was the only
practical thing their ndo_change_mtu function was doing. Quite a few
drivers have either 68, 64, 60 or 46 as their minimum MTU value checked,
and then various sizes from 1500 to 65535 for their maximum MTU value. We
can remove a whole lot of redundant code here if we simple store min_mtu
and max_mtu in net_device, and check against those in net/core/dev.c's
dev_set_mtu().

In theory, there should be zero functional change with this patch, it just
puts the infrastructure in place. Subsequent patches will attempt to start
using said infrastructure, with theoretically zero change in
functionality.

CC: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++++
 net/core/dev.c            | 13 +++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 136ae6bbe81e..fbdf923af4d3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1506,6 +1506,8 @@ enum netdev_priv_flags {
  *	@if_port:	Selectable AUI, TP, ...
  *	@dma:		DMA channel
  *	@mtu:		Interface MTU value
+ *	@min_mtu:	Interface Minimum MTU value
+ *	@max_mtu:	Interface Maximum MTU value
  *	@type:		Interface hardware type
  *	@hard_header_len: Maximum hardware header length.
  *
@@ -1726,6 +1728,8 @@ struct net_device {
 	unsigned char		dma;
 
 	unsigned int		mtu;
+	unsigned int		min_mtu;
+	unsigned int		max_mtu;
 	unsigned short		type;
 	unsigned short		hard_header_len;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f1fe26f66458..f376639e8774 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6499,9 +6499,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (new_mtu == dev->mtu)
 		return 0;
 
-	/*	MTU must be positive.	 */
-	if (new_mtu < 0)
+	/* MTU must be positive, and in range */
+	if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+		net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
+				    dev->name, new_mtu, dev->min_mtu);
 		return -EINVAL;
+	}
+
+	if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+		net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
+				    dev->name, new_mtu, dev->min_mtu);
+		return -EINVAL;
+	}
 
 	if (!netif_device_present(dev))
 		return -ENODEV;
-- 
cgit v1.2.3


From cf53b1da73bdf940f1523ec5a7d375d7056c759c Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Tue, 11 Oct 2016 13:04:09 -0700
Subject: Revert "net: Add driver helper functions to determine checksum
 offloadability"

This reverts commit 6ae23ad36253a8033c5714c52b691b84456487c5.

The code has been in kernel since 4.4 but there are no in tree
code that uses. Unused code is broken code, remove it.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  78 --------------------------
 net/core/dev.c            | 136 ----------------------------------------------
 2 files changed, 214 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fbdf923af4d3..bf341b65ca5e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2653,71 +2653,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 	remcsum_unadjust((__sum16 *)ptr, grc->delta);
 }
 
-struct skb_csum_offl_spec {
-	__u16		ipv4_okay:1,
-			ipv6_okay:1,
-			encap_okay:1,
-			ip_options_okay:1,
-			ext_hdrs_okay:1,
-			tcp_okay:1,
-			udp_okay:1,
-			sctp_okay:1,
-			vlan_okay:1,
-			no_encapped_ipv6:1,
-			no_not_encapped:1;
-};
-
-bool __skb_csum_offload_chk(struct sk_buff *skb,
-			    const struct skb_csum_offl_spec *spec,
-			    bool *csum_encapped,
-			    bool csum_help);
-
-static inline bool skb_csum_offload_chk(struct sk_buff *skb,
-					const struct skb_csum_offl_spec *spec,
-					bool *csum_encapped,
-					bool csum_help)
-{
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return false;
-
-	return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help);
-}
-
-static inline bool skb_csum_offload_chk_help(struct sk_buff *skb,
-					     const struct skb_csum_offl_spec *spec)
-{
-	bool csum_encapped;
-
-	return skb_csum_offload_chk(skb, spec, &csum_encapped, true);
-}
-
-static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb)
-{
-	static const struct skb_csum_offl_spec csum_offl_spec = {
-		.ipv4_okay = 1,
-		.ip_options_okay = 1,
-		.ipv6_okay = 1,
-		.vlan_okay = 1,
-		.tcp_okay = 1,
-		.udp_okay = 1,
-	};
-
-	return skb_csum_offload_chk_help(skb, &csum_offl_spec);
-}
-
-static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb)
-{
-	static const struct skb_csum_offl_spec csum_offl_spec = {
-		.ipv4_okay = 1,
-		.ip_options_okay = 1,
-		.tcp_okay = 1,
-		.udp_okay = 1,
-		.vlan_okay = 1,
-	};
-
-	return skb_csum_offload_chk_help(skb, &csum_offl_spec);
-}
-
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
@@ -3961,19 +3896,6 @@ static inline bool can_checksum_protocol(netdev_features_t features,
 	}
 }
 
-/* Map an ethertype into IP protocol if possible */
-static inline int eproto_to_ipproto(int eproto)
-{
-	switch (eproto) {
-	case htons(ETH_P_IP):
-		return IPPROTO_IP;
-	case htons(ETH_P_IPV6):
-		return IPPROTO_IPV6;
-	default:
-		return -1;
-	}
-}
-
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev);
 #else
diff --git a/net/core/dev.c b/net/core/dev.c
index f376639e8774..6498cc2ba8f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -139,7 +139,6 @@
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
-#include <linux/sctp.h>
 #include <linux/crash_dump.h>
 
 #include "net-sysfs.h"
@@ -2492,141 +2491,6 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/* skb_csum_offload_check - Driver helper function to determine if a device
- * with limited checksum offload capabilities is able to offload the checksum
- * for a given packet.
- *
- * Arguments:
- *   skb - sk_buff for the packet in question
- *   spec - contains the description of what device can offload
- *   csum_encapped - returns true if the checksum being offloaded is
- *	      encpasulated. That is it is checksum for the transport header
- *	      in the inner headers.
- *   checksum_help - when set indicates that helper function should
- *	      call skb_checksum_help if offload checks fail
- *
- * Returns:
- *   true: Packet has passed the checksum checks and should be offloadable to
- *	   the device (a driver may still need to check for additional
- *	   restrictions of its device)
- *   false: Checksum is not offloadable. If checksum_help was set then
- *	   skb_checksum_help was called to resolve checksum for non-GSO
- *	   packets and when IP protocol is not SCTP
- */
-bool __skb_csum_offload_chk(struct sk_buff *skb,
-			    const struct skb_csum_offl_spec *spec,
-			    bool *csum_encapped,
-			    bool csum_help)
-{
-	struct iphdr *iph;
-	struct ipv6hdr *ipv6;
-	void *nhdr;
-	int protocol;
-	u8 ip_proto;
-
-	if (skb->protocol == htons(ETH_P_8021Q) ||
-	    skb->protocol == htons(ETH_P_8021AD)) {
-		if (!spec->vlan_okay)
-			goto need_help;
-	}
-
-	/* We check whether the checksum refers to a transport layer checksum in
-	 * the outermost header or an encapsulated transport layer checksum that
-	 * corresponds to the inner headers of the skb. If the checksum is for
-	 * something else in the packet we need help.
-	 */
-	if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
-		/* Non-encapsulated checksum */
-		protocol = eproto_to_ipproto(vlan_get_protocol(skb));
-		nhdr = skb_network_header(skb);
-		*csum_encapped = false;
-		if (spec->no_not_encapped)
-			goto need_help;
-	} else if (skb->encapsulation && spec->encap_okay &&
-		   skb_checksum_start_offset(skb) ==
-		   skb_inner_transport_offset(skb)) {
-		/* Encapsulated checksum */
-		*csum_encapped = true;
-		switch (skb->inner_protocol_type) {
-		case ENCAP_TYPE_ETHER:
-			protocol = eproto_to_ipproto(skb->inner_protocol);
-			break;
-		case ENCAP_TYPE_IPPROTO:
-			protocol = skb->inner_protocol;
-			break;
-		}
-		nhdr = skb_inner_network_header(skb);
-	} else {
-		goto need_help;
-	}
-
-	switch (protocol) {
-	case IPPROTO_IP:
-		if (!spec->ipv4_okay)
-			goto need_help;
-		iph = nhdr;
-		ip_proto = iph->protocol;
-		if (iph->ihl != 5 && !spec->ip_options_okay)
-			goto need_help;
-		break;
-	case IPPROTO_IPV6:
-		if (!spec->ipv6_okay)
-			goto need_help;
-		if (spec->no_encapped_ipv6 && *csum_encapped)
-			goto need_help;
-		ipv6 = nhdr;
-		nhdr += sizeof(*ipv6);
-		ip_proto = ipv6->nexthdr;
-		break;
-	default:
-		goto need_help;
-	}
-
-ip_proto_again:
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-		if (!spec->tcp_okay ||
-		    skb->csum_offset != offsetof(struct tcphdr, check))
-			goto need_help;
-		break;
-	case IPPROTO_UDP:
-		if (!spec->udp_okay ||
-		    skb->csum_offset != offsetof(struct udphdr, check))
-			goto need_help;
-		break;
-	case IPPROTO_SCTP:
-		if (!spec->sctp_okay ||
-		    skb->csum_offset != offsetof(struct sctphdr, checksum))
-			goto cant_help;
-		break;
-	case NEXTHDR_HOP:
-	case NEXTHDR_ROUTING:
-	case NEXTHDR_DEST: {
-		u8 *opthdr = nhdr;
-
-		if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
-			goto need_help;
-
-		ip_proto = opthdr[0];
-		nhdr += (opthdr[1] + 1) << 3;
-
-		goto ip_proto_again;
-	}
-	default:
-		goto need_help;
-	}
-
-	/* Passed the tests for offloading checksum */
-	return true;
-
-need_help:
-	if (csum_help && !skb_shinfo(skb)->gso_size)
-		skb_checksum_help(skb);
-cant_help:
-	return false;
-}
-EXPORT_SYMBOL(__skb_csum_offload_chk);
-
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
-- 
cgit v1.2.3


From c3aaa403840a5ccd305fb5e73f3cbfac6453b5e5 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@caviumnetworks.com>
Date: Fri, 14 Oct 2016 05:19:17 -0400
Subject: qed: Pass MAC hints to VFs

Some hypervisors can support MAC hints to their VFs.
Even though we don't have such a hypervisor API in linux, we add
sufficient logic for the VF to be able to receive such hints and
set the mac accordingly - as long as the VF has not been set with
a MAC already.

Signed-off-by: Yuval Mintz <Yuval.Mintz@caviumnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_vf.c     | 4 ++--
 drivers/net/ethernet/qlogic/qede/qede_main.c | 6 +++++-
 include/linux/qed/qed_eth_if.h               | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index abf5bf11f865..f580bf4c97f0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1230,8 +1230,8 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn)
 
 	is_mac_exist = qed_vf_bulletin_get_forced_mac(hwfn, mac,
 						      &is_mac_forced);
-	if (is_mac_exist && is_mac_forced && cookie)
-		ops->force_mac(cookie, mac);
+	if (is_mac_exist && cookie)
+		ops->force_mac(cookie, mac, !!is_mac_forced);
 
 	/* Always update link configuration according to bulletin */
 	qed_link_update(hwfn);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 343038ca047d..9866d952e3e1 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -171,10 +171,14 @@ static struct pci_driver qede_pci_driver = {
 #endif
 };
 
-static void qede_force_mac(void *dev, u8 *mac)
+static void qede_force_mac(void *dev, u8 *mac, bool forced)
 {
 	struct qede_dev *edev = dev;
 
+	/* MAC hints take effect only if we haven't set one already */
+	if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced)
+		return;
+
 	ether_addr_copy(edev->ndev->dev_addr, mac);
 	ether_addr_copy(edev->primary_mac, mac);
 }
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 33c24ebc9b7f..1c779486c30d 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -129,7 +129,7 @@ struct qed_tunn_params {
 
 struct qed_eth_cb_ops {
 	struct qed_common_cb_ops common;
-	void (*force_mac) (void *dev, u8 *mac);
+	void (*force_mac) (void *dev, u8 *mac, bool forced);
 };
 
 #ifdef CONFIG_DCB
-- 
cgit v1.2.3


From 7b7e70f979e34ed84d725eab8ea42921ab6f42e3 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@caviumnetworks.com>
Date: Fri, 14 Oct 2016 05:19:20 -0400
Subject: qed*: Allow unicast filtering

Apparently qede fails to set IFF_UNICAST_FLT, and as a result is not
actually performing unicast MAC filtering.
While we're at it - relax a hard-coded limitation that limits each
interface into using at most 15 unicast MAC addresses before turning
promiscuous. Instead utilize the HW resources to their limit.

Signed-off-by: Yuval Mintz <Yuval.Mintz@caviumnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c     | 12 ++++++++++--
 drivers/net/ethernet/qlogic/qede/qede_main.c |  4 +++-
 include/linux/qed/qed_eth_if.h               |  1 +
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index ddd410a91e13..6b0e22d9fe4c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1652,6 +1652,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 
 	if (IS_PF(cdev)) {
 		int max_vf_vlan_filters = 0;
+		int max_vf_mac_filters = 0;
 
 		if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
 			for_each_hwfn(cdev, i)
@@ -1665,11 +1666,18 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 			info->num_queues = cdev->num_hwfns;
 		}
 
-		if (IS_QED_SRIOV(cdev))
+		if (IS_QED_SRIOV(cdev)) {
 			max_vf_vlan_filters = cdev->p_iov_info->total_vfs *
 					      QED_ETH_VF_NUM_VLAN_FILTERS;
-		info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN) -
+			max_vf_mac_filters = cdev->p_iov_info->total_vfs *
+					     QED_ETH_VF_NUM_MAC_FILTERS;
+		}
+		info->num_vlan_filters = RESC_NUM(QED_LEADING_HWFN(cdev),
+						  QED_VLAN) -
 					 max_vf_vlan_filters;
+		info->num_mac_filters = RESC_NUM(QED_LEADING_HWFN(cdev),
+						 QED_MAC) -
+					max_vf_mac_filters;
 
 		ether_addr_copy(info->port_mac,
 				cdev->hwfns[0].hw_info.hw_mac_addr);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 6c2b09c255d5..0e483afc2b87 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2365,6 +2365,8 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	qede_set_ethtool_ops(ndev);
 
+	ndev->priv_flags = IFF_UNICAST_FLT;
+
 	/* user-changeble features */
 	hw_features = NETIF_F_GRO | NETIF_F_SG |
 		      NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -3937,7 +3939,7 @@ static void qede_config_rx_mode(struct net_device *ndev)
 
 	/* Check for promiscuous */
 	if ((ndev->flags & IFF_PROMISC) ||
-	    (uc_count > 15)) { /* @@@TBD resource allocation - 1 */
+	    (uc_count > edev->dev_info.num_mac_filters - 1)) {
 		accept_flags = QED_FILTER_RX_MODE_TYPE_PROMISC;
 	} else {
 		/* Add MAC filters according to the unicast secondary macs */
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 1c779486c30d..15130805d792 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -23,6 +23,7 @@ struct qed_dev_eth_info {
 
 	u8	port_mac[ETH_ALEN];
 	u8	num_vlan_filters;
+	u16	num_mac_filters;
 
 	/* Legacy VF - this affects the datapath, so qede has to know */
 	bool is_legacy;
-- 
cgit v1.2.3


From a6e78b3e1406575323b30b65890ee3c29930fb27 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@intel.com>
Date: Mon, 17 Oct 2016 17:34:39 +0530
Subject: video: Add new aspect ratios for HDMI 2.0

HDMI 2.0/CEA-861-F introduces two new aspect ratios:
- 64:27
- 256:135

This patch adds enumeration for the new aspect ratios
in the existing aspect ratio list.

V2: rebase
V3: rebase
V4: Added r-b from Jose, Ack by Tomi

Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Jose Abreu <Jose.Abreu@synopsys.com>
Acked-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1476705880-15600-4-git-send-email-shashank.sharma@intel.com
---
 drivers/video/hdmi.c | 4 ++++
 include/linux/hdmi.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 162689227a23..1cf907ecded4 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -533,6 +533,10 @@ hdmi_picture_aspect_get_name(enum hdmi_picture_aspect picture_aspect)
 		return "4:3";
 	case HDMI_PICTURE_ASPECT_16_9:
 		return "16:9";
+	case HDMI_PICTURE_ASPECT_64_27:
+		return "64:27";
+	case HDMI_PICTURE_ASPECT_256_135:
+		return "256:135";
 	case HDMI_PICTURE_ASPECT_RESERVED:
 		return "Reserved";
 	}
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index e9744202fa29..edbb4fc674ed 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -78,6 +78,8 @@ enum hdmi_picture_aspect {
 	HDMI_PICTURE_ASPECT_NONE,
 	HDMI_PICTURE_ASPECT_4_3,
 	HDMI_PICTURE_ASPECT_16_9,
+	HDMI_PICTURE_ASPECT_64_27,
+	HDMI_PICTURE_ASPECT_256_135,
 	HDMI_PICTURE_ASPECT_RESERVED,
 };
 
-- 
cgit v1.2.3


From 664fcf123a30edf16b47d2ce1f610d654ba917b2 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 16 Oct 2016 19:56:51 +0200
Subject: net: phy: Threaded interrupts allow some simplification

The PHY interrupts are now handled in a threaded interrupt handler,
which can sleep. The work queue is no longer needed, phy_change() can
be called directly. phy_mac_interrupt() still needs to be safe to call
in interrupt context, so keep the work queue, and use a helper to call
phy_change().

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 45 +++++++++++++++++++++++++-------------------
 drivers/net/phy/phy_device.c |  2 +-
 include/linux/phy.h          |  5 +++--
 3 files changed, 30 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 25f2b296aaba..bb673c63c85c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -664,7 +664,7 @@ static void phy_error(struct phy_device *phydev)
  * @phy_dat: phy_device pointer
  *
  * Description: When a PHY interrupt occurs, the handler disables
- * interrupts, and schedules a work task to clear the interrupt.
+ * interrupts, and uses phy_change to handle the interrupt.
  */
 static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 {
@@ -673,15 +673,10 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	if (PHY_HALTED == phydev->state)
 		return IRQ_NONE;		/* It can't be ours.  */
 
-	/* The MDIO bus is not allowed to be written in interrupt
-	 * context, so we need to disable the irq here.  A work
-	 * queue will write the PHY to disable and clear the
-	 * interrupt, and then reenable the irq line.
-	 */
 	disable_irq_nosync(irq);
 	atomic_inc(&phydev->irq_disable);
 
-	queue_work(system_power_efficient_wq, &phydev->phy_queue);
+	phy_change(phydev);
 
 	return IRQ_HANDLED;
 }
@@ -766,12 +761,6 @@ int phy_stop_interrupts(struct phy_device *phydev)
 
 	free_irq(phydev->irq, phydev);
 
-	/* Cannot call flush_scheduled_work() here as desired because
-	 * of rtnl_lock(), but we do not really care about what would
-	 * be done, except from enable_irq(), so cancel any work
-	 * possibly pending and take care of the matter below.
-	 */
-	cancel_work_sync(&phydev->phy_queue);
 	/* If work indeed has been cancelled, disable_irq() will have
 	 * been left unbalanced from phy_interrupt() and enable_irq()
 	 * has to be called so that other devices on the line work.
@@ -784,14 +773,11 @@ int phy_stop_interrupts(struct phy_device *phydev)
 EXPORT_SYMBOL(phy_stop_interrupts);
 
 /**
- * phy_change - Scheduled by the phy_interrupt/timer to handle PHY changes
- * @work: work_struct that describes the work to be done
+ * phy_change - Called by the phy_interrupt to handle PHY changes
+ * @phydev: phy_device struct that interrupted
  */
-void phy_change(struct work_struct *work)
+void phy_change(struct phy_device *phydev)
 {
-	struct phy_device *phydev =
-		container_of(work, struct phy_device, phy_queue);
-
 	if (phy_interrupt_is_valid(phydev)) {
 		if (phydev->drv->did_interrupt &&
 		    !phydev->drv->did_interrupt(phydev))
@@ -832,6 +818,18 @@ phy_err:
 	phy_error(phydev);
 }
 
+/**
+ * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
+ * @work: work_struct that describes the work to be done
+ */
+void phy_change_work(struct work_struct *work)
+{
+	struct phy_device *phydev =
+		container_of(work, struct phy_device, phy_queue);
+
+	phy_change(phydev);
+}
+
 /**
  * phy_stop - Bring down the PHY link, and stop checking the status
  * @phydev: target phy_device struct
@@ -1116,6 +1114,15 @@ void phy_state_machine(struct work_struct *work)
 				   PHY_STATE_TIME * HZ);
 }
 
+/**
+ * phy_mac_interrupt - MAC says the link has changed
+ * @phydev: phy_device struct with changed link
+ * @new_link: Link is Up/Down.
+ *
+ * Description: The MAC layer is able indicate there has been a change
+ *   in the PHY link status. Set the new link status, and trigger the
+ *   state machine, work a work queue.
+ */
 void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 {
 	phydev->link = new_link;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e977ba931878..ac440a815353 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -347,7 +347,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
 
 	mutex_init(&dev->lock);
 	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
-	INIT_WORK(&dev->phy_queue, phy_change);
+	INIT_WORK(&dev->phy_queue, phy_change_work);
 
 	/* Request the appropriate module unconditionally; don't
 	 * bother trying to do so only if it isn't already loaded,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e25f1830fbcf..c47378c93607 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -343,7 +343,7 @@ struct phy_c45_device_ids {
  * giving up on the current attempt at acquiring a link
  * irq: IRQ number of the PHY's interrupt (-1 if none)
  * phy_timer: The timer for handling the state machine
- * phy_queue: A work_queue for the interrupt
+ * phy_queue: A work_queue for the phy_mac_interrupt
  * attached_dev: The attached enet driver's device instance ptr
  * adjust_link: Callback for the enet controller to respond to
  * changes in the link state.
@@ -802,7 +802,8 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
 int phy_drivers_register(struct phy_driver *new_driver, int n,
 			 struct module *owner);
 void phy_state_machine(struct work_struct *work);
-void phy_change(struct work_struct *work);
+void phy_change(struct phy_device *phydev);
+void phy_change_work(struct work_struct *work);
 void phy_mac_interrupt(struct phy_device *phydev, int new_link);
 void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
-- 
cgit v1.2.3


From bb6869b2147817385e0261f928b942f466f74a63 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Tue, 18 Oct 2016 10:39:06 +0100
Subject: remoteproc: st_slim_rproc: add a slimcore rproc driver

slim core is used as a basis for many IPs in the STi
chipsets such as fdma and demux. To avoid duplicating
the elf loading code in each device driver a slim
rproc driver has been created.

This driver is designed to be used by other device drivers
such as fdma, or demux whose IP is based around a slim core.
The device driver can call slim_rproc_alloc() to allocate
a slim rproc and slim_rproc_put() when finished.

This driver takes care of ioremapping the slim
registers (dmem, imem, slimcore, peripherals), whose offsets
and sizes can change between IP's. It also obtains and enables
any clocks used by the device. This approach avoids having
a double mapping of the registers as slim_rproc does not register
its own platform device. It also maps well to device tree
abstraction as it allows us to have one dt node for the whole
device.

All of the generic rproc elf loading code can be reused, and
we provide start() stop() hooks to start and stop the slim
core once the firmware has been loaded. This has been tested
successfully with fdma driver.

Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/remoteproc/Kconfig               |   7 +-
 drivers/remoteproc/Makefile              |   1 +
 drivers/remoteproc/st_slim_rproc.c       | 364 +++++++++++++++++++++++++++++++
 include/linux/remoteproc/st_slim_rproc.h |  58 +++++
 4 files changed, 428 insertions(+), 2 deletions(-)
 create mode 100644 drivers/remoteproc/st_slim_rproc.c
 create mode 100644 include/linux/remoteproc/st_slim_rproc.h

(limited to 'include/linux')

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index f396bfef5d42..9270c8e596f7 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -58,7 +58,6 @@ config DA8XX_REMOTEPROC
 	tristate "DA8xx/OMAP-L13x remoteproc support"
 	depends on ARCH_DAVINCI_DA8XX
 	select CMA if MMU
-	select REMOTEPROC
 	select RPMSG_VIRTIO
 	help
 	  Say y here to support DA8xx/OMAP-L13x remote processors via the
@@ -99,10 +98,10 @@ config QCOM_WCNSS_PIL
 	tristate "Qualcomm WCNSS Peripheral Image Loader"
 	depends on OF && ARCH_QCOM
 	depends on QCOM_SMEM
+	depends on REMOTEPROC
 	select QCOM_MDT_LOADER
 	select QCOM_SCM
 	select QCOM_WCNSS_IRIS
-	select REMOTEPROC
 	help
 	  Say y here to support the Peripheral Image Loader for the Qualcomm
 	  Wireless Connectivity Subsystem.
@@ -116,4 +115,8 @@ config ST_REMOTEPROC
 	  processor framework.
 	  This can be either built-in or a loadable module.
 
+config ST_SLIM_REMOTEPROC
+	tristate
+	select REMOTEPROC
+
 endmenu
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 6dfb62ed643f..924f0cb25470 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_QCOM_Q6V5_PIL)		+= qcom_q6v5_pil.o
 obj-$(CONFIG_QCOM_WCNSS_IRIS)		+= qcom_wcnss_iris.o
 obj-$(CONFIG_QCOM_WCNSS_PIL)		+= qcom_wcnss.o
 obj-$(CONFIG_ST_REMOTEPROC)		+= st_remoteproc.o
+obj-$(CONFIG_ST_SLIM_REMOTEPROC)	+= st_slim_rproc.o
diff --git a/drivers/remoteproc/st_slim_rproc.c b/drivers/remoteproc/st_slim_rproc.c
new file mode 100644
index 000000000000..1484e9717946
--- /dev/null
+++ b/drivers/remoteproc/st_slim_rproc.c
@@ -0,0 +1,364 @@
+/*
+ * SLIM core rproc driver
+ *
+ * Copyright (C) 2016 STMicroelectronics
+ *
+ * Author: Peter Griffin <peter.griffin@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/remoteproc.h>
+#include <linux/remoteproc/st_slim_rproc.h>
+#include "remoteproc_internal.h"
+
+/* SLIM core registers */
+#define SLIM_ID_OFST		0x0
+#define SLIM_VER_OFST		0x4
+
+#define SLIM_EN_OFST		0x8
+#define SLIM_EN_RUN			BIT(0)
+
+#define SLIM_CLK_GATE_OFST	0xC
+#define SLIM_CLK_GATE_DIS		BIT(0)
+#define SLIM_CLK_GATE_RESET		BIT(2)
+
+#define SLIM_SLIM_PC_OFST	0x20
+
+/* DMEM registers */
+#define SLIM_REV_ID_OFST	0x0
+#define SLIM_REV_ID_MIN_MASK		GENMASK(15, 8)
+#define SLIM_REV_ID_MIN(id)		((id & SLIM_REV_ID_MIN_MASK) >> 8)
+#define SLIM_REV_ID_MAJ_MASK		GENMASK(23, 16)
+#define SLIM_REV_ID_MAJ(id)		((id & SLIM_REV_ID_MAJ_MASK) >> 16)
+
+
+/* peripherals registers */
+#define SLIM_STBUS_SYNC_OFST	0xF88
+#define SLIM_STBUS_SYNC_DIS		BIT(0)
+
+#define SLIM_INT_SET_OFST	0xFD4
+#define SLIM_INT_CLR_OFST	0xFD8
+#define SLIM_INT_MASK_OFST	0xFDC
+
+#define SLIM_CMD_CLR_OFST	0xFC8
+#define SLIM_CMD_MASK_OFST	0xFCC
+
+static const char *mem_names[ST_SLIM_MEM_MAX] = {
+	[ST_SLIM_DMEM]	= "dmem",
+	[ST_SLIM_IMEM]	= "imem",
+};
+
+static int slim_clk_get(struct st_slim_rproc *slim_rproc, struct device *dev)
+{
+	int clk, err;
+
+	for (clk = 0; clk < ST_SLIM_MAX_CLK; clk++) {
+		slim_rproc->clks[clk] = of_clk_get(dev->of_node, clk);
+		if (IS_ERR(slim_rproc->clks[clk])) {
+			err = PTR_ERR(slim_rproc->clks[clk]);
+			if (err == -EPROBE_DEFER)
+				goto err_put_clks;
+			slim_rproc->clks[clk] = NULL;
+			break;
+		}
+	}
+
+	return 0;
+
+err_put_clks:
+	while (--clk >= 0)
+		clk_put(slim_rproc->clks[clk]);
+
+	return err;
+}
+
+static void slim_clk_disable(struct st_slim_rproc *slim_rproc)
+{
+	int clk;
+
+	for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++)
+		clk_disable_unprepare(slim_rproc->clks[clk]);
+}
+
+static int slim_clk_enable(struct st_slim_rproc *slim_rproc)
+{
+	int clk, ret;
+
+	for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++) {
+		ret = clk_prepare_enable(slim_rproc->clks[clk]);
+		if (ret)
+			goto err_disable_clks;
+	}
+
+	return 0;
+
+err_disable_clks:
+	while (--clk >= 0)
+		clk_disable_unprepare(slim_rproc->clks[clk]);
+
+	return ret;
+}
+
+/*
+ * Remoteproc slim specific device handlers
+ */
+static int slim_rproc_start(struct rproc *rproc)
+{
+	struct device *dev = &rproc->dev;
+	struct st_slim_rproc *slim_rproc = rproc->priv;
+	unsigned long hw_id, hw_ver, fw_rev;
+	u32 val;
+
+	/* disable CPU pipeline clock & reset CPU pipeline */
+	val = SLIM_CLK_GATE_DIS | SLIM_CLK_GATE_RESET;
+	writel(val, slim_rproc->slimcore + SLIM_CLK_GATE_OFST);
+
+	/* disable SLIM core STBus sync */
+	writel(SLIM_STBUS_SYNC_DIS, slim_rproc->peri + SLIM_STBUS_SYNC_OFST);
+
+	/* enable cpu pipeline clock */
+	writel(!SLIM_CLK_GATE_DIS,
+		slim_rproc->slimcore + SLIM_CLK_GATE_OFST);
+
+	/* clear int & cmd mailbox */
+	writel(~0U, slim_rproc->peri + SLIM_INT_CLR_OFST);
+	writel(~0U, slim_rproc->peri + SLIM_CMD_CLR_OFST);
+
+	/* enable all channels cmd & int */
+	writel(~0U, slim_rproc->peri + SLIM_INT_MASK_OFST);
+	writel(~0U, slim_rproc->peri + SLIM_CMD_MASK_OFST);
+
+	/* enable cpu */
+	writel(SLIM_EN_RUN, slim_rproc->slimcore + SLIM_EN_OFST);
+
+	hw_id = readl_relaxed(slim_rproc->slimcore + SLIM_ID_OFST);
+	hw_ver = readl_relaxed(slim_rproc->slimcore + SLIM_VER_OFST);
+
+	fw_rev = readl(slim_rproc->mem[ST_SLIM_DMEM].cpu_addr +
+			SLIM_REV_ID_OFST);
+
+	dev_info(dev, "fw rev:%ld.%ld on SLIM %ld.%ld\n",
+		 SLIM_REV_ID_MAJ(fw_rev), SLIM_REV_ID_MIN(fw_rev),
+		 hw_id, hw_ver);
+
+	return 0;
+}
+
+static int slim_rproc_stop(struct rproc *rproc)
+{
+	struct st_slim_rproc *slim_rproc = rproc->priv;
+	u32 val;
+
+	/* mask all (cmd & int) channels */
+	writel(0UL, slim_rproc->peri + SLIM_INT_MASK_OFST);
+	writel(0UL, slim_rproc->peri + SLIM_CMD_MASK_OFST);
+
+	/* disable cpu pipeline clock */
+	writel(SLIM_CLK_GATE_DIS, slim_rproc->slimcore + SLIM_CLK_GATE_OFST);
+
+	writel(!SLIM_EN_RUN, slim_rproc->slimcore + SLIM_EN_OFST);
+
+	val = readl(slim_rproc->slimcore + SLIM_EN_OFST);
+	if (val & SLIM_EN_RUN)
+		dev_warn(&rproc->dev, "Failed to disable SLIM");
+
+	dev_dbg(&rproc->dev, "slim stopped\n");
+
+	return 0;
+}
+
+static void *slim_rproc_da_to_va(struct rproc *rproc, u64 da, int len)
+{
+	struct st_slim_rproc *slim_rproc = rproc->priv;
+	void *va = NULL;
+	int i;
+
+	for (i = 0; i < ST_SLIM_MEM_MAX; i++) {
+		if (da != slim_rproc->mem[i].bus_addr)
+			continue;
+
+		if (len <= slim_rproc->mem[i].size) {
+			/* __force to make sparse happy with type conversion */
+			va = (__force void *)slim_rproc->mem[i].cpu_addr;
+			break;
+		}
+	}
+
+	dev_dbg(&rproc->dev, "da = 0x%llx len = 0x%x va = 0x%p\n", da, len, va);
+
+	return va;
+}
+
+static struct rproc_ops slim_rproc_ops = {
+	.start		= slim_rproc_start,
+	.stop		= slim_rproc_stop,
+	.da_to_va       = slim_rproc_da_to_va,
+};
+
+/*
+ * Firmware handler operations: sanity, boot address, load ...
+ */
+
+static struct resource_table empty_rsc_tbl = {
+	.ver = 1,
+	.num = 0,
+};
+
+static struct resource_table *slim_rproc_find_rsc_table(struct rproc *rproc,
+					       const struct firmware *fw,
+					       int *tablesz)
+{
+	*tablesz = sizeof(empty_rsc_tbl);
+	return &empty_rsc_tbl;
+}
+
+static struct rproc_fw_ops slim_rproc_fw_ops = {
+	.find_rsc_table = slim_rproc_find_rsc_table,
+};
+
+/**
+ * st_slim_rproc_alloc() - allocate and initialise slim rproc
+ * @pdev: Pointer to the platform_device struct
+ * @fw_name: Name of firmware for rproc to use
+ *
+ * Function for allocating and initialising a slim rproc for use by
+ * device drivers whose IP is based around the SLIM core. It
+ * obtains and enables any clocks required by the SLIM core and also
+ * ioremaps the various IO.
+ *
+ * Returns st_slim_rproc pointer or PTR_ERR() on error.
+ */
+
+struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev,
+				char *fw_name)
+{
+	struct device *dev = &pdev->dev;
+	struct st_slim_rproc *slim_rproc;
+	struct device_node *np = dev->of_node;
+	struct rproc *rproc;
+	struct resource *res;
+	int err, i;
+	const struct rproc_fw_ops *elf_ops;
+
+	if (!fw_name)
+		return ERR_PTR(-EINVAL);
+
+	if (!of_device_is_compatible(np, "st,slim-rproc"))
+		return ERR_PTR(-EINVAL);
+
+	rproc = rproc_alloc(dev, np->name, &slim_rproc_ops,
+			fw_name, sizeof(*slim_rproc));
+	if (!rproc)
+		return ERR_PTR(-ENOMEM);
+
+	rproc->has_iommu = false;
+
+	slim_rproc = rproc->priv;
+	slim_rproc->rproc = rproc;
+
+	elf_ops = rproc->fw_ops;
+	/* Use some generic elf ops */
+	slim_rproc_fw_ops.load = elf_ops->load;
+	slim_rproc_fw_ops.sanity_check = elf_ops->sanity_check;
+
+	rproc->fw_ops = &slim_rproc_fw_ops;
+
+	/* get imem and dmem */
+	for (i = 0; i < ARRAY_SIZE(mem_names); i++) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						mem_names[i]);
+
+		slim_rproc->mem[i].cpu_addr = devm_ioremap_resource(dev, res);
+		if (IS_ERR(slim_rproc->mem[i].cpu_addr)) {
+			dev_err(&pdev->dev, "devm_ioremap_resource failed\n");
+			err = PTR_ERR(slim_rproc->mem[i].cpu_addr);
+			goto err;
+		}
+		slim_rproc->mem[i].bus_addr = res->start;
+		slim_rproc->mem[i].size = resource_size(res);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "slimcore");
+	slim_rproc->slimcore = devm_ioremap_resource(dev, res);
+	if (IS_ERR(slim_rproc->slimcore)) {
+		dev_err(&pdev->dev, "failed to ioremap slimcore IO\n");
+		err = PTR_ERR(slim_rproc->slimcore);
+		goto err;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "peripherals");
+	slim_rproc->peri = devm_ioremap_resource(dev, res);
+	if (IS_ERR(slim_rproc->peri)) {
+		dev_err(&pdev->dev, "failed to ioremap peripherals IO\n");
+		err = PTR_ERR(slim_rproc->peri);
+		goto err;
+	}
+
+	err = slim_clk_get(slim_rproc, dev);
+	if (err)
+		goto err;
+
+	err = slim_clk_enable(slim_rproc);
+	if (err) {
+		dev_err(dev, "Failed to enable clocks\n");
+		goto err_clk_put;
+	}
+
+	/* Register as a remoteproc device */
+	err = rproc_add(rproc);
+	if (err) {
+		dev_err(dev, "registration of slim remoteproc failed\n");
+		goto err_clk_dis;
+	}
+
+	return slim_rproc;
+
+err_clk_dis:
+	slim_clk_disable(slim_rproc);
+err_clk_put:
+	for (i = 0; i < ST_SLIM_MAX_CLK && slim_rproc->clks[i]; i++)
+		clk_put(slim_rproc->clks[i]);
+err:
+	rproc_put(rproc);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(st_slim_rproc_alloc);
+
+/**
+  * st_slim_rproc_put() - put slim rproc resources
+  * @slim_rproc: Pointer to the st_slim_rproc struct
+  *
+  * Function for calling respective _put() functions on slim_rproc resources.
+  *
+  */
+void st_slim_rproc_put(struct st_slim_rproc *slim_rproc)
+{
+	int clk;
+
+	if (!slim_rproc)
+		return;
+
+	slim_clk_disable(slim_rproc);
+
+	for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++)
+		clk_put(slim_rproc->clks[clk]);
+
+	rproc_del(slim_rproc->rproc);
+	rproc_put(slim_rproc->rproc);
+}
+EXPORT_SYMBOL(st_slim_rproc_put);
+
+MODULE_AUTHOR("Peter Griffin <peter.griffin@linaro.org>");
+MODULE_DESCRIPTION("STMicroelectronics SLIM core rproc driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/remoteproc/st_slim_rproc.h b/include/linux/remoteproc/st_slim_rproc.h
new file mode 100644
index 000000000000..4155556fa4b2
--- /dev/null
+++ b/include/linux/remoteproc/st_slim_rproc.h
@@ -0,0 +1,58 @@
+/*
+ * SLIM core rproc driver header
+ *
+ * Copyright (C) 2016 STMicroelectronics
+ *
+ * Author: Peter Griffin <peter.griffin@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _ST_REMOTEPROC_SLIM_H
+#define _ST_REMOTEPROC_SLIM_H
+
+#define ST_SLIM_MEM_MAX 2
+#define ST_SLIM_MAX_CLK 4
+
+enum {
+	ST_SLIM_DMEM,
+	ST_SLIM_IMEM,
+};
+
+/**
+ * struct st_slim_mem - slim internal memory structure
+ * @cpu_addr: MPU virtual address of the memory region
+ * @bus_addr: Bus address used to access the memory region
+ * @size: Size of the memory region
+ */
+struct st_slim_mem {
+	void __iomem *cpu_addr;
+	phys_addr_t bus_addr;
+	size_t size;
+};
+
+/**
+ * struct st_slim_rproc - SLIM slim core
+ * @rproc: rproc handle
+ * @mem: slim memory information
+ * @slimcore: slim slimcore regs
+ * @peri: slim peripheral regs
+ * @clks: slim clocks
+ */
+struct st_slim_rproc {
+	struct rproc *rproc;
+	struct st_slim_mem mem[ST_SLIM_MEM_MAX];
+	void __iomem *slimcore;
+	void __iomem *peri;
+
+	/* st_slim_rproc private */
+	struct clk *clks[ST_SLIM_MAX_CLK];
+};
+
+struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev,
+					char *fw_name);
+void st_slim_rproc_put(struct st_slim_rproc *slim_rproc);
+
+#endif
-- 
cgit v1.2.3


From 1a3f060c1a47dba4e12ac21ce62b57666b9c4e95 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 17 Oct 2016 19:15:44 -0700
Subject: net: Introduce new api for walking upper and lower devices

This patch introduces netdev_walk_all_upper_dev_rcu,
netdev_walk_all_lower_dev and netdev_walk_all_lower_dev_rcu. These
functions recursively walk the adj_list of devices to determine all upper
and lower devices.

The functions take a callback function that is invoked for each device
in the list. If the callback returns non-0, the walk is terminated and
the functions return that code back to callers.

v3
- simplified netdev_has_upper_dev_all_rcu and __netdev_has_upper_dev and
  removed typecast as suggested by Stephen

v2
- fixed definition of netdev_next_lower_dev_rcu to mirror the upper_dev
  version.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  17 +++++
 net/core/dev.c            | 155 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bf341b65ca5e..a5902d995907 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3778,6 +3778,14 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *upper_dev,
+					    void *data),
+				  void *data);
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+				  struct net_device *upper_dev);
+
 void *netdev_lower_get_next_private(struct net_device *dev,
 				    struct list_head **iter);
 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
@@ -3821,6 +3829,15 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
 	     ldev; \
 	     ldev = netdev_all_lower_get_next_rcu(dev, &(iter)))
 
+int netdev_walk_all_lower_dev(struct net_device *dev,
+			      int (*fn)(struct net_device *lower_dev,
+					void *data),
+			      void *data);
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *lower_dev,
+					    void *data),
+				  void *data);
+
 void *netdev_adjacent_get_private(struct list_head *adj_list);
 void *netdev_lower_get_first_private_rcu(struct net_device *dev);
 struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index f67fd16615bb..fc48337cfab8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5155,6 +5155,31 @@ bool netdev_has_upper_dev(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
+/**
+ * netdev_has_upper_dev_all - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks the entire upper device chain.
+ * The caller must hold rcu lock.
+ */
+
+static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+{
+	struct net_device *dev = data;
+
+	return upper_dev == dev;
+}
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+				  struct net_device *upper_dev)
+{
+	return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+					       upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
+
 /**
  * netdev_has_any_upper_dev - Check if device is linked to some device
  * @dev: device
@@ -5255,6 +5280,51 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
 
+static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+						    struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->adj_list.upper)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *dev,
+					    void *data),
+				  void *data)
+{
+	struct net_device *udev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.upper,
+	     udev = netdev_next_upper_dev_rcu(dev, &iter);
+	     udev;
+	     udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+		/* first is the upper device itself */
+		ret = fn(udev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its upper devices */
+		ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
+
 /**
  * netdev_lower_get_next_private - Get the next ->private from the
  *				   lower neighbour list
@@ -5361,6 +5431,49 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list
 }
 EXPORT_SYMBOL(netdev_all_lower_get_next);
 
+static struct net_device *netdev_next_lower_dev(struct net_device *dev,
+						struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry(*iter, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	*iter = lower->list.next;
+
+	return lower->dev;
+}
+
+int netdev_walk_all_lower_dev(struct net_device *dev,
+			      int (*fn)(struct net_device *dev,
+					void *data),
+			      void *data)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = netdev_next_lower_dev(dev, &iter);
+	     ldev;
+	     ldev = netdev_next_lower_dev(dev, &iter)) {
+		/* first is the lower device itself */
+		ret = fn(ldev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its lower devices */
+		ret = netdev_walk_all_lower_dev(ldev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+
 /**
  * netdev_all_lower_get_next_rcu - Get the next device from all
  *				   lower neighbour list, RCU variant
@@ -5382,6 +5495,48 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
 
+static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+						    struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	*iter = &lower->list;
+
+	return lower->dev;
+}
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *dev,
+					    void *data),
+				  void *data)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = netdev_next_lower_dev_rcu(dev, &iter);
+	     ldev;
+	     ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+		/* first is the lower device itself */
+		ret = fn(ldev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its lower devices */
+		ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
+
 /**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
  *				       lower neighbour list, RCU
-- 
cgit v1.2.3


From f1170fd462c67c4ae2f20734566d94e0f8f62f69 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 17 Oct 2016 19:15:51 -0700
Subject: net: Remove all_adj_list and its references

Only direct adjacencies are maintained. All upper or lower devices can
be learned via the new walk API which recursively walks the adj_list for
upper devices or lower devices.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  25 ------
 net/core/dev.c            | 223 ++++------------------------------------------
 2 files changed, 18 insertions(+), 230 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a5902d995907..458c87631e7f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1456,7 +1456,6 @@ enum netdev_priv_flags {
  *	@ptype_specific: Device-specific, protocol-specific packet handlers
  *
  *	@adj_list:	Directly linked devices, like slaves for bonding
- *	@all_adj_list:	All linked devices, *including* neighbours
  *	@features:	Currently active device features
  *	@hw_features:	User-changeable features
  *
@@ -1675,11 +1674,6 @@ struct net_device {
 		struct list_head lower;
 	} adj_list;
 
-	struct {
-		struct list_head upper;
-		struct list_head lower;
-	} all_adj_list;
-
 	netdev_features_t	features;
 	netdev_features_t	hw_features;
 	netdev_features_t	wanted_features;
@@ -3771,13 +3765,6 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 	     updev; \
 	     updev = netdev_upper_get_next_dev_rcu(dev, &(iter)))
 
-/* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
-	for (iter = &(dev)->all_adj_list.upper, \
-	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
-	     updev; \
-	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
-
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *upper_dev,
 					    void *data),
@@ -3817,18 +3804,6 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev,
 struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
 						 struct list_head **iter);
 
-#define netdev_for_each_all_lower_dev(dev, ldev, iter) \
-	for (iter = (dev)->all_adj_list.lower.next, \
-	     ldev = netdev_all_lower_get_next(dev, &(iter)); \
-	     ldev; \
-	     ldev = netdev_all_lower_get_next(dev, &(iter)))
-
-#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \
-	for (iter = (dev)->all_adj_list.lower.next, \
-	     ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \
-	     ldev; \
-	     ldev = netdev_all_lower_get_next_rcu(dev, &(iter)))
-
 int netdev_walk_all_lower_dev(struct net_device *dev,
 			      int (*fn)(struct net_device *lower_dev,
 					void *data),
diff --git a/net/core/dev.c b/net/core/dev.c
index fc48337cfab8..a9fe14908b44 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5137,6 +5137,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
 	return NULL;
 }
 
+static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+{
+	struct net_device *dev = data;
+
+	return upper_dev == dev;
+}
+
 /**
  * netdev_has_upper_dev - Check if device is linked to an upper device
  * @dev: device
@@ -5151,7 +5158,8 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
 	ASSERT_RTNL();
 
-	return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
+	return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+					     upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
@@ -5165,13 +5173,6 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
  * The caller must hold rcu lock.
  */
 
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
-{
-	struct net_device *dev = data;
-
-	return upper_dev == dev;
-}
-
 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
 				  struct net_device *upper_dev)
 {
@@ -5191,7 +5192,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
-	return !list_empty(&dev->all_adj_list.upper);
+	return !list_empty(&dev->adj_list.upper);
 }
 
 /**
@@ -5254,32 +5255,6 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
 
-/**
- * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next device from the dev's upper list, starting from iter
- * position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-						     struct list_head **iter)
-{
-	struct netdev_adjacent *upper;
-
-	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
-
-	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
-
-	if (&upper->list == &dev->all_adj_list.upper)
-		return NULL;
-
-	*iter = &upper->list;
-
-	return upper->dev;
-}
-EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
-
 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
 						    struct list_head **iter)
 {
@@ -5406,31 +5381,6 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 }
 EXPORT_SYMBOL(netdev_lower_get_next);
 
-/**
- * netdev_all_lower_get_next - Get the next device from all lower neighbour list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RTNL lock or
- * its own locking that guarantees that the neighbour all lower
- * list will remain unchanged.
- */
-struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
-{
-	struct netdev_adjacent *lower;
-
-	lower = list_entry(*iter, struct netdev_adjacent, list);
-
-	if (&lower->list == &dev->all_adj_list.lower)
-		return NULL;
-
-	*iter = lower->list.next;
-
-	return lower->dev;
-}
-EXPORT_SYMBOL(netdev_all_lower_get_next);
-
 static struct net_device *netdev_next_lower_dev(struct net_device *dev,
 						struct list_head **iter)
 {
@@ -5474,27 +5424,6 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
 
-/**
- * netdev_all_lower_get_next_rcu - Get the next device from all
- *				   lower neighbour list, RCU variant
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
-						 struct list_head **iter)
-{
-	struct netdev_adjacent *lower;
-
-	lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
-				       struct netdev_adjacent, list);
-
-	return lower ? lower->dev : NULL;
-}
-EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
-
 static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
 						    struct list_head **iter)
 {
@@ -5722,15 +5651,6 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 	return 0;
 }
 
-static int __netdev_adjacent_dev_link(struct net_device *dev,
-				      struct net_device *upper_dev)
-{
-	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
-						&dev->all_adj_list.upper,
-						&upper_dev->all_adj_list.lower,
-						NULL, false);
-}
-
 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
 					       struct net_device *upper_dev,
 					       u16 ref_nr,
@@ -5741,40 +5661,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
 	__netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
 }
 
-static void __netdev_adjacent_dev_unlink(struct net_device *dev,
-					 struct net_device *upper_dev,
-					 u16 ref_nr)
-{
-	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
-					   &dev->all_adj_list.upper,
-					   &upper_dev->all_adj_list.lower);
-}
-
 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 						struct net_device *upper_dev,
 						void *private, bool master)
 {
-	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
-
-	if (ret)
-		return ret;
-
-	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
-					       &dev->adj_list.upper,
-					       &upper_dev->adj_list.lower,
-					       private, master);
-	if (ret) {
-		__netdev_adjacent_dev_unlink(dev, upper_dev, 1);
-		return ret;
-	}
-
-	return 0;
+	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+						&dev->adj_list.upper,
+						&upper_dev->adj_list.lower,
+						private, master);
 }
 
 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 						   struct net_device *upper_dev)
 {
-	__netdev_adjacent_dev_unlink(dev, upper_dev, 1);
 	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
 					   &dev->adj_list.upper,
 					   &upper_dev->adj_list.lower);
@@ -5785,7 +5684,6 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 				   void *upper_priv, void *upper_info)
 {
 	struct netdev_notifier_changeupper_info changeupper_info;
-	struct netdev_adjacent *i, *j, *to_i, *to_j;
 	int ret = 0;
 
 	ASSERT_RTNL();
@@ -5794,10 +5692,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
+	if (netdev_has_upper_dev(upper_dev, dev))
 		return -EBUSY;
 
-	if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
+	if (netdev_has_upper_dev(dev, upper_dev))
 		return -EEXIST;
 
 	if (master && netdev_master_upper_dev_get(dev))
@@ -5819,80 +5717,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	/* Now that we linked these devs, make all the upper_dev's
-	 * all_adj_list.upper visible to every dev's all_adj_list.lower an
-	 * versa, and don't forget the devices itself. All of these
-	 * links are non-neighbours.
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
-			pr_debug("Interlinking %s with %s, non-neighbour\n",
-				 i->dev->name, j->dev->name);
-			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
-			if (ret)
-				goto rollback_mesh;
-		}
-	}
-
-	/* add dev to every upper_dev's upper device */
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
-		pr_debug("linking %s's upper device %s with %s\n",
-			 upper_dev->name, i->dev->name, dev->name);
-		ret = __netdev_adjacent_dev_link(dev, i->dev);
-		if (ret)
-			goto rollback_upper_mesh;
-	}
-
-	/* add upper_dev to every dev's lower device */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		pr_debug("linking %s's lower device %s with %s\n", dev->name,
-			 i->dev->name, upper_dev->name);
-		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
-		if (ret)
-			goto rollback_lower_mesh;
-	}
-
 	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 					    &changeupper_info.info);
 	ret = notifier_to_errno(ret);
 	if (ret)
-		goto rollback_lower_mesh;
+		goto rollback;
 
 	return 0;
 
-rollback_lower_mesh:
-	to_i = i;
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		if (i == to_i)
-			break;
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
-	}
-
-	i = NULL;
-
-rollback_upper_mesh:
-	to_i = i;
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
-		if (i == to_i)
-			break;
-		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
-	}
-
-	i = j = NULL;
-
-rollback_mesh:
-	to_i = i;
-	to_j = j;
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
-			if (i == to_i && j == to_j)
-				break;
-			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
-		}
-		if (i == to_i)
-			break;
-	}
-
+rollback:
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
 	return ret;
@@ -5949,7 +5782,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
 {
 	struct netdev_notifier_changeupper_info changeupper_info;
-	struct netdev_adjacent *i, *j;
 	ASSERT_RTNL();
 
 	changeupper_info.upper_dev = upper_dev;
@@ -5961,23 +5793,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
-	/* Here is the tricky part. We must remove all dev's lower
-	 * devices from all upper_dev's upper devices and vice
-	 * versa, to maintain the graph relationship.
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list)
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
-			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
-
-	/* remove also the devices itself from lower/upper device
-	 * list
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list)
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
-
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
-		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
-
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 				      &changeupper_info.info);
 }
@@ -7679,8 +7494,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->adj_list.upper);
 	INIT_LIST_HEAD(&dev->adj_list.lower);
-	INIT_LIST_HEAD(&dev->all_adj_list.upper);
-	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	INIT_LIST_HEAD(&dev->ptype_all);
 	INIT_LIST_HEAD(&dev->ptype_specific);
 #ifdef CONFIG_NET_SCHED
-- 
cgit v1.2.3


From 1f9127caece42514a47011326b83ad93d95cd5d7 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@ni.com>
Date: Mon, 17 Oct 2016 10:49:54 -0500
Subject: net: phy: Create phy_supported_speeds function which lists speeds
 currently supported by a phydevice

phy_supported_speeds provides a means to get a list of all the speeds a
phy device currently supports.

Signed-off-by: Zach Brown <zach.brown@ni.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/phy.h   | 15 +++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 8b7659e94057..ee3c793124c7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -260,6 +260,41 @@ static inline unsigned int phy_find_valid(unsigned int idx, u32 features)
 	return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1;
 }
 
+/**
+ * phy_supported_speeds - return all speeds currently supported by a phy device
+ * @phy: The phy device to return supported speeds of.
+ * @speeds: buffer to store supported speeds in.
+ * @size:   size of speeds buffer.
+ *
+ * Description: Returns the number of supported speeds, and fills the speeds
+ * buffer with the supported speeds. If speeds buffer is too small to contain
+ * all currently supported speeds, will return as many speeds as can fit.
+ */
+unsigned int phy_supported_speeds(struct phy_device *phy,
+				  unsigned int *speeds,
+				  unsigned int size)
+{
+	unsigned int count = 0;
+	unsigned int idx = 0;
+
+	while (idx < MAX_NUM_SETTINGS && count < size) {
+		idx = phy_find_valid(idx, phy->supported);
+
+		if (!(settings[idx].setting & phy->supported))
+			break;
+
+		/* Assumes settings are grouped by speed */
+		if ((count == 0) ||
+		    (speeds[count - 1] != settings[idx].speed)) {
+			speeds[count] = settings[idx].speed;
+			count++;
+		}
+		idx++;
+	}
+
+	return count;
+}
+
 /**
  * phy_check_valid - check if there is a valid PHY setting which matches
  *		     speed, duplex, and feature mask
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c47378c93607..4b6c246c63bb 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -84,6 +84,21 @@ typedef enum {
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
+/**
+ * phy_supported_speeds - return all speeds currently supported by a phy device
+ * @phy: The phy device to return supported speeds of.
+ * @speeds: buffer to store supported speeds in.
+ * @size: size of speeds buffer.
+ *
+ * Description: Returns the number of supported speeds, and
+ * fills the speeds * buffer with the supported speeds. If speeds buffer is
+ * too small to contain * all currently supported speeds, will return as
+ * many speeds as can fit.
+ */
+unsigned int phy_supported_speeds(struct phy_device *phy,
+				      unsigned int *speeds,
+				      unsigned int size);
+
 /**
  * It maps 'enum phy_interface_t' found in include/linux/phy.h
  * into the device tree binding of 'phy-mode', so that Ethernet
-- 
cgit v1.2.3


From 2e0bc452f4721520502575362a9cd3c1248d2337 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@ni.com>
Date: Mon, 17 Oct 2016 10:49:55 -0500
Subject: net: phy: leds: add support for led triggers on phy link state change

Create an option CONFIG_LED_TRIGGER_PHY (default n), which will create a
set of led triggers for each instantiated PHY device. There is one LED
trigger per link-speed, per-phy.
The triggers are registered during phy_attach and unregistered during
phy_detach.

This allows for a user to configure their system to allow a set of LEDs
not controlled by the phy to represent link state changes on the phy.
LEDS controlled by the phy are unaffected.

For example, we have a board where some of the leds in the
RJ45 socket are controlled by the phy, but others are not. Using the
triggers provided by this patch the leds not controlled by the phy can
be configured to show the current speed of the ethernet connection. The
leds controlled by the phy are unaffected.

Signed-off-by: Josh Cartwright <josh.cartwright@ni.com>
Signed-off-by: Nathan Sullivan <nathan.sullivan@ni.com>
Signed-off-by: Zach Brown <zach.brown@ni.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig            |  13 ++++
 drivers/net/phy/Makefile           |   1 +
 drivers/net/phy/phy.c              |   1 +
 drivers/net/phy/phy_device.c       |   5 ++
 drivers/net/phy/phy_led_triggers.c | 136 +++++++++++++++++++++++++++++++++++++
 include/linux/phy.h                |   7 ++
 include/linux/phy_led_triggers.h   |  51 ++++++++++++++
 7 files changed, 214 insertions(+)
 create mode 100644 drivers/net/phy/phy_led_triggers.c
 create mode 100644 include/linux/phy_led_triggers.h

(limited to 'include/linux')

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 2651c8d8de2f..45f68eaf9b79 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -15,6 +15,19 @@ if PHYLIB
 config SWPHY
 	bool
 
+config LED_TRIGGER_PHY
+	bool "Support LED triggers for tracking link state"
+	depends on LEDS_TRIGGERS
+	---help---
+	  Adds support for a set of LED trigger events per-PHY.  Link
+	  state change will trigger the events, for consumption by an
+	  LED class driver.  There are triggers for each link speed currently
+	  supported by the phy, and are of the form:
+	       <mii bus id>:<phy>:<speed>
+
+	  Where speed is in the form:
+		<Speed in megabits>Mbps or <Speed in gigabits>Gbps
+
 comment "MDIO bus device drivers"
 
 config MDIO_BCM_IPROC
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index e58667d111e7..86d12cd3fbf0 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -2,6 +2,7 @@
 
 libphy-y			:= phy.o phy_device.o mdio_bus.o mdio_device.o
 libphy-$(CONFIG_SWPHY)		+= swphy.o
+libphy-$(CONFIG_LED_TRIGGER_PHY)	+= phy_led_triggers.o
 
 obj-$(CONFIG_PHYLIB)		+= libphy.o
 
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ee3c793124c7..2f94c60d4939 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -946,6 +946,7 @@ EXPORT_SYMBOL(phy_start);
 static void phy_adjust_link(struct phy_device *phydev)
 {
 	phydev->adjust_link(phydev->attached_dev);
+	phy_led_trigger_change_speed(phydev);
 }
 
 /**
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ac440a815353..49a1c988d29c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -30,6 +30,7 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/phy.h>
+#include <linux/phy_led_triggers.h>
 #include <linux/mdio.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
@@ -916,6 +917,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	else
 		phy_resume(phydev);
 
+	phy_led_triggers_register(phydev);
+
 	return err;
 
 error:
@@ -989,6 +992,8 @@ void phy_detach(struct phy_device *phydev)
 		}
 	}
 
+	phy_led_triggers_unregister(phydev);
+
 	/*
 	 * The phydev might go away on the put_device() below, so avoid
 	 * a use-after-free bug by reading the underlying bus first.
diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c
new file mode 100644
index 000000000000..cda600a1b766
--- /dev/null
+++ b/drivers/net/phy/phy_led_triggers.c
@@ -0,0 +1,136 @@
+/* Copyright (C) 2016 National Instruments Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/leds.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy,
+							unsigned int speed)
+{
+	unsigned int i;
+
+	for (i = 0; i < phy->phy_num_led_triggers; i++) {
+		if (phy->phy_led_triggers[i].speed == speed)
+			return &phy->phy_led_triggers[i];
+	}
+	return NULL;
+}
+
+void phy_led_trigger_change_speed(struct phy_device *phy)
+{
+	struct phy_led_trigger *plt;
+
+	if (!phy->link)
+		goto out_change_speed;
+
+	if (phy->speed == 0)
+		return;
+
+	plt = phy_speed_to_led_trigger(phy, phy->speed);
+	if (!plt) {
+		netdev_alert(phy->attached_dev,
+			     "No phy led trigger registered for speed(%d)\n",
+			     phy->speed);
+		goto out_change_speed;
+	}
+
+	if (plt != phy->last_triggered) {
+		led_trigger_event(&phy->last_triggered->trigger, LED_OFF);
+		led_trigger_event(&plt->trigger, LED_FULL);
+		phy->last_triggered = plt;
+	}
+	return;
+
+out_change_speed:
+	if (phy->last_triggered) {
+		led_trigger_event(&phy->last_triggered->trigger,
+				  LED_OFF);
+		phy->last_triggered = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed);
+
+static int phy_led_trigger_register(struct phy_device *phy,
+				    struct phy_led_trigger *plt,
+				    unsigned int speed)
+{
+	char name_suffix[PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE];
+
+	plt->speed = speed;
+
+	if (speed < SPEED_1000)
+		snprintf(name_suffix, sizeof(name_suffix), "%dMbps", speed);
+	else if (speed == SPEED_2500)
+		snprintf(name_suffix, sizeof(name_suffix), "2.5Gbps");
+	else
+		snprintf(name_suffix, sizeof(name_suffix), "%dGbps",
+			 DIV_ROUND_CLOSEST(speed, 1000));
+
+	snprintf(plt->name, sizeof(plt->name), PHY_ID_FMT ":%s",
+		 phy->mdio.bus->id, phy->mdio.addr, name_suffix);
+	plt->trigger.name = plt->name;
+
+	return led_trigger_register(&plt->trigger);
+}
+
+static void phy_led_trigger_unregister(struct phy_led_trigger *plt)
+{
+	led_trigger_unregister(&plt->trigger);
+}
+
+int phy_led_triggers_register(struct phy_device *phy)
+{
+	int i, err;
+	unsigned int speeds[50];
+
+	phy->phy_num_led_triggers = phy_supported_speeds(phy, speeds,
+							 ARRAY_SIZE(speeds));
+	if (!phy->phy_num_led_triggers)
+		return 0;
+
+	phy->phy_led_triggers = devm_kzalloc(&phy->mdio.dev,
+					    sizeof(struct phy_led_trigger) *
+						   phy->phy_num_led_triggers,
+					    GFP_KERNEL);
+	if (!phy->phy_led_triggers)
+		return -ENOMEM;
+
+	for (i = 0; i < phy->phy_num_led_triggers; i++) {
+		err = phy_led_trigger_register(phy, &phy->phy_led_triggers[i],
+					       speeds[i]);
+		if (err)
+			goto out_unreg;
+	}
+
+	phy->last_triggered = NULL;
+	phy_led_trigger_change_speed(phy);
+
+	return 0;
+out_unreg:
+	while (i--)
+		phy_led_trigger_unregister(&phy->phy_led_triggers[i]);
+	devm_kfree(&phy->mdio.dev, phy->phy_led_triggers);
+	return err;
+}
+EXPORT_SYMBOL_GPL(phy_led_triggers_register);
+
+void phy_led_triggers_unregister(struct phy_device *phy)
+{
+	int i;
+
+	for (i = 0; i < phy->phy_num_led_triggers; i++)
+		phy_led_trigger_unregister(&phy->phy_led_triggers[i]);
+
+	devm_kfree(&phy->mdio.dev, phy->phy_led_triggers);
+}
+EXPORT_SYMBOL_GPL(phy_led_triggers_unregister);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4b6c246c63bb..e7e1fd382564 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -25,6 +25,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/mod_devicetable.h>
+#include <linux/phy_led_triggers.h>
 
 #include <linux/atomic.h>
 
@@ -420,6 +421,12 @@ struct phy_device {
 
 	int link_timeout;
 
+#ifdef CONFIG_LED_TRIGGER_PHY
+	struct phy_led_trigger *phy_led_triggers;
+	unsigned int phy_num_led_triggers;
+	struct phy_led_trigger *last_triggered;
+#endif
+
 	/*
 	 * Interrupt number for this PHY
 	 * -1 means no interrupt
diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h
new file mode 100644
index 000000000000..a2daea0a37d2
--- /dev/null
+++ b/include/linux/phy_led_triggers.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2016 National Instruments Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __PHY_LED_TRIGGERS
+#define __PHY_LED_TRIGGERS
+
+struct phy_device;
+
+#ifdef CONFIG_LED_TRIGGER_PHY
+
+#include <linux/leds.h>
+
+#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE	10
+#define PHY_MII_BUS_ID_SIZE	(20 - 3)
+
+#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \
+				       FIELD_SIZEOF(struct mdio_device, addr)+\
+				       PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE)
+
+struct phy_led_trigger {
+	struct led_trigger trigger;
+	char name[PHY_LINK_LED_TRIGGER_NAME_SIZE];
+	unsigned int speed;
+};
+
+
+extern int phy_led_triggers_register(struct phy_device *phy);
+extern void phy_led_triggers_unregister(struct phy_device *phy);
+extern void phy_led_trigger_change_speed(struct phy_device *phy);
+
+#else
+
+static inline int phy_led_triggers_register(struct phy_device *phy)
+{
+	return 0;
+}
+static inline void phy_led_triggers_unregister(struct phy_device *phy) { }
+static inline void phy_led_trigger_change_speed(struct phy_device *phy) { }
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 797476b88bde2a6001f9552f383f147e58c1a330 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@hgst.com>
Date: Tue, 18 Oct 2016 15:40:29 +0900
Subject: block: Add 'zoned' queue limit

Add the zoned queue limit to indicate the zoning model of a block device.
Defined values are 0 (BLK_ZONED_NONE) for regular block devices,
1 (BLK_ZONED_HA) for host-aware zone block devices and 2 (BLK_ZONED_HM)
for host-managed zone block devices. The standards defined drive managed
model is not defined here since these block devices do not provide any
command for accessing zone information. Drive managed model devices will
be reported as BLK_ZONED_NONE.

The helper functions blk_queue_zoned_model and bdev_zoned_model return
the zoned limit and the functions blk_queue_is_zoned and bdev_is_zoned
return a boolean for callers to test if a block device is zoned.

The zoned attribute is also exported as a string to applications via
sysfs. BLK_ZONED_NONE shows as "none", BLK_ZONED_HA as "host-aware" and
BLK_ZONED_HM as "host-managed".

Signed-off-by: Damien Le Moal <damien.lemoal@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Tested-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/ABI/testing/sysfs-block | 16 ++++++++++++
 block/blk-settings.c                  |  1 +
 block/blk-sysfs.c                     | 18 ++++++++++++++
 include/linux/blkdev.h                | 47 +++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 71d184dbb70d..75a5055a722b 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -235,3 +235,19 @@ Description:
 		write_same_max_bytes is 0, write same is not supported
 		by the device.
 
+What:		/sys/block/<disk>/queue/zoned
+Date:		September 2016
+Contact:	Damien Le Moal <damien.lemoal@hgst.com>
+Description:
+		zoned indicates if the device is a zoned block device
+		and the zone model of the device if it is indeed zoned.
+		The possible values indicated by zoned are "none" for
+		regular block devices and "host-aware" or "host-managed"
+		for zoned block devices. The characteristics of
+		host-aware and host-managed zoned block devices are
+		described in the ZBC (Zoned Block Commands) and ZAC
+		(Zoned Device ATA Command Set) standards. These standards
+		also define the "drive-managed" zone model. However,
+		since drive-managed zoned block devices do not support
+		zone commands, they will be treated as regular block
+		devices and zoned will report "none".
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..b1d5b7fa4d07 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -107,6 +107,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->io_opt = 0;
 	lim->misaligned = 0;
 	lim->cluster = 1;
+	lim->zoned = BLK_ZONED_NONE;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9cc8d7c5439a..ff9cd9cd35a3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -257,6 +257,18 @@ QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
 QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
 #undef QUEUE_SYSFS_BIT_FNS
 
+static ssize_t queue_zoned_show(struct request_queue *q, char *page)
+{
+	switch (blk_queue_zoned_model(q)) {
+	case BLK_ZONED_HA:
+		return sprintf(page, "host-aware\n");
+	case BLK_ZONED_HM:
+		return sprintf(page, "host-managed\n");
+	default:
+		return sprintf(page, "none\n");
+	}
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -485,6 +497,11 @@ static struct queue_sysfs_entry queue_nonrot_entry = {
 	.store = queue_store_nonrot,
 };
 
+static struct queue_sysfs_entry queue_zoned_entry = {
+	.attr = {.name = "zoned", .mode = S_IRUGO },
+	.show = queue_zoned_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nomerges_show,
@@ -546,6 +563,7 @@ static struct attribute *default_attrs[] = {
 	&queue_discard_zeroes_data_entry.attr,
 	&queue_write_same_max_entry.attr,
 	&queue_nonrot_entry.attr,
+	&queue_zoned_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358ba052..f19e16bb43d1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -261,6 +261,15 @@ struct blk_queue_tag {
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
 
+/*
+ * Zoned block device models (zoned limit).
+ */
+enum blk_zoned_model {
+	BLK_ZONED_NONE,	/* Regular block device */
+	BLK_ZONED_HA,	/* Host-aware zoned block device */
+	BLK_ZONED_HM,	/* Host-managed zoned block device */
+};
+
 struct queue_limits {
 	unsigned long		bounce_pfn;
 	unsigned long		seg_boundary_mask;
@@ -290,6 +299,7 @@ struct queue_limits {
 	unsigned char		cluster;
 	unsigned char		discard_zeroes_data;
 	unsigned char		raid_partial_stripes_expensive;
+	enum blk_zoned_model	zoned;
 };
 
 struct request_queue {
@@ -627,6 +637,23 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q)
 	return q->limits.cluster;
 }
 
+static inline enum blk_zoned_model
+blk_queue_zoned_model(struct request_queue *q)
+{
+	return q->limits.zoned;
+}
+
+static inline bool blk_queue_is_zoned(struct request_queue *q)
+{
+	switch (blk_queue_zoned_model(q)) {
+	case BLK_ZONED_HA:
+	case BLK_ZONED_HM:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * We regard a request as sync, if either a read or a sync write
  */
@@ -1354,6 +1381,26 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
 	return 0;
 }
 
+static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_zoned_model(q);
+
+	return BLK_ZONED_NONE;
+}
+
+static inline bool bdev_is_zoned(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_is_zoned(q);
+
+	return false;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
-- 
cgit v1.2.3


From 2d253440b5afb128d22ccdae812dde9ba77a2cca Mon Sep 17 00:00:00 2001
From: Shaun Tancheff <shaun.tancheff@seagate.com>
Date: Tue, 18 Oct 2016 15:40:32 +0900
Subject: block: Define zoned block device operations

Define REQ_OP_ZONE_REPORT and REQ_OP_ZONE_RESET for handling zones of
host-managed and host-aware zoned block devices. With with these two
new operations, the total number of operations defined reaches 8 and
still fits with the 3 bits definition of REQ_OP_BITS.

Signed-off-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Damien Le Moal <damien.lemoal@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c          | 4 ++++
 include/linux/blk_types.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 14d7c0740dc0..e4eda5d2aa56 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1941,6 +1941,10 @@ generic_make_request_checks(struct bio *bio)
 	case REQ_OP_WRITE_SAME:
 		if (!bdev_write_same(bio->bi_bdev))
 			goto not_supported;
+	case REQ_OP_ZONE_REPORT:
+	case REQ_OP_ZONE_RESET:
+		if (!bdev_is_zoned(bio->bi_bdev))
+			goto not_supported;
 		break;
 	default:
 		break;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cd395ecec99d..dd50dce89a80 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -243,6 +243,8 @@ enum req_op {
 	REQ_OP_SECURE_ERASE,	/* request to securely erase sectors */
 	REQ_OP_WRITE_SAME,	/* write same block many times */
 	REQ_OP_FLUSH,		/* request for cache flush */
+	REQ_OP_ZONE_REPORT,	/* Get zone information */
+	REQ_OP_ZONE_RESET,	/* Reset a zone write pointer */
 };
 
 #define REQ_OP_BITS 3
-- 
cgit v1.2.3


From 6a0cb1bc106fc07ce0443303bcdb7f7da5131e5c Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 18 Oct 2016 15:40:33 +0900
Subject: block: Implement support for zoned block devices

Implement zoned block device zone information reporting and reset.
Zone information are reported as struct blk_zone. This implementation
does not differentiate between host-aware and host-managed device
models and is valid for both. Two functions are provided:
blkdev_report_zones for discovering the zone configuration of a
zoned block device, and blkdev_reset_zones for resetting the write
pointer of sequential zones. The helper function blk_queue_zone_size
and bdev_zone_size are also provided for, as the name suggest,
obtaining the zone size (in 512B sectors) of the zones of the device.

Signed-off-by: Hannes Reinecke <hare@suse.de>

[Damien: * Removed the zone cache
         * Implement report zones operation based on earlier proposal
           by Shaun Tancheff <shaun.tancheff@seagate.com>]
Signed-off-by: Damien Le Moal <damien.lemoal@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Tested-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/Kconfig                 |   8 ++
 block/Makefile                |   1 +
 block/blk-zoned.c             | 257 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h        |  31 +++++
 include/uapi/linux/Kbuild     |   1 +
 include/uapi/linux/blkzoned.h | 103 +++++++++++++++++
 6 files changed, 401 insertions(+)
 create mode 100644 block/blk-zoned.c
 create mode 100644 include/uapi/linux/blkzoned.h

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 1d4d624492fc..6b0ad08f0677 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -89,6 +89,14 @@ config BLK_DEV_INTEGRITY
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
+config BLK_DEV_ZONED
+	bool "Zoned block device support"
+	---help---
+	Block layer zoned block device support. This option enables
+	support for ZAC/ZBC host-managed and host-aware zoned block devices.
+
+	Say yes here if you have a ZAC or ZBC storage device.
+
 config BLK_DEV_THROTTLING
 	bool "Block layer bio throttling support"
 	depends on BLK_CGROUP=y
diff --git a/block/Makefile b/block/Makefile
index 36acdd7545be..934dac73fb37 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
+obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
new file mode 100644
index 000000000000..1603573f9605
--- /dev/null
+++ b/block/blk-zoned.c
@@ -0,0 +1,257 @@
+/*
+ * Zoned block device handling
+ *
+ * Copyright (c) 2015, Hannes Reinecke
+ * Copyright (c) 2015, SUSE Linux GmbH
+ *
+ * Copyright (c) 2016, Damien Le Moal
+ * Copyright (c) 2016, Western Digital
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/blkdev.h>
+
+static inline sector_t blk_zone_start(struct request_queue *q,
+				      sector_t sector)
+{
+	sector_t zone_mask = blk_queue_zone_size(q) - 1;
+
+	return sector & ~zone_mask;
+}
+
+/*
+ * Check that a zone report belongs to the partition.
+ * If yes, fix its start sector and write pointer, copy it in the
+ * zone information array and return true. Return false otherwise.
+ */
+static bool blkdev_report_zone(struct block_device *bdev,
+			       struct blk_zone *rep,
+			       struct blk_zone *zone)
+{
+	sector_t offset = get_start_sect(bdev);
+
+	if (rep->start < offset)
+		return false;
+
+	rep->start -= offset;
+	if (rep->start + rep->len > bdev->bd_part->nr_sects)
+		return false;
+
+	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
+		rep->wp = rep->start + rep->len;
+	else
+		rep->wp -= offset;
+	memcpy(zone, rep, sizeof(struct blk_zone));
+
+	return true;
+}
+
+/**
+ * blkdev_report_zones - Get zones information
+ * @bdev:	Target block device
+ * @sector:	Sector from which to report zones
+ * @zones:	Array of zone structures where to return the zones information
+ * @nr_zones:	Number of zone structures in the zone array
+ * @gfp_mask:	Memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Get zone information starting from the zone containing @sector.
+ *    The number of zone information reported may be less than the number
+ *    requested by @nr_zones. The number of zones actually reported is
+ *    returned in @nr_zones.
+ */
+int blkdev_report_zones(struct block_device *bdev,
+			sector_t sector,
+			struct blk_zone *zones,
+			unsigned int *nr_zones,
+			gfp_t gfp_mask)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct blk_zone_report_hdr *hdr;
+	unsigned int nrz = *nr_zones;
+	struct page *page;
+	unsigned int nr_rep;
+	size_t rep_bytes;
+	unsigned int nr_pages;
+	struct bio *bio;
+	struct bio_vec *bv;
+	unsigned int i, n, nz;
+	unsigned int ofst;
+	void *addr;
+	int ret = 0;
+
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -EOPNOTSUPP;
+
+	if (!nrz)
+		return 0;
+
+	if (sector > bdev->bd_part->nr_sects) {
+		*nr_zones = 0;
+		return 0;
+	}
+
+	/*
+	 * The zone report has a header. So make room for it in the
+	 * payload. Also make sure that the report fits in a single BIO
+	 * that will not be split down the stack.
+	 */
+	rep_bytes = sizeof(struct blk_zone_report_hdr) +
+		sizeof(struct blk_zone) * nrz;
+	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
+	if (rep_bytes > (queue_max_sectors(q) << 9))
+		rep_bytes = queue_max_sectors(q) << 9;
+
+	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
+			 rep_bytes >> PAGE_SHIFT);
+	nr_pages = min_t(unsigned int, nr_pages,
+			 queue_max_segments(q));
+
+	bio = bio_alloc(gfp_mask, nr_pages);
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_bdev = bdev;
+	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
+	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
+
+	for (i = 0; i < nr_pages; i++) {
+		page = alloc_page(gfp_mask);
+		if (!page) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
+			__free_page(page);
+			break;
+		}
+	}
+
+	if (i == 0)
+		ret = -ENOMEM;
+	else
+		ret = submit_bio_wait(bio);
+	if (ret)
+		goto out;
+
+	/*
+	 * Process the report result: skip the header and go through the
+	 * reported zones to fixup and fixup the zone information for
+	 * partitions. At the same time, return the zone information into
+	 * the zone array.
+	 */
+	n = 0;
+	nz = 0;
+	nr_rep = 0;
+	bio_for_each_segment_all(bv, bio, i) {
+
+		if (!bv->bv_page)
+			break;
+
+		addr = kmap_atomic(bv->bv_page);
+
+		/* Get header in the first page */
+		ofst = 0;
+		if (!nr_rep) {
+			hdr = (struct blk_zone_report_hdr *) addr;
+			nr_rep = hdr->nr_zones;
+			ofst = sizeof(struct blk_zone_report_hdr);
+		}
+
+		/* Fixup and report zones */
+		while (ofst < bv->bv_len &&
+		       n < nr_rep && nz < nrz) {
+			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
+				nz++;
+			ofst += sizeof(struct blk_zone);
+			n++;
+		}
+
+		kunmap_atomic(addr);
+
+		if (n >= nr_rep || nz >= nrz)
+			break;
+
+	}
+
+out:
+	bio_for_each_segment_all(bv, bio, i)
+		__free_page(bv->bv_page);
+	bio_put(bio);
+
+	if (ret == 0)
+		*nr_zones = nz;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_report_zones);
+
+/**
+ * blkdev_reset_zones - Reset zones write pointer
+ * @bdev:	Target block device
+ * @sector:	Start sector of the first zone to reset
+ * @nr_sectors:	Number of sectors, at least the length of one zone
+ * @gfp_mask:	Memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Reset the write pointer of the zones contained in the range
+ *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
+ *    is valid, but the specified range should not contain conventional zones.
+ */
+int blkdev_reset_zones(struct block_device *bdev,
+		       sector_t sector, sector_t nr_sectors,
+		       gfp_t gfp_mask)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t zone_sectors;
+	sector_t end_sector = sector + nr_sectors;
+	struct bio *bio;
+	int ret;
+
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -EOPNOTSUPP;
+
+	if (end_sector > bdev->bd_part->nr_sects)
+		/* Out of range */
+		return -EINVAL;
+
+	/* Check alignment (handle eventual smaller last zone) */
+	zone_sectors = blk_queue_zone_size(q);
+	if (sector & (zone_sectors - 1))
+		return -EINVAL;
+
+	if ((nr_sectors & (zone_sectors - 1)) &&
+	    end_sector != bdev->bd_part->nr_sects)
+		return -EINVAL;
+
+	while (sector < end_sector) {
+
+		bio = bio_alloc(gfp_mask, 0);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_bdev = bdev;
+		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
+
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+
+		if (ret)
+			return ret;
+
+		sector += zone_sectors;
+
+		/* This may take a while, so be nice to others */
+		cond_resched();
+
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkdev_reset_zones);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f19e16bb43d1..252043f7cd2c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
+#include <linux/blkzoned.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -302,6 +303,21 @@ struct queue_limits {
 	enum blk_zoned_model	zoned;
 };
 
+#ifdef CONFIG_BLK_DEV_ZONED
+
+struct blk_zone_report_hdr {
+	unsigned int	nr_zones;
+	u8		padding[60];
+};
+
+extern int blkdev_report_zones(struct block_device *bdev,
+			       sector_t sector, struct blk_zone *zones,
+			       unsigned int *nr_zones, gfp_t gfp_mask);
+extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
+			      sector_t nr_sectors, gfp_t gfp_mask);
+
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 struct request_queue {
 	/*
 	 * Together with queue_head for cacheline sharing
@@ -654,6 +670,11 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
 
+static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+{
+	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
+}
+
 /*
  * We regard a request as sync, if either a read or a sync write
  */
@@ -1401,6 +1422,16 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
 	return false;
 }
 
+static inline unsigned int bdev_zone_size(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_zone_size(q);
+
+	return 0;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6965d0909554..b2166f283da9 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -70,6 +70,7 @@ header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += blkzoned.h
 header-y += bpf_common.h
 header-y += bpf_perf_event.h
 header-y += bpf.h
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
new file mode 100644
index 000000000000..a3817214b0e0
--- /dev/null
+++ b/include/uapi/linux/blkzoned.h
@@ -0,0 +1,103 @@
+/*
+ * Zoned block devices handling.
+ *
+ * Copyright (C) 2015 Seagate Technology PLC
+ *
+ * Written by: Shaun Tancheff <shaun.tancheff@seagate.com>
+ *
+ * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
+ * Copyright (C) 2016 Western Digital
+ *
+ * This file is licensed under  the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#ifndef _UAPI_BLKZONED_H
+#define _UAPI_BLKZONED_H
+
+#include <linux/types.h>
+
+/**
+ * enum blk_zone_type - Types of zones allowed in a zoned device.
+ *
+ * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be writen
+ *                              randomly. Zone reset has no effect on the zone.
+ * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially
+ * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially
+ *
+ * Any other value not defined is reserved and must be considered as invalid.
+ */
+enum blk_zone_type {
+	BLK_ZONE_TYPE_CONVENTIONAL	= 0x1,
+	BLK_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
+	BLK_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
+};
+
+/**
+ * enum blk_zone_cond - Condition [state] of a zone in a zoned device.
+ *
+ * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional.
+ * @BLK_ZONE_COND_EMPTY: The zone is empty.
+ * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened.
+ * @BLK_ZONE_COND_EXP_OPEN: The zones was explicitly opened by an
+ *                          OPEN ZONE command.
+ * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing.
+ * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a zone
+ *                      FINISH ZONE command.
+ * @BLK_ZONE_COND_READONLY: The zone is read-only.
+ * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
+ *
+ * The Zone Condition state machine in the ZBC/ZAC standards maps the above
+ * deinitions as:
+ *   - ZC1: Empty         | BLK_ZONE_EMPTY
+ *   - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
+ *   - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
+ *   - ZC4: Closed        | BLK_ZONE_CLOSED
+ *   - ZC5: Full          | BLK_ZONE_FULL
+ *   - ZC6: Read Only     | BLK_ZONE_READONLY
+ *   - ZC7: Offline       | BLK_ZONE_OFFLINE
+ *
+ * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
+ * be considered invalid.
+ */
+enum blk_zone_cond {
+	BLK_ZONE_COND_NOT_WP	= 0x0,
+	BLK_ZONE_COND_EMPTY	= 0x1,
+	BLK_ZONE_COND_IMP_OPEN	= 0x2,
+	BLK_ZONE_COND_EXP_OPEN	= 0x3,
+	BLK_ZONE_COND_CLOSED	= 0x4,
+	BLK_ZONE_COND_READONLY	= 0xD,
+	BLK_ZONE_COND_FULL	= 0xE,
+	BLK_ZONE_COND_OFFLINE	= 0xF,
+};
+
+/**
+ * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
+ *
+ * @start: Zone start in 512 B sector units
+ * @len: Zone length in 512 B sector units
+ * @wp: Zone write pointer location in 512 B sector units
+ * @type: see enum blk_zone_type for possible values
+ * @cond: see enum blk_zone_cond for possible values
+ * @non_seq: Flag indicating that the zone is using non-sequential resources
+ *           (for host-aware zoned block devices only).
+ * @reset: Flag indicating that a zone reset is recommended.
+ * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size.
+ *
+ * start, len and wp use the regular 512 B sector unit, regardless of the
+ * device logical block size. The overall structure size is 64 B to match the
+ * ZBC/ZAC defined zone descriptor and allow support for future additional
+ * zone information.
+ */
+struct blk_zone {
+	__u64	start;		/* Zone start sector */
+	__u64	len;		/* Zone length in number of sectors */
+	__u64	wp;		/* Zone write pointer position */
+	__u8	type;		/* Zone type */
+	__u8	cond;		/* Zone condition */
+	__u8	non_seq;	/* Non-sequential write resources active */
+	__u8	reset;		/* Reset write pointer recommended */
+	__u8	reserved[36];
+};
+
+#endif /* _UAPI_BLKZONED_H */
-- 
cgit v1.2.3


From 3ed05a987e0f63b21e634101e0b460d32f3581c3 Mon Sep 17 00:00:00 2001
From: Shaun Tancheff <shaun@tancheff.com>
Date: Tue, 18 Oct 2016 15:40:35 +0900
Subject: blk-zoned: implement ioctls

Adds the new BLKREPORTZONE and BLKRESETZONE ioctls for respectively
obtaining the zone configuration of a zoned block device and resetting
the write pointer of sequential zones of a zoned block device.

The BLKREPORTZONE ioctl maps directly to a single call of the function
blkdev_report_zones. The zone information result is passed as an array
of struct blk_zone identical to the structure used internally for
processing the REQ_OP_ZONE_REPORT operation.  The BLKRESETZONE ioctl
maps to a single call of the blkdev_reset_zones function.

Signed-off-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Damien Le Moal <damien.lemoal@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-zoned.c             | 93 +++++++++++++++++++++++++++++++++++++++++++
 block/ioctl.c                 |  4 ++
 include/linux/blkdev.h        | 21 ++++++++++
 include/uapi/linux/blkzoned.h | 40 +++++++++++++++++++
 include/uapi/linux/fs.h       |  4 ++
 5 files changed, 162 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 1603573f9605..667f95d86695 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -255,3 +255,96 @@ int blkdev_reset_zones(struct block_device *bdev,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blkdev_reset_zones);
+
+/**
+ * BLKREPORTZONE ioctl processing.
+ * Called from blkdev_ioctl.
+ */
+int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
+			      unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct request_queue *q;
+	struct blk_zone_report rep;
+	struct blk_zone *zones;
+	int ret;
+
+	if (!argp)
+		return -EINVAL;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -ENOTTY;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
+		return -EFAULT;
+
+	if (!rep.nr_zones)
+		return -EINVAL;
+
+	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+	if (!zones)
+		return -ENOMEM;
+
+	ret = blkdev_report_zones(bdev, rep.sector,
+				  zones, &rep.nr_zones,
+				  GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (rep.nr_zones) {
+		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
+				 sizeof(struct blk_zone) * rep.nr_zones))
+			ret = -EFAULT;
+	}
+
+ out:
+	kfree(zones);
+
+	return ret;
+}
+
+/**
+ * BLKRESETZONE ioctl processing.
+ * Called from blkdev_ioctl.
+ */
+int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct request_queue *q;
+	struct blk_zone_range zrange;
+
+	if (!argp)
+		return -EINVAL;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -ENOTTY;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (!(mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
+		return -EFAULT;
+
+	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
+				  GFP_KERNEL);
+}
diff --git a/block/ioctl.c b/block/ioctl.c
index 755119c3c1b9..f856963204f4 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -519,6 +519,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 				BLKDEV_DISCARD_SECURE);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
+	case BLKREPORTZONE:
+		return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
+	case BLKRESETZONE:
+		return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
 	case HDIO_GETGEO:
 		return blkdev_getgeo(bdev, argp);
 	case BLKRAGET:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 252043f7cd2c..90097dd8b8ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,6 +316,27 @@ extern int blkdev_report_zones(struct block_device *bdev,
 extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
 			      sector_t nr_sectors, gfp_t gfp_mask);
 
+extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
+				     unsigned int cmd, unsigned long arg);
+extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
+				    unsigned int cmd, unsigned long arg);
+
+#else /* CONFIG_BLK_DEV_ZONED */
+
+static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
+					    fmode_t mode, unsigned int cmd,
+					    unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
+					   fmode_t mode, unsigned int cmd,
+					   unsigned long arg)
+{
+	return -ENOTTY;
+}
+
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 struct request_queue {
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index a3817214b0e0..40d1d7bff537 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -16,6 +16,7 @@
 #define _UAPI_BLKZONED_H
 
 #include <linux/types.h>
+#include <linux/ioctl.h>
 
 /**
  * enum blk_zone_type - Types of zones allowed in a zoned device.
@@ -100,4 +101,43 @@ struct blk_zone {
 	__u8	reserved[36];
 };
 
+/**
+ * struct blk_zone_report - BLKREPORTZONE ioctl request/reply
+ *
+ * @sector: starting sector of report
+ * @nr_zones: IN maximum / OUT actual
+ * @reserved: padding to 16 byte alignment
+ * @zones: Space to hold @nr_zones @zones entries on reply.
+ *
+ * The array of at most @nr_zones must follow this structure in memory.
+ */
+struct blk_zone_report {
+	__u64		sector;
+	__u32		nr_zones;
+	__u8		reserved[4];
+	struct blk_zone zones[0];
+} __packed;
+
+/**
+ * struct blk_zone_range - BLKRESETZONE ioctl request
+ * @sector: starting sector of the first zone to issue reset write pointer
+ * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ */
+struct blk_zone_range {
+	__u64		sector;
+	__u64		nr_sectors;
+};
+
+/**
+ * Zoned block device ioctl's:
+ *
+ * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
+ *                 The zone report will start from the zone containing the
+ *                 sector specified in the report request structure.
+ * @BLKRESETZONE: Reset the write pointer of the zones in the specified
+ *                sector range. The sector range must be zone aligned.
+ */
+#define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
+#define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
+
 #endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index acb2b6152ba0..c1d11df07b28 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -225,6 +225,10 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
-- 
cgit v1.2.3


From 9a97434215819872b054c3d0c067e5e4fa768b0e Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Mon, 17 Oct 2016 21:45:29 +0200
Subject: ARM: pxa: enhance smc91x platform data

Instead of having the smc91x driver relying on machine_is_*() calls,
provide this data through platform data, ie. idp, mainstone and
stargate.

This way, the driver doesn't need anymore machine_is_*() calls, which
wouldn't work anymore with a device-tree build.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/mach-pxa/idp.c       | 1 +
 arch/arm/mach-pxa/mainstone.c | 1 +
 arch/arm/mach-pxa/stargate2.c | 1 +
 include/linux/smc91x.h        | 1 +
 4 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 66070acaa888..d1db32b1a2c6 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -85,6 +85,7 @@ static struct resource smc91x_resources[] = {
 static struct smc91x_platdata smc91x_platdata = {
 	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
 		 SMC91X_USE_DMA | SMC91X_NOWAIT,
+	.pxa_u16_align4 = true,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 40964069a17c..a2d851a3a546 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -140,6 +140,7 @@ static struct resource smc91x_resources[] = {
 static struct smc91x_platdata mainstone_smc91x_info = {
 	.flags	= SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
 		  SMC91X_NOWAIT | SMC91X_USE_DMA,
+	.pxa_u16_align4 = true,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 702f4f14b708..7b6610e9dae4 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -673,6 +673,7 @@ static struct resource smc91x_resources[] = {
 static struct smc91x_platdata stargate2_smc91x_info = {
 	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT
 	| SMC91X_NOWAIT | SMC91X_USE_DMA,
+	.pxa_u16_align4 = true,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h
index e302c447e057..129bc674dcf5 100644
--- a/include/linux/smc91x.h
+++ b/include/linux/smc91x.h
@@ -39,6 +39,7 @@ struct smc91x_platdata {
 	unsigned long flags;
 	unsigned char leda;
 	unsigned char ledb;
+	bool pxa_u16_align4;	/* PXA buggy u16 writes on 4*n+2 addresses */
 };
 
 #endif /* __SMC91X_H__ */
-- 
cgit v1.2.3


From 0f57dc6ae1ff0c702450083176b657ba37c07363 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Mon, 17 Oct 2016 16:48:58 +0100
Subject: remoteproc: Keep local copy of firmware name

Storage of the firmware name was inconsistent, either storing a pointer
to a name stored with unknown ownership, or a variable length tacked
onto the end of the struct proc allocated in rproc_alloc.

In preparation for allowing the firmware of an already allocated struct
rproc to be changed, instead always keep a locally maintained copy of
the firmware name.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 31 ++++++++++++++++---------------
 include/linux/remoteproc.h           |  2 +-
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index c6bfb3496684..ccc2a73e94dd 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1273,6 +1273,7 @@ static void rproc_type_release(struct device *dev)
 	if (rproc->index >= 0)
 		ida_simple_remove(&rproc_dev_index, rproc->index);
 
+	kfree(rproc->firmware);
 	kfree(rproc);
 }
 
@@ -1310,31 +1311,31 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 {
 	struct rproc *rproc;
 	char *p, *template = "rproc-%s-fw";
-	int name_len = 0;
+	int name_len;
 
 	if (!dev || !name || !ops)
 		return NULL;
 
-	if (!firmware)
+	if (!firmware) {
 		/*
-		 * Make room for default firmware name (minus %s plus '\0').
 		 * If the caller didn't pass in a firmware name then
-		 * construct a default name.  We're already glomming 'len'
-		 * bytes onto the end of the struct rproc allocation, so do
-		 * a few more for the default firmware name (but only if
-		 * the caller doesn't pass one).
+		 * construct a default name.
 		 */
 		name_len = strlen(name) + strlen(template) - 2 + 1;
-
-	rproc = kzalloc(sizeof(*rproc) + len + name_len, GFP_KERNEL);
-	if (!rproc)
-		return NULL;
-
-	if (!firmware) {
-		p = (char *)rproc + sizeof(struct rproc) + len;
+		p = kmalloc(name_len, GFP_KERNEL);
+		if (!p)
+			return NULL;
 		snprintf(p, name_len, template, name);
 	} else {
-		p = (char *)firmware;
+		p = kstrdup(firmware, GFP_KERNEL);
+		if (!p)
+			return NULL;
+	}
+
+	rproc = kzalloc(sizeof(struct rproc) + len, GFP_KERNEL);
+	if (!rproc) {
+		kfree(p);
+		return NULL;
 	}
 
 	rproc->firmware = p;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 930023b7c825..940e4cf2ac48 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -415,7 +415,7 @@ struct rproc {
 	struct list_head node;
 	struct iommu_domain *domain;
 	const char *name;
-	const char *firmware;
+	char *firmware;
 	void *priv;
 	const struct rproc_ops *ops;
 	struct device dev;
-- 
cgit v1.2.3


From 57a09bf0a416700676e77102c28f9cfcb48267e0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 18 Oct 2016 19:51:19 +0200
Subject: bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers

A BPF program is required to check the return register of a
map_elem_lookup() call before accessing memory. The verifier keeps
track of this by converting the type of the result register from
PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE after a conditional
jump ensures safety. This check is currently exclusively performed
for the result register 0.

In the event the compiler reorders instructions, BPF_MOV64_REG
instructions may be moved before the conditional jump which causes
them to keep their type PTR_TO_MAP_VALUE_OR_NULL to which the
verifier objects when the register is accessed:

0: (b7) r1 = 10
1: (7b) *(u64 *)(r10 -8) = r1
2: (bf) r2 = r10
3: (07) r2 += -8
4: (18) r1 = 0x59c00000
6: (85) call 1
7: (bf) r4 = r0
8: (15) if r0 == 0x0 goto pc+1
 R0=map_value(ks=8,vs=8) R4=map_value_or_null(ks=8,vs=8) R10=fp
9: (7a) *(u64 *)(r4 +0) = 0
R4 invalid mem access 'map_value_or_null'

This commit extends the verifier to keep track of all identical
PTR_TO_MAP_VALUE_OR_NULL registers after a map_elem_lookup() by
assigning them an ID and then marking them all when the conditional
jump is observed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h                |  2 +-
 kernel/bpf/verifier.c                       | 61 +++++++++++++++++-------
 tools/testing/selftests/bpf/test_verifier.c | 72 +++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7035b997aaa5..ac5b393ee6b2 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -23,13 +23,13 @@ struct bpf_reg_state {
 	 * result in a bad access.
 	 */
 	u64 min_value, max_value;
+	u32 id;
 	union {
 		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
 		s64 imm;
 
 		/* valid when type == PTR_TO_PACKET* */
 		struct {
-			u32 id;
 			u16 off;
 			u16 range;
 		};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 99a7e5b388f2..846d7ceaf202 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,9 +212,10 @@ static void print_verifier_state(struct bpf_verifier_state *state)
 		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
 			 t == PTR_TO_MAP_VALUE_OR_NULL ||
 			 t == PTR_TO_MAP_VALUE_ADJ)
-			verbose("(ks=%d,vs=%d)",
+			verbose("(ks=%d,vs=%d,id=%u)",
 				reg->map_ptr->key_size,
-				reg->map_ptr->value_size);
+				reg->map_ptr->value_size,
+				reg->id);
 		if (reg->min_value != BPF_REGISTER_MIN_RANGE)
 			verbose(",min_value=%llu",
 				(unsigned long long)reg->min_value);
@@ -447,6 +448,7 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
 {
 	BUG_ON(regno >= MAX_BPF_REG);
 	regs[regno].type = UNKNOWN_VALUE;
+	regs[regno].id = 0;
 	regs[regno].imm = 0;
 }
 
@@ -1252,6 +1254,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
 			return -EINVAL;
 		}
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
+		regs[BPF_REG_0].id = ++env->id_gen;
 	} else {
 		verbose("unknown return type %d of func %d\n",
 			fn->ret_type, func_id);
@@ -1644,8 +1647,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 						insn->src_reg);
 					return -EACCES;
 				}
-				regs[insn->dst_reg].type = UNKNOWN_VALUE;
-				regs[insn->dst_reg].map_ptr = NULL;
+				mark_reg_unknown_value(regs, insn->dst_reg);
 			}
 		} else {
 			/* case: R = imm
@@ -1907,6 +1909,38 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 	check_reg_overflow(true_reg);
 }
 
+static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
+			 enum bpf_reg_type type)
+{
+	struct bpf_reg_state *reg = &regs[regno];
+
+	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
+		reg->type = type;
+		if (type == UNKNOWN_VALUE)
+			mark_reg_unknown_value(regs, regno);
+	}
+}
+
+/* The logic is similar to find_good_pkt_pointers(), both could eventually
+ * be folded together at some point.
+ */
+static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
+			  enum bpf_reg_type type)
+{
+	struct bpf_reg_state *regs = state->regs;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_map_reg(regs, i, regs[regno].id, type);
+
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (state->stack_slot_type[i] != STACK_SPILL)
+			continue;
+		mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE,
+			     regs[regno].id, type);
+	}
+}
+
 static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
@@ -1994,18 +2028,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-		if (opcode == BPF_JEQ) {
-			/* next fallthrough insn can access memory via
-			 * this register
-			 */
-			regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
-			/* branch targer cannot access it, since reg == 0 */
-			mark_reg_unknown_value(other_branch->regs,
-					       insn->dst_reg);
-		} else {
-			other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
-			mark_reg_unknown_value(regs, insn->dst_reg);
-		}
+		/* Mark all identical map registers in each branch as either
+		 * safe or unknown depending R == 0 or R != 0 conditional.
+		 */
+		mark_map_regs(this_branch, insn->dst_reg,
+			      opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE);
+		mark_map_regs(other_branch, insn->dst_reg,
+			      opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ff5df121b2f6..0ef8eaf6cea7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2588,6 +2588,78 @@ static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 		.result = REJECT,
 	},
+	{
+		"multiple registers share map_lookup_elem result",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"invalid memory access with multiple map_lookup_elem calls",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = REJECT,
+		.errstr = "R4 !read_ok",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"valid indirect map_lookup_elem access with 2nd lookup in branch",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_2, 10),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 3ca45a46f8af8c4a92dd8a08eac57787242d5021 Mon Sep 17 00:00:00 2001
From: zijun_hu <zijun_hu@htc.com>
Date: Fri, 14 Oct 2016 15:12:54 +0800
Subject: percpu: ensure the requested alignment is power of two

The percpu allocator expectedly assumes that the requested alignment
is power of two but hasn't been veryfing the input.  If the specified
alignment isn't power of two, the allocator can malfunction.  Add the
sanity check.

The following is detailed analysis of the effects of alignments which
aren't power of two.

 The alignment must be a even at least since the LSB of a chunk->map
 element is used as free/in-use flag of a area; besides, the alignment
 must be a power of 2 too since ALIGN() doesn't work well for other
 alignment always but is adopted by pcpu_fit_in_area().  IOW, the
 current allocator only works well for a power of 2 aligned area
 allocation.

 See below opposite example for why an odd alignment doesn't work.
 Let's assume area [16, 36) is free but its previous one is in-use, we
 want to allocate a @size == 8 and @align == 7 area.  The larger area
 [16, 36) is split to three areas [16, 21), [21, 29), [29, 36)
 eventually.  However, due to the usage for a chunk->map element, the
 actual offset of the aim area [21, 29) is 21 but is recorded in
 relevant element as 20; moreover, the residual tail free area [29,
 36) is mistook as in-use and is lost silently

 Unlike macro roundup(), ALIGN(x, a) doesn't work if @a isn't a power
 of 2 for example, roundup(10, 6) == 12 but ALIGN(10, 6) == 10, and
 the latter result isn't desired obviously.

tj: Code style and patch description updates.

Signed-off-by: zijun_hu <zijun_hu@htc.com>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/kernel.h | 1 +
 mm/percpu.c            | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc6ed52a39b9..0dc0b21bd164 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -45,6 +45,7 @@
 
 #define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))
 
+/* @a is a power of 2 value */
 #define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
 #define __ALIGN_MASK(x, mask)	__ALIGN_KERNEL_MASK((x), (mask))
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
diff --git a/mm/percpu.c b/mm/percpu.c
index 255714302394..99d8abd4c1e7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -886,7 +886,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 
 	size = ALIGN(size, 2);
 
-	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
+	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
+		     !is_power_of_2(align))) {
 		WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n",
 		     size, align);
 		return NULL;
-- 
cgit v1.2.3


From 5dc8b362a2374d007bc0db649b7ab6a79dd32bda Mon Sep 17 00:00:00 2001
From: Adam Manzanares <adam.manzanares@hgst.com>
Date: Mon, 17 Oct 2016 11:27:28 -0700
Subject: block: Add iocontext priority to request

Patch adds an association between iocontext ioprio and the ioprio of a
request. This is done to enable request based drivers the ability to
act on priority information stored in the request. An example being
ATA devices that support command priorities. If the ATA driver discovers
that the device supports command priorities and the request has valid
priority information indicating the request is high priority, then a high
priority command can be sent to the device. This should improve tail
latencies for high priority IO on any device that queues requests
internally and can make use of the priority information stored in the
request.

The ioprio of the request is set in blk_rq_set_prio which takes the
request and the ioc as arguments. If the ioc is valid in blk_rq_set_prio
then the iopriority of the request is set as the iopriority of the ioc.
In init_request_from_bio a check is made to see if the ioprio of the bio
is valid and if so then the request prio comes from the bio.

Signed-off-by: Adam Manzananares <adam.manzanares@wdc.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/blk-core.c       |  4 +++-
 include/linux/blkdev.h | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 14d7c0740dc0..361b1b965d89 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1153,6 +1153,7 @@ static struct request *__get_request(struct request_list *rl, int op,
 
 	blk_rq_init(q, rq);
 	blk_rq_set_rl(rq, rl);
+	blk_rq_set_prio(rq, ioc);
 	req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
 
 	/* init elvpriv */
@@ -1656,7 +1657,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 
 	req->errors = 0;
 	req->__sector = bio->bi_iter.bi_sector;
-	req->ioprio = bio_prio(bio);
+	if (ioprio_valid(bio_prio(bio)))
+		req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358ba052..9a0ceaa1b7e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -933,6 +933,20 @@ static inline unsigned int blk_rq_count_bios(struct request *rq)
 	return nr_bios;
 }
 
+/*
+ * blk_rq_set_prio - associate a request with prio from ioc
+ * @rq: request of interest
+ * @ioc: target iocontext
+ *
+ * Assocate request prio with ioc prio so request based drivers
+ * can leverage priority information.
+ */
+static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc)
+{
+	if (ioc)
+		rq->ioprio = ioc->ioprio;
+}
+
 /*
  * Request issue related functions.
  */
-- 
cgit v1.2.3


From 8e061784b51ec4a4efed0deaafb5bd9725bf5b06 Mon Sep 17 00:00:00 2001
From: Adam Manzanares <adam.manzanares@hgst.com>
Date: Mon, 17 Oct 2016 11:27:29 -0700
Subject: ata: Enabling ATA Command Priorities

This patch checks to see if an ATA device supports NCQ command priorities.
If so and the user has specified an iocontext that indicates
IO_PRIO_CLASS_RT then we build a tf with a high priority command.

This is done to improve the tail latency of commands that are high
priority by passing priority to the device.

tj: Removed trivial ata_ncq_prio_enabled() and open-coded the test.

Signed-off-by: Adam Manzanares <adam.manzanares@hgst.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c | 35 ++++++++++++++++++++++++++++++++++-
 drivers/ata/libata-scsi.c |  6 +++++-
 drivers/ata/libata.h      |  2 +-
 include/linux/ata.h       |  6 ++++++
 include/linux/libata.h    |  3 +++
 5 files changed, 49 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 223a770f78f3..8346faf63337 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -739,6 +739,7 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev)
  *	@n_block: Number of blocks
  *	@tf_flags: RW/FUA etc...
  *	@tag: tag
+ *	@class: IO priority class
  *
  *	LOCKING:
  *	None.
@@ -753,7 +754,7 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev)
  */
 int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 		    u64 block, u32 n_block, unsigned int tf_flags,
-		    unsigned int tag)
+		    unsigned int tag, int class)
 {
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
 	tf->flags |= tf_flags;
@@ -785,6 +786,12 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 		tf->device = ATA_LBA;
 		if (tf->flags & ATA_TFLAG_FUA)
 			tf->device |= 1 << 7;
+
+		if (dev->flags & ATA_DFLAG_NCQ_PRIO) {
+			if (class == IOPRIO_CLASS_RT)
+				tf->hob_nsect |= ATA_PRIO_HIGH <<
+						 ATA_SHIFT_PRIO;
+		}
 	} else if (dev->flags & ATA_DFLAG_LBA) {
 		tf->flags |= ATA_TFLAG_LBA;
 
@@ -2156,6 +2163,30 @@ static void ata_dev_config_ncq_non_data(struct ata_device *dev)
 	}
 }
 
+static void ata_dev_config_ncq_prio(struct ata_device *dev)
+{
+	struct ata_port *ap = dev->link->ap;
+	unsigned int err_mask;
+
+	err_mask = ata_read_log_page(dev,
+				     ATA_LOG_SATA_ID_DEV_DATA,
+				     ATA_LOG_SATA_SETTINGS,
+				     ap->sector_buf,
+				     1);
+	if (err_mask) {
+		ata_dev_dbg(dev,
+			    "failed to get Identify Device data, Emask 0x%x\n",
+			    err_mask);
+		return;
+	}
+
+	if (ap->sector_buf[ATA_LOG_NCQ_PRIO_OFFSET] & BIT(3))
+		dev->flags |= ATA_DFLAG_NCQ_PRIO;
+	else
+		ata_dev_dbg(dev, "SATA page does not support priority\n");
+
+}
+
 static int ata_dev_config_ncq(struct ata_device *dev,
 			       char *desc, size_t desc_sz)
 {
@@ -2205,6 +2236,8 @@ static int ata_dev_config_ncq(struct ata_device *dev,
 			ata_dev_config_ncq_send_recv(dev);
 		if (ata_id_has_ncq_non_data(dev->id))
 			ata_dev_config_ncq_non_data(dev);
+		if (ata_id_has_ncq_prio(dev->id))
+			ata_dev_config_ncq_prio(dev);
 	}
 
 	return 0;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9cceb4a875a5..2bccc3c7de48 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -50,6 +50,7 @@
 #include <linux/uaccess.h>
 #include <linux/suspend.h>
 #include <asm/unaligned.h>
+#include <linux/ioprio.h>
 
 #include "libata.h"
 #include "libata-transport.h"
@@ -1755,6 +1756,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
 {
 	struct scsi_cmnd *scmd = qc->scsicmd;
 	const u8 *cdb = scmd->cmnd;
+	struct request *rq = scmd->request;
+	int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
 	unsigned int tf_flags = 0;
 	u64 block;
 	u32 n_block;
@@ -1821,7 +1824,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
 	qc->nbytes = n_block * scmd->device->sector_size;
 
 	rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags,
-			     qc->tag);
+			     qc->tag, class);
+
 	if (likely(rc == 0))
 		return 0;
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 3b301a48007c..8f3a5596dd67 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -66,7 +66,7 @@ extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
 extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
-			   unsigned int tag);
+			   unsigned int tag, int class);
 extern u64 ata_tf_read_block(const struct ata_taskfile *tf,
 			     struct ata_device *dev);
 extern unsigned ata_exec_internal(struct ata_device *dev,
diff --git a/include/linux/ata.h b/include/linux/ata.h
index fdb180367ba1..af6859b3a93d 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -348,6 +348,7 @@ enum {
 	ATA_LOG_DEVSLP_DETO	  = 0x01,
 	ATA_LOG_DEVSLP_VALID	  = 0x07,
 	ATA_LOG_DEVSLP_VALID_MASK = 0x80,
+	ATA_LOG_NCQ_PRIO_OFFSET   = 0x09,
 
 	/* NCQ send and receive log */
 	ATA_LOG_NCQ_SEND_RECV_SUBCMDS_OFFSET	= 0x00,
@@ -940,6 +941,11 @@ static inline bool ata_id_has_ncq_non_data(const u16 *id)
 	return id[ATA_ID_SATA_CAPABILITY_2] & BIT(5);
 }
 
+static inline bool ata_id_has_ncq_prio(const u16 *id)
+{
+	return id[ATA_ID_SATA_CAPABILITY] & BIT(12);
+}
+
 static inline bool ata_id_has_trim(const u16 *id)
 {
 	if (ata_id_major_version(id) >= 7 &&
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 616eef4d81ea..90b69a6293a3 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -166,6 +166,7 @@ enum {
 	ATA_DFLAG_NO_UNLOAD	= (1 << 17), /* device doesn't support unload */
 	ATA_DFLAG_UNLOCK_HPA	= (1 << 18), /* unlock HPA */
 	ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */
+	ATA_DFLAG_NCQ_PRIO	= (1 << 20), /* device supports NCQ priority */
 	ATA_DFLAG_INIT_MASK	= (1 << 24) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 24),
@@ -342,7 +343,9 @@ enum {
 	ATA_SHIFT_PIO		= 0,
 	ATA_SHIFT_MWDMA		= ATA_SHIFT_PIO + ATA_NR_PIO_MODES,
 	ATA_SHIFT_UDMA		= ATA_SHIFT_MWDMA + ATA_NR_MWDMA_MODES,
+	ATA_SHIFT_PRIO		= 6,
 
+	ATA_PRIO_HIGH		= 2,
 	/* size of buffer to pad xfers ending on unaligned boundaries */
 	ATA_DMA_PAD_SZ		= 4,
 
-- 
cgit v1.2.3


From 84f95243b5439a20c33837075b88926bfa00c4ec Mon Sep 17 00:00:00 2001
From: Adam Manzanares <adam.manzanares@hgst.com>
Date: Mon, 17 Oct 2016 11:27:30 -0700
Subject: ata: ATA Command Priority Disabled By Default

Add a sysfs entry to turn on priority information being passed
to a ATA device. By default this feature is turned off.

This patch depends on ata: Enabling ATA Command Priorities

tj: Renamed ncq_prio_on to ncq_prio_enable and removed trivial
    ata_ncq_prio_on() and open-coded the test.

Signed-off-by: Adam Manzanares <adam.manzanares@hgst.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libahci.c     |  1 +
 drivers/ata/libata-core.c |  3 ++-
 drivers/ata/libata-scsi.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h    |  2 ++
 4 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 0d028ead99e8..ee7db3119b18 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -140,6 +140,7 @@ EXPORT_SYMBOL_GPL(ahci_shost_attrs);
 struct device_attribute *ahci_sdev_attrs[] = {
 	&dev_attr_sw_activity,
 	&dev_attr_unload_heads,
+	&dev_attr_ncq_prio_enable,
 	NULL
 };
 EXPORT_SYMBOL_GPL(ahci_sdev_attrs);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8346faf63337..b294339159a4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -787,7 +787,8 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 		if (tf->flags & ATA_TFLAG_FUA)
 			tf->device |= 1 << 7;
 
-		if (dev->flags & ATA_DFLAG_NCQ_PRIO) {
+		if ((dev->flags & ATA_DFLAG_NCQ_PRIO) &&
+		    (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE)) {
 			if (class == IOPRIO_CLASS_RT)
 				tf->hob_nsect |= ATA_PRIO_HIGH <<
 						 ATA_SHIFT_PRIO;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 2bccc3c7de48..87597a3f6149 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -271,6 +271,73 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
 	    ata_scsi_park_show, ata_scsi_park_store);
 EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
 
+static ssize_t ata_ncq_prio_enable_show(struct device *device,
+					struct device_attribute *attr, char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_device *dev;
+	bool ncq_prio_enable;
+	int rc = 0;
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irq(ap->lock);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (!dev) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+
+	ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
+
+unlock:
+	spin_unlock_irq(ap->lock);
+
+	return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
+}
+
+static ssize_t ata_ncq_prio_enable_store(struct device *device,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_device *dev;
+	long int input;
+	unsigned long flags;
+	int rc;
+
+	rc = kstrtol(buf, 10, &input);
+	if (rc)
+		return rc;
+	if ((input < 0) || (input > 1))
+		return -EINVAL;
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irqsave(ap->lock, flags);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (unlikely(!dev)) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+
+	if (input)
+		dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
+	else
+		dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+
+unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	return rc ? rc : len;
+}
+
+DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
+	    ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
+EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
+
 void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd,
 			u8 sk, u8 asc, u8 ascq)
 {
@@ -402,6 +469,7 @@ EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
 
 struct device_attribute *ata_common_sdev_attrs[] = {
 	&dev_attr_unload_heads,
+	&dev_attr_ncq_prio_enable,
 	NULL
 };
 EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 90b69a6293a3..c170be548b7f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -167,6 +167,7 @@ enum {
 	ATA_DFLAG_UNLOCK_HPA	= (1 << 18), /* unlock HPA */
 	ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */
 	ATA_DFLAG_NCQ_PRIO	= (1 << 20), /* device supports NCQ priority */
+	ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */
 	ATA_DFLAG_INIT_MASK	= (1 << 24) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 24),
@@ -545,6 +546,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes)
 
 extern struct device_attribute dev_attr_link_power_management_policy;
 extern struct device_attribute dev_attr_unload_heads;
+extern struct device_attribute dev_attr_ncq_prio_enable;
 extern struct device_attribute dev_attr_em_message_type;
 extern struct device_attribute dev_attr_em_message;
 extern struct device_attribute dev_attr_sw_activity;
-- 
cgit v1.2.3


From 3c3fcb45d524feb5d14a14f332e3eec7f2aff8f3 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Wed, 19 Oct 2016 15:10:59 +0100
Subject: sched/fair: Kill the unused 'sched_shares_window_ns' tunable

The last user of this tunable was removed in 2012 in commit:

  82958366cfea ("sched: Replace update_shares weight distribution with per-entity computation")

Delete it since its very existence confuses people.

Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161019141059.26408-1-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 1 -
 kernel/sched/fair.c          | 7 -------
 kernel/sysctl.c              | 7 -------
 3 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 22db1e63707e..441145351301 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -36,7 +36,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d941c97dfbc3..79d464a04417 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -93,13 +93,6 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
-/*
- * The exponential sliding  window over which load is averaged for shares
- * distribution.
- * (default: 10msec)
- */
-unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
-
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f9ed84..739fb17371af 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -347,13 +347,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_shares_window_ns",
-		.data		= &sysctl_sched_shares_window,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 #ifdef CONFIG_SCHEDSTATS
 	{
 		.procname	= "sched_schedstats",
-- 
cgit v1.2.3


From 50066a042da5457ae5b6397425f0a7ca556231e3 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Wed, 19 Oct 2016 18:03:42 -0700
Subject: HID: wacom: generic: Add support for height, tilt, and twist usages

The HID standard defines usages that allow digitizers to report the pen's
height, tilt, and rotation and which are used by Wacom's new "MobileStudio
Pro" devices.

Note that 'hidinput_calc_abs_res' expects ABS_Z (historically used by our
driver to report twist) to have linear units. To ensure it calculates a
resolution with the actually-angular units provided in the HID descriptor
we nedd to lie and tell it we're calculating it for the (rotational) ABS_RZ
axis instead.

Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 28 ++++++++++++++++++++++++++--
 include/linux/hid.h     |  3 +++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 8071c18bf9c2..3f4ba53192c0 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1440,6 +1440,11 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
 {
 	int fmin = field->logical_minimum;
 	int fmax = field->logical_maximum;
+	int resolution_code = code;
+
+	if (usage->hid == HID_DG_TWIST) {
+		resolution_code = ABS_RZ;
+	}
 
 	usage->type = type;
 	usage->code = code;
@@ -1450,7 +1455,7 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
 	case EV_ABS:
 		input_set_abs_params(input, code, fmin, fmax, fuzz, 0);
 		input_abs_set_res(input, code,
-				  hidinput_calc_abs_res(field, code));
+				  hidinput_calc_abs_res(field, resolution_code));
 		break;
 	case EV_KEY:
 		input_set_capability(input, EV_KEY, code);
@@ -1475,6 +1480,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
 	case HID_GD_Y:
 		wacom_map_usage(input, usage, field, EV_ABS, ABS_Y, 4);
 		break;
+	case HID_GD_Z:
+		wacom_map_usage(input, usage, field, EV_ABS, ABS_DISTANCE, 0);
+		break;
 	case HID_DG_TIPPRESSURE:
 		wacom_map_usage(input, usage, field, EV_ABS, ABS_PRESSURE, 0);
 		break;
@@ -1485,6 +1493,15 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
 		wacom_map_usage(input, usage, field, EV_KEY,
 				BTN_TOOL_RUBBER, 0);
 		break;
+	case HID_DG_TILT_X:
+		wacom_map_usage(input, usage, field, EV_ABS, ABS_TILT_X, 0);
+		break;
+	case HID_DG_TILT_Y:
+		wacom_map_usage(input, usage, field, EV_ABS, ABS_TILT_Y, 0);
+		break;
+	case HID_DG_TWIST:
+		wacom_map_usage(input, usage, field, EV_ABS, ABS_Z, 0);
+		break;
 	case HID_DG_ERASER:
 	case HID_DG_TIPSWITCH:
 		wacom_map_usage(input, usage, field, EV_KEY, BTN_TOUCH, 0);
@@ -1508,8 +1525,15 @@ static int wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field,
 	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
 	struct input_dev *input = wacom_wac->pen_input;
 
-	/* checking which Tool / tip switch to send */
 	switch (usage->hid) {
+	case HID_GD_Z:
+		/*
+		 * HID_GD_Z "should increase as the control's position is
+		 * moved from high to low", while ABS_DISTANCE instead
+		 * increases in value as the tool moves from low to high.
+		 */
+		value = field->logical_maximum - value;
+		break;
 	case HID_DG_INRANGE:
 		wacom_wac->hid_data.inrange_state = value;
 		return 0;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b2ec82712baa..e712101a1670 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -232,6 +232,9 @@ struct hid_item {
 #define HID_DG_TABLETFUNCTIONKEY	0x000d0039
 #define HID_DG_PROGRAMCHANGEKEY	0x000d003a
 #define HID_DG_INVERT		0x000d003c
+#define HID_DG_TILT_X		0x000d003d
+#define HID_DG_TILT_Y		0x000d003e
+#define HID_DG_TWIST		0x000d0041
 #define HID_DG_TIPSWITCH	0x000d0042
 #define HID_DG_TIPSWITCH2	0x000d0043
 #define HID_DG_BARRELSWITCH	0x000d0044
-- 
cgit v1.2.3


From 93aab7fa4f8091d8fe2aed7e79a650fc1c084512 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Wed, 19 Oct 2016 18:03:52 -0700
Subject: HID: wacom: generic: Add support for battery status on pen and pad
 interfaces

Adds support for usages that may appear on the pen or pad interface which
report the state of the tablet battery.

Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 drivers/hid/wacom_wac.h |  6 ++++++
 include/linux/hid.h     |  1 +
 3 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 70de1fa930cc..f3edecf52c06 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1525,6 +1525,10 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
 	unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
 
 	switch (equivalent_usage) {
+	case WACOM_HID_WD_BATTERY_LEVEL:
+	case WACOM_HID_WD_BATTERY_CHARGING:
+		features->quirks |= WACOM_QUIRK_BATTERY;
+		break;
 	case WACOM_HID_WD_ACCELEROMETER_X:
 		__set_bit(INPUT_PROP_ACCELEROMETER, input->propbit);
 		wacom_map_usage(input, usage, field, EV_ABS, ABS_X, 0);
@@ -1574,8 +1578,25 @@ static int wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field,
 		wacom_wac->hid_data.inrange_state |= value;
 	}
 
-	if (equivalent_usage != WACOM_HID_WD_TOUCHRINGSTATUS)
+	switch (equivalent_usage) {
+	case WACOM_HID_WD_BATTERY_LEVEL:
+		wacom_wac->hid_data.battery_capacity = value;
+		wacom_wac->hid_data.bat_connected = 1;
+		return 0;
+
+	case WACOM_HID_WD_BATTERY_CHARGING:
+		wacom_wac->hid_data.bat_charging = value;
+		wacom_wac->hid_data.ps_connected = value;
+		wacom_wac->hid_data.bat_connected = 1;
+		return 0;
+
+	case WACOM_HID_WD_TOUCHRINGSTATUS:
+		return 0;
+
+	default:
 		input_event(input, usage->type, usage->code, value);
+		break;
+	}
 
 	return 0;
 }
@@ -1594,6 +1615,7 @@ static void wacom_wac_pad_report(struct hid_device *hdev,
 {
 	struct wacom *wacom = hid_get_drvdata(hdev);
 	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct wacom_features *features = &wacom_wac->features;
 	struct input_dev *input = wacom_wac->pad_input;
 	bool active = wacom_wac->hid_data.inrange_state != 0;
 
@@ -1604,6 +1626,16 @@ static void wacom_wac_pad_report(struct hid_device *hdev,
 	if (wacom_equivalent_usage(report->field[0]->physical) == HID_DG_TABLETFUNCTIONKEY)
 		input_event(input, EV_ABS, ABS_MISC, active ? PAD_DEVICE_ID : 0);
 
+	if (features->quirks & WACOM_QUIRK_BATTERY) {
+		int capacity = wacom_wac->hid_data.battery_capacity;
+		bool charging = wacom_wac->hid_data.bat_charging;
+		bool connected = wacom_wac->hid_data.bat_connected;
+		bool powered = wacom_wac->hid_data.ps_connected;
+
+		wacom_notify_battery(wacom_wac, capacity, charging,
+				     connected, powered);
+	}
+
 	input_sync(input);
 }
 
@@ -1633,6 +1665,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
 	case HID_DG_INRANGE:
 		wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_PEN, 0);
 		break;
+	case HID_DG_BATTERYSTRENGTH:
+		features->quirks |= WACOM_QUIRK_BATTERY;
+		break;
 	case HID_DG_INVERT:
 		wacom_map_usage(input, usage, field, EV_KEY,
 				BTN_TOOL_RUBBER, 0);
@@ -1703,6 +1738,10 @@ static int wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field,
 		if (!(features->quirks & WACOM_QUIRK_SENSE))
 			wacom_wac->hid_data.sense_state = value;
 		return 0;
+	case HID_DG_BATTERYSTRENGTH:
+		wacom_wac->hid_data.battery_capacity = value;
+		wacom_wac->hid_data.bat_connected = 1;
+		break;
 	case HID_DG_INVERT:
 		wacom_wac->hid_data.invert_state = value;
 		return 0;
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 1f7c4a86d91b..7418c9715d31 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -101,6 +101,8 @@
 #define WACOM_HID_WD_ACCELEROMETER_X    (WACOM_HID_UP_WACOMDIGITIZER | 0x0401)
 #define WACOM_HID_WD_ACCELEROMETER_Y    (WACOM_HID_UP_WACOMDIGITIZER | 0x0402)
 #define WACOM_HID_WD_ACCELEROMETER_Z    (WACOM_HID_UP_WACOMDIGITIZER | 0x0403)
+#define WACOM_HID_WD_BATTERY_CHARGING   (WACOM_HID_UP_WACOMDIGITIZER | 0x0404)
+#define WACOM_HID_WD_BATTERY_LEVEL      (WACOM_HID_UP_WACOMDIGITIZER | 0x043b)
 #define WACOM_HID_WD_EXPRESSKEY00       (WACOM_HID_UP_WACOMDIGITIZER | 0x0910)
 #define WACOM_HID_WD_BUTTONHOME         (WACOM_HID_UP_WACOMDIGITIZER | 0x0990)
 #define WACOM_HID_WD_BUTTONUP           (WACOM_HID_UP_WACOMDIGITIZER | 0x0991)
@@ -257,6 +259,10 @@ struct hid_data {
 	int last_slot_field;
 	int num_expected;
 	int num_received;
+	int battery_capacity;
+	int bat_charging;
+	int bat_connected;
+	int ps_connected;
 };
 
 struct wacom_remote_data {
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e712101a1670..3baa2f962e48 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -231,6 +231,7 @@ struct hid_item {
 #define HID_DG_TAP		0x000d0035
 #define HID_DG_TABLETFUNCTIONKEY	0x000d0039
 #define HID_DG_PROGRAMCHANGEKEY	0x000d003a
+#define HID_DG_BATTERYSTRENGTH	0x000d003b
 #define HID_DG_INVERT		0x000d003c
 #define HID_DG_TILT_X		0x000d003d
 #define HID_DG_TILT_Y		0x000d003e
-- 
cgit v1.2.3


From 8b6b4135e4fb2b537f33b811c13f77bee25ca8d3 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Thu, 20 Oct 2016 13:55:19 -0400
Subject: net: use core MTU range checking in WAN drivers

- set min/max_mtu in all hdlc drivers, remove hdlc_change_mtu
- sent max_mtu in lec driver, remove lec_change_mtu
- set min/max_mtu in x25_asy driver

CC: netdev@vger.kernel.org
CC: Krzysztof Halasa <khc@pm.waw.pl>
CC: Krzysztof Halasa <khalasa@piap.pl>
CC: Jan "Yenya" Kasprzak <kas@fi.muni.cz>
CC: Francois Romieu <romieu@fr.zoreil.com>
CC: Kevin Curtis <kevin.curtis@farsite.co.uk>
CC: Zhao Qiang <qiang.zhao@nxp.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/pcmcia/synclink_cs.c |  1 -
 drivers/net/wan/c101.c            |  1 -
 drivers/net/wan/cosa.c            |  1 -
 drivers/net/wan/dscc4.c           |  1 -
 drivers/net/wan/farsync.c         |  1 -
 drivers/net/wan/fsl_ucc_hdlc.c    |  1 -
 drivers/net/wan/hdlc.c            | 11 ++---------
 drivers/net/wan/hdlc_fr.c         |  3 ++-
 drivers/net/wan/hostess_sv11.c    |  1 -
 drivers/net/wan/ixp4xx_hss.c      |  1 -
 drivers/net/wan/lmc/lmc_main.c    |  1 -
 drivers/net/wan/n2.c              |  1 -
 drivers/net/wan/pc300too.c        |  1 -
 drivers/net/wan/pci200syn.c       |  1 -
 drivers/net/wan/sealevel.c        |  1 -
 drivers/net/wan/wanxl.c           |  1 -
 drivers/net/wan/x25_asy.c         |  5 ++---
 drivers/tty/synclink.c            |  1 -
 drivers/tty/synclink_gt.c         |  1 -
 drivers/tty/synclinkmp.c          |  1 -
 include/linux/hdlc.h              |  2 --
 net/atm/lec.c                     | 11 +----------
 22 files changed, 7 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index d28922df01d7..a7dd5f4f2c5a 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4248,7 +4248,6 @@ static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size)
 static const struct net_device_ops hdlcdev_ops = {
 	.ndo_open       = hdlcdev_open,
 	.ndo_stop       = hdlcdev_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hdlcdev_ioctl,
 	.ndo_tx_timeout = hdlcdev_tx_timeout,
diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 09a50751763b..2371e078afbb 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -302,7 +302,6 @@ static void c101_destroy_card(card_t *card)
 static const struct net_device_ops c101_ops = {
 	.ndo_open       = c101_open,
 	.ndo_stop       = c101_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = c101_ioctl,
 };
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index b87fe0a01c69..087eb266601f 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -432,7 +432,6 @@ module_exit(cosa_exit);
 static const struct net_device_ops cosa_ops = {
 	.ndo_open       = cosa_net_open,
 	.ndo_stop       = cosa_net_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = cosa_net_ioctl,
 	.ndo_tx_timeout = cosa_net_timeout,
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 629225980463..7351e5440ed7 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -887,7 +887,6 @@ static inline int dscc4_set_quartz(struct dscc4_dev_priv *dpriv, int hz)
 static const struct net_device_ops dscc4_ops = {
 	.ndo_open       = dscc4_open,
 	.ndo_stop       = dscc4_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = dscc4_ioctl,
 	.ndo_tx_timeout = dscc4_tx_timeout,
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 3c9cbf908ec7..03696d35ee9c 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2394,7 +2394,6 @@ fst_init_card(struct fst_card_info *card)
 static const struct net_device_ops fst_ops = {
 	.ndo_open       = fst_open,
 	.ndo_stop       = fst_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = fst_ioctl,
 	.ndo_tx_timeout = fst_tx_timeout,
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 65647533b401..e38ce4da3efb 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -992,7 +992,6 @@ static const struct dev_pm_ops uhdlc_pm_ops = {
 static const struct net_device_ops uhdlc_ops = {
 	.ndo_open       = uhdlc_open,
 	.ndo_stop       = uhdlc_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = uhdlc_ioctl,
 };
diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 9bd4aa8083ce..7221a53b8b14 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -46,14 +46,6 @@ static const char* version = "HDLC support module revision 1.22";
 
 static struct hdlc_proto *first_proto;
 
-int hdlc_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
 		    struct packet_type *p, struct net_device *orig_dev)
 {
@@ -237,6 +229,8 @@ static void hdlc_setup_dev(struct net_device *dev)
 	dev->flags		 = IFF_POINTOPOINT | IFF_NOARP;
 	dev->priv_flags		 = IFF_WAN_HDLC;
 	dev->mtu		 = HDLC_MAX_MTU;
+	dev->min_mtu		 = 68;
+	dev->max_mtu		 = HDLC_MAX_MTU;
 	dev->type		 = ARPHRD_RAWHDLC;
 	dev->hard_header_len	 = 16;
 	dev->addr_len		 = 0;
@@ -353,7 +347,6 @@ MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
 MODULE_DESCRIPTION("HDLC support module");
 MODULE_LICENSE("GPL v2");
 
-EXPORT_SYMBOL(hdlc_change_mtu);
 EXPORT_SYMBOL(hdlc_start_xmit);
 EXPORT_SYMBOL(hdlc_open);
 EXPORT_SYMBOL(hdlc_close);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index b6e0cfb095d3..eb915281197e 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1053,7 +1053,6 @@ static void pvc_setup(struct net_device *dev)
 static const struct net_device_ops pvc_ops = {
 	.ndo_open       = pvc_open,
 	.ndo_stop       = pvc_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = pvc_xmit,
 	.ndo_do_ioctl   = pvc_ioctl,
 };
@@ -1096,6 +1095,8 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 	}
 	dev->netdev_ops = &pvc_ops;
 	dev->mtu = HDLC_MAX_MTU;
+	dev->min_mtu = 68;
+	dev->max_mtu = HDLC_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->ml_priv = pvc;
 
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 3d741663fd67..dd6bb3364ad2 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -180,7 +180,6 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding,
 static const struct net_device_ops hostess_ops = {
 	.ndo_open       = hostess_open,
 	.ndo_stop       = hostess_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hostess_ioctl,
 };
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index e7bbdb7af53a..6a505c26a3e7 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -1321,7 +1321,6 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 static const struct net_device_ops hss_hdlc_ops = {
 	.ndo_open       = hss_hdlc_open,
 	.ndo_stop       = hss_hdlc_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hss_hdlc_ioctl,
 };
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 299140c04556..001b7796740d 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -808,7 +808,6 @@ static int lmc_attach(struct net_device *dev, unsigned short encoding,
 static const struct net_device_ops lmc_ops = {
 	.ndo_open       = lmc_open,
 	.ndo_stop       = lmc_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = lmc_ioctl,
 	.ndo_tx_timeout = lmc_driver_timeout,
diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 315bf09d6a20..c8f4517db3a0 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -330,7 +330,6 @@ static void n2_destroy_card(card_t *card)
 static const struct net_device_ops n2_ops = {
 	.ndo_open       = n2_open,
 	.ndo_stop       = n2_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = n2_ioctl,
 };
diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index db363856e0b5..e1dd1ec18d64 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -291,7 +291,6 @@ static void pc300_pci_remove_one(struct pci_dev *pdev)
 static const struct net_device_ops pc300_ops = {
 	.ndo_open       = pc300_open,
 	.ndo_stop       = pc300_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = pc300_ioctl,
 };
diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index e8455621390e..4e437c599e9a 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -270,7 +270,6 @@ static void pci200_pci_remove_one(struct pci_dev *pdev)
 static const struct net_device_ops pci200_ops = {
 	.ndo_open       = pci200_open,
 	.ndo_stop       = pci200_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = pci200_ioctl,
 };
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 27860b4f5908..fbb5aa2c4d8f 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -174,7 +174,6 @@ static int sealevel_attach(struct net_device *dev, unsigned short encoding,
 static const struct net_device_ops sealevel_ops = {
 	.ndo_open       = sealevel_open,
 	.ndo_stop       = sealevel_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = sealevel_ioctl,
 };
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index a20d688d2595..0c7317520ed3 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -551,7 +551,6 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev)
 static const struct net_device_ops wanxl_ops = {
 	.ndo_open       = wanxl_open,
 	.ndo_stop       = wanxl_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = wanxl_ioctl,
 	.ndo_get_stats  = wanxl_get_stats,
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 1bc5e93d2a34..878b05d06fc7 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -124,9 +124,6 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu)
 	unsigned char *xbuff, *rbuff;
 	int len;
 
-	if (newmtu > 65534)
-		return -EINVAL;
-
 	len = 2 * newmtu;
 	xbuff = kmalloc(len + 4, GFP_ATOMIC);
 	rbuff = kmalloc(len + 4, GFP_ATOMIC);
@@ -751,6 +748,8 @@ static void x25_asy_setup(struct net_device *dev)
 	 */
 
 	dev->mtu		= SL_MTU;
+	dev->min_mtu		= 0;
+	dev->max_mtu		= 65534;
 	dev->netdev_ops		= &x25_asy_netdev_ops;
 	dev->watchdog_timeo	= HZ*20;
 	dev->hard_header_len	= 0;
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index c13e27ecb0b7..415885c56435 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -7973,7 +7973,6 @@ static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size)
 static const struct net_device_ops hdlcdev_ops = {
 	.ndo_open       = hdlcdev_open,
 	.ndo_stop       = hdlcdev_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hdlcdev_ioctl,
 	.ndo_tx_timeout = hdlcdev_tx_timeout,
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 7aca2d4670e4..8267bcf2405e 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1768,7 +1768,6 @@ static void hdlcdev_rx(struct slgt_info *info, char *buf, int size)
 static const struct net_device_ops hdlcdev_ops = {
 	.ndo_open       = hdlcdev_open,
 	.ndo_stop       = hdlcdev_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hdlcdev_ioctl,
 	.ndo_tx_timeout = hdlcdev_tx_timeout,
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index dec156586de1..d66620f7eaa3 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -1887,7 +1887,6 @@ static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size)
 static const struct net_device_ops hdlcdev_ops = {
 	.ndo_open       = hdlcdev_open,
 	.ndo_stop       = hdlcdev_close,
-	.ndo_change_mtu = hdlc_change_mtu,
 	.ndo_start_xmit = hdlc_start_xmit,
 	.ndo_do_ioctl   = hdlcdev_ioctl,
 	.ndo_tx_timeout = hdlcdev_tx_timeout,
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index e31bcd4c7859..97585d9679f3 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -93,8 +93,6 @@ static __inline__ void debug_frame(const struct sk_buff *skb)
 int hdlc_open(struct net_device *dev);
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev);
-/* May be used by hardware driver */
-int hdlc_change_mtu(struct net_device *dev, int new_mtu);
 /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */
 netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5d2693826afb..779b3fa6052d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
 	return 0;
 }
 
-/* shamelessly stolen from drivers/net/net_init.c */
-static int lec_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > 18190))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static void lec_set_multicast_list(struct net_device *dev)
 {
 	/*
@@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = {
 	.ndo_open		= lec_open,
 	.ndo_stop		= lec_close,
 	.ndo_start_xmit		= lec_start_xmit,
-	.ndo_change_mtu		= lec_change_mtu,
 	.ndo_tx_timeout		= lec_tx_timeout,
 	.ndo_set_rx_mode	= lec_set_multicast_list,
 };
@@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
 		if (!dev_lec[i])
 			return -ENOMEM;
 		dev_lec[i]->netdev_ops = &lec_netdev_ops;
+		dev_lec[i]->max_mtu = 18190;
 		snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
 		if (register_netdev(dev_lec[i])) {
 			free_netdev(dev_lec[i]);
-- 
cgit v1.2.3


From b3e3893e1253692c3d2b8e8ebd5a26183ed30443 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Thu, 20 Oct 2016 13:55:22 -0400
Subject: net: use core MTU range checking in misc drivers

firewire-net:
- set min/max_mtu
- remove fwnet_change_mtu

nes:
- set max_mtu
- clean up nes_netdev_change_mtu

xpnet:
- set min/max_mtu
- remove xpnet_dev_change_mtu

hippi:
- set min/max_mtu
- remove hippi_change_mtu

batman-adv:
- set max_mtu
- remove batadv_interface_change_mtu
- initialization is a little async, not 100% certain that max_mtu is set
  in the optimal place, don't have hardware to test with

rionet:
- set min/max_mtu
- remove rionet_change_mtu

slip:
- set min/max_mtu
- streamline sl_change_mtu

um/net_kern:
- remove pointless ndo_change_mtu

hsi/clients/ssi_protocol:
- use core MTU range checking
- remove now redundant ssip_pn_set_mtu

ipoib:
- set a default max MTU value
- Note: ipoib's actual max MTU can vary, depending on if the device is in
  connected mode or not, so we'll just set the max_mtu value to the max
  possible, and let the ndo_change_mtu function continue to validate any new
  MTU change requests with checks for CM or not. Note that ipoib has no
  min_mtu set, and thus, the network core's mtu > 0 check is the only lower
  bounds here.

mptlan:
- use net core MTU range checking
- remove now redundant mpt_lan_change_mtu

fddi:
- min_mtu = 21, max_mtu = 4470
- remove now redundant fddi_change_mtu (including export)

fjes:
- min_mtu = 8192, max_mtu = 65536
- The max_mtu value is actually one over IP_MAX_MTU here, but the idea is to
  get past the core net MTU range checks so fjes_change_mtu can validate a
  new MTU against what it supports (see fjes_support_mtu in fjes_hw.c)

hsr:
- min_mtu = 0 (calls ether_setup, max_mtu is 1500)

f_phonet:
- min_mtu = 6, max_mtu = 65541

u_ether:
- min_mtu = 14, max_mtu = 15412

phonet/pep-gprs:
- min_mtu = 576, max_mtu = 65530
- remove redundant gprs_set_mtu

CC: netdev@vger.kernel.org
CC: linux-rdma@vger.kernel.org
CC: Stefan Richter <stefanr@s5r6.in-berlin.de>
CC: Faisal Latif <faisal.latif@intel.com>
CC: linux-rdma@vger.kernel.org
CC: Cliff Whickman <cpw@sgi.com>
CC: Robin Holt <robinmholt@gmail.com>
CC: Jes Sorensen <jes@trained-monkey.org>
CC: Marek Lindner <mareklindner@neomailbox.ch>
CC: Simon Wunderlich <sw@simonwunderlich.de>
CC: Antonio Quartulli <a@unstable.cc>
CC: Sathya Prakash <sathya.prakash@broadcom.com>
CC: Chaitra P B <chaitra.basappa@broadcom.com>
CC: Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
CC: MPT-FusionLinux.pdl@broadcom.com
CC: Sebastian Reichel <sre@kernel.org>
CC: Felipe Balbi <balbi@kernel.org>
CC: Arvid Brodin <arvid.brodin@alten.se>
CC: Remi Denis-Courmont <courmisch@gmail.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/um/drivers/net_kern.c                |  8 --------
 drivers/firewire/net.c                    | 18 ++++--------------
 drivers/hsi/clients/ssi_protocol.c        | 14 ++++----------
 drivers/infiniband/hw/nes/nes.c           |  1 -
 drivers/infiniband/hw/nes/nes.h           |  4 ++--
 drivers/infiniband/hw/nes/nes_nic.c       | 10 +++-------
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  1 +
 drivers/message/fusion/mptlan.c           | 15 ++++-----------
 drivers/misc/sgi-xp/xpnet.c               | 21 ++++-----------------
 drivers/net/fddi/skfp/skfddi.c            |  1 -
 drivers/net/fjes/fjes_main.c              |  2 ++
 drivers/net/hippi/rrunner.c               |  1 -
 drivers/net/rionet.c                      | 15 +++------------
 drivers/net/slip/slip.c                   | 11 +++++------
 drivers/usb/gadget/function/f_phonet.c    | 11 ++---------
 drivers/usb/gadget/function/u_ether.c     | 14 ++++----------
 include/linux/fddidevice.h                |  1 -
 include/linux/hippidevice.h               |  1 -
 net/802/fddi.c                            | 11 ++---------
 net/802/hippi.c                           | 14 ++------------
 net/batman-adv/soft-interface.c           | 13 +------------
 net/hsr/hsr_device.c                      |  1 +
 net/phonet/pep-gprs.c                     | 12 ++----------
 23 files changed, 46 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 2cd5b6874c7b..1669240c7a25 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -256,13 +256,6 @@ static void uml_net_tx_timeout(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
-static int uml_net_change_mtu(struct net_device *dev, int new_mtu)
-{
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void uml_net_poll_controller(struct net_device *dev)
 {
@@ -374,7 +367,6 @@ static const struct net_device_ops uml_netdev_ops = {
 	.ndo_set_rx_mode	= uml_net_set_multicast_list,
 	.ndo_tx_timeout 	= uml_net_tx_timeout,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu 	= uml_net_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = uml_net_poll_controller,
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 309311b1faae..8430222151fc 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -1349,15 +1349,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	return NETDEV_TX_OK;
 }
 
-static int fwnet_change_mtu(struct net_device *net, int new_mtu)
-{
-	if (new_mtu < 68)
-		return -EINVAL;
-
-	net->mtu = new_mtu;
-	return 0;
-}
-
 static const struct ethtool_ops fwnet_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
@@ -1366,7 +1357,6 @@ static const struct net_device_ops fwnet_netdev_ops = {
 	.ndo_open       = fwnet_open,
 	.ndo_stop	= fwnet_stop,
 	.ndo_start_xmit = fwnet_tx,
-	.ndo_change_mtu = fwnet_change_mtu,
 };
 
 static void fwnet_init_dev(struct net_device *net)
@@ -1435,7 +1425,6 @@ static int fwnet_probe(struct fw_unit *unit,
 	struct net_device *net;
 	bool allocated_netdev = false;
 	struct fwnet_device *dev;
-	unsigned max_mtu;
 	int ret;
 	union fwnet_hwaddr *ha;
 
@@ -1478,9 +1467,10 @@ static int fwnet_probe(struct fw_unit *unit,
 	 * Use the RFC 2734 default 1500 octets or the maximum payload
 	 * as initial MTU
 	 */
-	max_mtu = (1 << (card->max_receive + 1))
-		  - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE;
-	net->mtu = min(1500U, max_mtu);
+	net->max_mtu = (1 << (card->max_receive + 1))
+		       - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE;
+	net->mtu = min(1500U, net->max_mtu);
+	net->min_mtu = ETH_MIN_MTU;
 
 	/* Set our hardware address while we're at it */
 	ha = (union fwnet_hwaddr *)net->dev_addr;
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 6031cd146556..7ef819680acd 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -960,15 +960,6 @@ static int ssip_pn_stop(struct net_device *dev)
 	return 0;
 }
 
-static int ssip_pn_set_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu > SSIP_MAX_MTU || new_mtu < PHONET_MIN_MTU)
-		return -EINVAL;
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 static void ssip_xmit_work(struct work_struct *work)
 {
 	struct ssi_protocol *ssi =
@@ -1060,7 +1051,6 @@ static const struct net_device_ops ssip_pn_ops = {
 	.ndo_open	= ssip_pn_open,
 	.ndo_stop	= ssip_pn_stop,
 	.ndo_start_xmit	= ssip_pn_xmit,
-	.ndo_change_mtu	= ssip_pn_set_mtu,
 };
 
 static void ssip_pn_setup(struct net_device *dev)
@@ -1136,6 +1126,10 @@ static int ssi_protocol_probe(struct device *dev)
 		goto out1;
 	}
 
+	/* MTU range: 6 - 65535 */
+	ssi->netdev->min_mtu = PHONET_MIN_MTU;
+	ssi->netdev->max_mtu = SSIP_MAX_MTU;
+
 	SET_NETDEV_DEV(ssi->netdev, dev);
 	netif_carrier_off(ssi->netdev);
 	err = register_netdev(ssi->netdev);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 35cbb17bec12..2baa45a8e401 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -65,7 +65,6 @@ MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-int max_mtu = 9000;
 int interrupt_mod_interval = 0;
 
 /* Interoperability */
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index e7430c9254d3..85acd0843b50 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -83,6 +83,8 @@
 #define NES_FIRST_QPN           64
 #define NES_SW_CONTEXT_ALIGN    1024
 
+#define NES_MAX_MTU		9000
+
 #define NES_NIC_MAX_NICS        16
 #define NES_MAX_ARP_TABLE_SIZE  4096
 
@@ -169,8 +171,6 @@ do { \
 #include "nes_cm.h"
 #include "nes_mgt.h"
 
-extern int max_mtu;
-#define max_frame_len (max_mtu+ETH_HLEN)
 extern int interrupt_mod_interval;
 extern int nes_if_count;
 extern int mpa_version;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 2b27d1351cf7..7f8597d6738b 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -981,20 +981,16 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct nes_vnic	*nesvnic = netdev_priv(netdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
-	int ret = 0;
 	u8 jumbomode = 0;
 	u32 nic_active;
 	u32 nic_active_bit;
 	u32 uc_all_active;
 	u32 mc_all_active;
 
-	if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
-		return -EINVAL;
-
 	netdev->mtu = new_mtu;
 	nesvnic->max_frame_size	= new_mtu + VLAN_ETH_HLEN;
 
-	if (netdev->mtu	> 1500)	{
+	if (netdev->mtu	> ETH_DATA_LEN)	{
 		jumbomode=1;
 	}
 	nes_nic_init_timer_defaults(nesdev, jumbomode);
@@ -1020,7 +1016,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
 		nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
 	}
 
-	return ret;
+	return 0;
 }
 
 
@@ -1658,7 +1654,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 
 	netdev->watchdog_timeo = NES_TX_TIMEOUT;
 	netdev->irq = nesdev->pcidev->irq;
-	netdev->mtu = ETH_DATA_LEN;
+	netdev->max_mtu = NES_MAX_MTU;
 	netdev->hard_header_len = ETH_HLEN;
 	netdev->addr_len = ETH_ALEN;
 	netdev->type = ARPHRD_ETHER;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cc059218c962..ae5d7cd100a5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2017,6 +2017,7 @@ static struct net_device *ipoib_add_port(const char *format,
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
+	priv->dev->max_mtu = IPOIB_CM_MTU;
 
 	priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 6955c9e22d57..55dd71bbdc2a 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -548,16 +548,6 @@ mpt_lan_close(struct net_device *dev)
 	return 0;
 }
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-static int
-mpt_lan_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < MPT_LAN_MIN_MTU) || (new_mtu > MPT_LAN_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /* Tx timeout handler. */
 static void
@@ -1304,7 +1294,6 @@ static const struct net_device_ops mpt_netdev_ops = {
 	.ndo_open       = mpt_lan_open,
 	.ndo_stop       = mpt_lan_close,
 	.ndo_start_xmit = mpt_lan_sdu_send,
-	.ndo_change_mtu = mpt_lan_change_mtu,
 	.ndo_tx_timeout = mpt_lan_tx_timeout,
 };
 
@@ -1375,6 +1364,10 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
 	dev->netdev_ops = &mpt_netdev_ops;
 	dev->watchdog_timeo = MPT_LAN_TX_TIMEOUT;
 
+	/* MTU range: 96 - 65280 */
+	dev->min_mtu = MPT_LAN_MIN_MTU;
+	dev->max_mtu = MPT_LAN_MAX_MTU;
+
 	dlprintk((KERN_INFO MYNAM ": Finished registering dev "
 		"and setting initial values\n"));
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 557f9782c53c..0c26eaf5f62b 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -118,6 +118,8 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock);
  * now, the default is 64KB.
  */
 #define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 68 comes from min TCP+IP+MAC header */
+#define XPNET_MIN_MTU 68
 /* 32KB has been determined to be the ideal */
 #define XPNET_DEF_MTU (0x8000UL)
 
@@ -330,22 +332,6 @@ xpnet_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static int
-xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* 68 comes from min TCP+IP+MAC header */
-	if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
-		dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
-			"between 68 and %ld\n", dev->name, new_mtu,
-			XPNET_MAX_MTU);
-		return -EINVAL;
-	}
-
-	dev->mtu = new_mtu;
-	dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
-	return 0;
-}
-
 /*
  * Notification that the other end has received the message and
  * DMA'd the skb information.  At this point, they are done with
@@ -519,7 +505,6 @@ static const struct net_device_ops xpnet_netdev_ops = {
 	.ndo_open		= xpnet_dev_open,
 	.ndo_stop		= xpnet_dev_stop,
 	.ndo_start_xmit		= xpnet_dev_hard_start_xmit,
-	.ndo_change_mtu		= xpnet_dev_change_mtu,
 	.ndo_tx_timeout		= xpnet_dev_tx_timeout,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -555,6 +540,8 @@ xpnet_init(void)
 
 	xpnet_device->netdev_ops = &xpnet_netdev_ops;
 	xpnet_device->mtu = XPNET_DEF_MTU;
+	xpnet_device->min_mtu = XPNET_MIN_MTU;
+	xpnet_device->max_mtu = XPNET_MAX_MTU;
 
 	/*
 	 * Multicast assumes the LSB of the first octet is set for multicast
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c
index 51acc6d86e91..3a639180e4a0 100644
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -166,7 +166,6 @@ static const struct net_device_ops skfp_netdev_ops = {
 	.ndo_stop		= skfp_close,
 	.ndo_start_xmit		= skfp_send_pkt,
 	.ndo_get_stats		= skfp_ctl_get_stats,
-	.ndo_change_mtu		= fddi_change_mtu,
 	.ndo_set_rx_mode	= skfp_ctl_set_multicast_list,
 	.ndo_set_mac_address	= skfp_ctl_set_mac_address,
 	.ndo_do_ioctl		= skfp_ioctl,
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index f36eb4ad40b7..b77e4ecf3cf2 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1316,6 +1316,8 @@ static void fjes_netdev_setup(struct net_device *netdev)
 	netdev->netdev_ops = &fjes_netdev_ops;
 	fjes_set_ethtool_ops(netdev);
 	netdev->mtu = fjes_support_mtu[3];
+	netdev->min_mtu = fjes_support_mtu[0];
+	netdev->max_mtu = fjes_support_mtu[3];
 	netdev->flags |= IFF_BROADCAST;
 	netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
 }
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 95c0b45a68fb..f5a9728b89f3 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -68,7 +68,6 @@ static const struct net_device_ops rr_netdev_ops = {
 	.ndo_stop		= rr_close,
 	.ndo_do_ioctl		= rr_ioctl,
 	.ndo_start_xmit		= rr_start_xmit,
-	.ndo_change_mtu		= hippi_change_mtu,
 	.ndo_set_mac_address	= hippi_mac_addr,
 };
 
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index a31f4610b493..300bb1479b3a 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -466,17 +466,6 @@ static void rionet_set_msglevel(struct net_device *ndev, u32 value)
 	rnet->msg_enable = value;
 }
 
-static int rionet_change_mtu(struct net_device *ndev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) {
-		printk(KERN_ERR "%s: Invalid MTU size %d\n",
-		       ndev->name, new_mtu);
-		return -EINVAL;
-	}
-	ndev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct ethtool_ops rionet_ethtool_ops = {
 	.get_drvinfo = rionet_get_drvinfo,
 	.get_msglevel = rionet_get_msglevel,
@@ -488,7 +477,6 @@ static const struct net_device_ops rionet_netdev_ops = {
 	.ndo_open		= rionet_open,
 	.ndo_stop		= rionet_close,
 	.ndo_start_xmit		= rionet_start_xmit,
-	.ndo_change_mtu		= rionet_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
@@ -525,6 +513,9 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 
 	ndev->netdev_ops = &rionet_netdev_ops;
 	ndev->mtu = RIONET_MAX_MTU;
+	/* MTU range: 68 - 4082 */
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = RIONET_MAX_MTU;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
 	ndev->ethtool_ops = &rionet_ethtool_ops;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 9ed6d1c1ee45..7e933d8ff811 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -561,12 +561,7 @@ static int sl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct slip *sl = netdev_priv(dev);
 
-	if (new_mtu < 68 || new_mtu > 65534)
-		return -EINVAL;
-
-	if (new_mtu != dev->mtu)
-		return sl_realloc_bufs(sl, new_mtu);
-	return 0;
+	return sl_realloc_bufs(sl, new_mtu);
 }
 
 /* Netdevice get statistics request */
@@ -663,6 +658,10 @@ static void sl_setup(struct net_device *dev)
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 10;
 
+	/* MTU range: 68 - 65534 */
+	dev->min_mtu = 68;
+	dev->max_mtu = 65534;
+
 	/* New-style flags. */
 	dev->flags		= IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST;
 }
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index 0473d619d5bf..b4058f0000e4 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -261,19 +261,10 @@ out:
 	return NETDEV_TX_OK;
 }
 
-static int pn_net_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct net_device_ops pn_netdev_ops = {
 	.ndo_open	= pn_net_open,
 	.ndo_stop	= pn_net_close,
 	.ndo_start_xmit	= pn_net_xmit,
-	.ndo_change_mtu	= pn_net_mtu,
 };
 
 static void pn_net_setup(struct net_device *dev)
@@ -282,6 +273,8 @@ static void pn_net_setup(struct net_device *dev)
 	dev->type		= ARPHRD_PHONET;
 	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu		= PHONET_DEV_MTU;
+	dev->min_mtu		= PHONET_MIN_MTU;
+	dev->max_mtu		= PHONET_MAX_MTU;
 	dev->hard_header_len	= 1;
 	dev->dev_addr[0]	= PN_MEDIA_USB;
 	dev->addr_len		= 1;
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 9c8c9ed1dc9e..39a6df1e2ded 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -142,15 +142,6 @@ static inline int qlen(struct usb_gadget *gadget, unsigned qmult)
 
 /* NETWORK DRIVER HOOKUP (to the layer above this driver) */
 
-static int ueth_change_mtu(struct net_device *net, int new_mtu)
-{
-	if (new_mtu <= ETH_HLEN || new_mtu > GETHER_MAX_ETH_FRAME_LEN)
-		return -ERANGE;
-	net->mtu = new_mtu;
-
-	return 0;
-}
-
 static void eth_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *p)
 {
 	struct eth_dev *dev = netdev_priv(net);
@@ -736,7 +727,6 @@ static const struct net_device_ops eth_netdev_ops = {
 	.ndo_open		= eth_open,
 	.ndo_stop		= eth_stop,
 	.ndo_start_xmit		= eth_start_xmit,
-	.ndo_change_mtu		= ueth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
@@ -799,6 +789,10 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 
 	net->ethtool_ops = &ops;
 
+	/* MTU range: 14 - 15412 */
+	net->min_mtu = ETH_HLEN;
+	net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index 9a79f0106da1..32c22cfb238b 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -26,7 +26,6 @@
 
 #ifdef __KERNEL__
 __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev);
-int fddi_change_mtu(struct net_device *dev, int new_mtu);
 struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index 8ec23fb0b412..402f99e328d4 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -32,7 +32,6 @@ struct hippi_cb {
 };
 
 __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-int hippi_change_mtu(struct net_device *dev, int new_mtu);
 int hippi_mac_addr(struct net_device *dev, void *p);
 int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
 struct net_device *alloc_hippi_dev(int sizeof_priv);
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7d3a0af954e8..6356623fc238 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(fddi_type_trans);
 
-int fddi_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-EXPORT_SYMBOL(fddi_change_mtu);
-
 static const struct header_ops fddi_header_ops = {
 	.create		= fddi_header,
 };
@@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev)
 	dev->type		= ARPHRD_FDDI;
 	dev->hard_header_len	= FDDI_K_SNAP_HLEN+3;	/* Assume 802.2 SNAP hdr len + 3 pad bytes */
 	dev->mtu		= FDDI_K_SNAP_DLEN;	/* Assume max payload of 802.2 SNAP frame */
+	dev->min_mtu		= FDDI_K_SNAP_HLEN;
+	dev->max_mtu		= FDDI_K_SNAP_DLEN;
 	dev->addr_len		= FDDI_K_ALEN;
 	dev->tx_queue_len	= 100;			/* Long queues on FDDI */
 	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index ade1a52cdcff..5e4427beab2b 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(hippi_type_trans);
 
-int hippi_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/*
-	 * HIPPI's got these nice large MTUs.
-	 */
-	if ((new_mtu < 68) || (new_mtu > 65280))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-EXPORT_SYMBOL(hippi_change_mtu);
-
 /*
  * For HIPPI we will actually use the lower 4 bytes of the hardware
  * address as the I-FIELD rather than the actual hardware address.
@@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev)
 	dev->type		= ARPHRD_HIPPI;
 	dev->hard_header_len 	= HIPPI_HLEN;
 	dev->mtu		= 65280;
+	dev->min_mtu		= 68;
+	dev->max_mtu		= 65280;
 	dev->addr_len		= HIPPI_ALEN;
 	dev->tx_queue_len	= 25 /* 5 */;
 	memset(dev->broadcast, 0xFF, HIPPI_ALEN);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 49e16b6e0ba3..112679d64be5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -158,17 +158,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
-static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* check ranges */
-	if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 /**
  * batadv_interface_set_rx_mode - set the rx mode of a device
  * @dev: registered network device to modify
@@ -920,7 +909,6 @@ static const struct net_device_ops batadv_netdev_ops = {
 	.ndo_vlan_rx_add_vid = batadv_interface_add_vid,
 	.ndo_vlan_rx_kill_vid = batadv_interface_kill_vid,
 	.ndo_set_mac_address = batadv_interface_set_mac_addr,
-	.ndo_change_mtu = batadv_interface_change_mtu,
 	.ndo_set_rx_mode = batadv_interface_set_rx_mode,
 	.ndo_start_xmit = batadv_interface_tx,
 	.ndo_validate_addr = eth_validate_addr,
@@ -987,6 +975,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
 	dev_net_set(soft_iface, net);
 
 	soft_iface->rtnl_link_ops = &batadv_link_ops;
+	soft_iface->max_mtu = batadv_hardif_min_mtu(soft_iface);
 
 	ret = register_netdevice(soft_iface);
 	if (ret < 0) {
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 16737cd8dae8..fc65b145f6e7 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev)
 	random_ether_addr(dev->dev_addr);
 
 	ether_setup(dev);
+	dev->min_mtu = 0;
 	dev->header_ops = &hsr_header_ops;
 	dev->netdev_ops = &hsr_device_ops;
 	SET_NETDEV_DEVTYPE(dev, &hsr_type);
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index fa8237fdc57b..21c28b51be94 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static int gprs_set_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct net_device_ops gprs_netdev_ops = {
 	.ndo_open	= gprs_open,
 	.ndo_stop	= gprs_close,
 	.ndo_start_xmit	= gprs_xmit,
-	.ndo_change_mtu	= gprs_set_mtu,
 };
 
 static void gprs_setup(struct net_device *dev)
@@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev)
 	dev->type		= ARPHRD_PHONET_PIPE;
 	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu		= GPRS_DEFAULT_MTU;
+	dev->min_mtu		= 576;
+	dev->max_mtu		= (PHONET_MAX_MTU - 11);
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 10;
-- 
cgit v1.2.3


From e5f1b245870d59be0e6cc3b33edf5406a3b59648 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Wed, 5 Oct 2016 09:33:12 +0200
Subject: cpuidle: governors: Remove remaining old module code

The governor's code use try_module_get() and put_module() to refcount
the governor's module. But the governors are not compiled as module.

The refcount does not prevent to switch the governor or unload
a module as they aren't compiled as modules. The code is pointless,
so remove it.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governor.c         | 4 ----
 drivers/cpuidle/governors/ladder.c | 2 --
 drivers/cpuidle/governors/menu.c   | 2 --
 include/linux/cpuidle.h            | 2 --
 4 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index fb9f511cca23..4e78263e34a4 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/mutex.h>
-#include <linux/module.h>
 #include <linux/cpuidle.h>
 
 #include "cpuidle.h"
@@ -53,14 +52,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov)
 	if (cpuidle_curr_governor) {
 		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
 			cpuidle_disable_device(dev);
-		module_put(cpuidle_curr_governor->owner);
 	}
 
 	cpuidle_curr_governor = gov;
 
 	if (gov) {
-		if (!try_module_get(cpuidle_curr_governor->owner))
-			return -EINVAL;
 		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
 			cpuidle_enable_device(dev);
 		cpuidle_install_idle_handler();
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 63bd5a403e22..fe8f08948fcb 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
 #include <linux/pm_qos.h>
-#include <linux/module.h>
 #include <linux/jiffies.h>
 #include <linux/tick.h>
 
@@ -177,7 +176,6 @@ static struct cpuidle_governor ladder_governor = {
 	.enable =	ladder_enable_device,
 	.select =	ladder_select_state,
 	.reflect =	ladder_reflect,
-	.owner =	THIS_MODULE,
 };
 
 /**
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 03d38c291de6..d9b5b9398a0f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,7 +19,6 @@
 #include <linux/tick.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
-#include <linux/module.h>
 
 /*
  * Please note when changing the tuning values:
@@ -484,7 +483,6 @@ static struct cpuidle_governor menu_governor = {
 	.enable =	menu_enable_device,
 	.select =	menu_select,
 	.reflect =	menu_reflect,
-	.owner =	THIS_MODULE,
 };
 
 /**
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index bb31373c3478..15deea449edc 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -235,8 +235,6 @@ struct cpuidle_governor {
 	int  (*select)		(struct cpuidle_driver *drv,
 					struct cpuidle_device *dev);
 	void (*reflect)		(struct cpuidle_device *dev, int index);
-
-	struct module 		*owner;
 };
 
 #ifdef CONFIG_CPU_IDLE
-- 
cgit v1.2.3


From 62006c1702b3b1be0c0726949e0ee0ea2326be9c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 17 Oct 2016 20:16:58 +0200
Subject: PM / Runtime: Remove the exported function pm_children_suspended()

The exported function pm_children_suspended() has only one caller, which is
the runtime PM internal function, rpm_check_suspend_allowed().

Let's clean-up this code, by removing pm_children_suspended() altogether
and instead do the one-liner check directly in rpm_check_suspend_allowed().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 3 ++-
 include/linux/pm_runtime.h   | 7 -------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 82a081ea4317..53b427dfc403 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -241,7 +241,8 @@ static int rpm_check_suspend_allowed(struct device *dev)
 		retval = -EACCES;
 	else if (atomic_read(&dev->power.usage_count) > 0)
 		retval = -EAGAIN;
-	else if (!pm_children_suspended(dev))
+	else if (!dev->power.ignore_children &&
+			atomic_read(&dev->power.child_count))
 		retval = -EBUSY;
 
 	/* Pending resume requests take precedence over suspends. */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 2e14d2667b6c..61ea5666c94c 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -61,12 +61,6 @@ static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
 	dev->power.ignore_children = enable;
 }
 
-static inline bool pm_children_suspended(struct device *dev)
-{
-	return dev->power.ignore_children
-		|| !atomic_read(&dev->power.child_count);
-}
-
 static inline void pm_runtime_get_noresume(struct device *dev)
 {
 	atomic_inc(&dev->power.usage_count);
@@ -162,7 +156,6 @@ static inline void pm_runtime_allow(struct device *dev) {}
 static inline void pm_runtime_forbid(struct device *dev) {}
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
-static inline bool pm_children_suspended(struct device *dev) { return false; }
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
 static inline bool device_run_wake(struct device *dev) { return false; }
-- 
cgit v1.2.3


From b1a60995a684f2b6052cda640b0704361ab40089 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 17 Oct 2016 20:17:00 +0200
Subject: PM / Runtime: Convert pm_runtime_set_suspended() to return an int

Because pm_runtime_set_suspended() invokes __pm_runtime_set_status(), which
can fail, pm_runtime_set_suspended() can also fail.

Instead of hiding a potential error, let's propagate it by converting
pm_runtime_set_suspended() from a void to return an int. In this way users
are able to check the error code and act accordingly.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_runtime.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 61ea5666c94c..4957fc185ea9 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -258,9 +258,9 @@ static inline int pm_runtime_set_active(struct device *dev)
 	return __pm_runtime_set_status(dev, RPM_ACTIVE);
 }
 
-static inline void pm_runtime_set_suspended(struct device *dev)
+static inline int pm_runtime_set_suspended(struct device *dev)
 {
-	__pm_runtime_set_status(dev, RPM_SUSPENDED);
+	return __pm_runtime_set_status(dev, RPM_SUSPENDED);
 }
 
 static inline void pm_runtime_disable(struct device *dev)
-- 
cgit v1.2.3


From 59d65b73a23cee48e6f3e44686f199d79b7ee854 Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:49 -0700
Subject: PM / Domains: Make genpd state allocation dynamic

Allow PM Domain states to be defined dynamically by the drivers. This
removes the limitation on the maximum number of states possible for a
domain.

Suggested-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-imx/gpc.c     | 17 ++++++++++-------
 drivers/base/power/domain.c | 35 +++++++++++++++++++++++------------
 include/linux/pm_domain.h   |  5 ++---
 3 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 0df062d8b2c9..57a410bbb6a2 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -380,13 +380,6 @@ static struct pu_domain imx6q_pu_domain = {
 		.name = "PU",
 		.power_off = imx6q_pm_pu_power_off,
 		.power_on = imx6q_pm_pu_power_on,
-		.states = {
-			[0] = {
-				.power_off_latency_ns = 25000,
-				.power_on_latency_ns = 2000000,
-			},
-		},
-		.state_count = 1,
 	},
 };
 
@@ -430,6 +423,16 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 	if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
 		return 0;
 
+	imx6q_pu_domain.base.states = devm_kzalloc(dev,
+					sizeof(*imx6q_pu_domain.base.states),
+					GFP_KERNEL);
+	if (!imx6q_pu_domain.base.states)
+		return -ENOMEM;
+
+	imx6q_pu_domain.base.states[0].power_off_latency_ns = 25000;
+	imx6q_pu_domain.base.states[0].power_on_latency_ns = 2000000;
+	imx6q_pu_domain.base.state_count = 1;
+
 	pm_genpd_init(&imx6q_pu_domain.base, NULL, false);
 	return of_genpd_add_provider_onecell(dev->of_node,
 					     &imx_gpc_onecell_data);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e023066e4215..37ab7f1ef178 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1282,6 +1282,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
 
+static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
+{
+	struct genpd_power_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	genpd->states = state;
+	genpd->state_count = 1;
+	genpd->free = state;
+
+	return 0;
+}
+
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1293,6 +1308,8 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
 int pm_genpd_init(struct generic_pm_domain *genpd,
 		  struct dev_power_governor *gov, bool is_off)
 {
+	int ret;
+
 	if (IS_ERR_OR_NULL(genpd))
 		return -EINVAL;
 
@@ -1325,19 +1342,12 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 		genpd->dev_ops.start = pm_clk_resume;
 	}
 
-	if (genpd->state_idx >= GENPD_MAX_NUM_STATES) {
-		pr_warn("Initial state index out of bounds.\n");
-		genpd->state_idx = GENPD_MAX_NUM_STATES - 1;
-	}
-
-	if (genpd->state_count > GENPD_MAX_NUM_STATES) {
-		pr_warn("Limiting states to  %d\n", GENPD_MAX_NUM_STATES);
-		genpd->state_count = GENPD_MAX_NUM_STATES;
-	}
-
 	/* Use only one "off" state if there were no states declared */
-	if (genpd->state_count == 0)
-		genpd->state_count = 1;
+	if (genpd->state_count == 0) {
+		ret = genpd_set_default_power_state(genpd);
+		if (ret)
+			return ret;
+	}
 
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
@@ -1377,6 +1387,7 @@ static int genpd_remove(struct generic_pm_domain *genpd)
 	list_del(&genpd->gpd_list_node);
 	mutex_unlock(&genpd->lock);
 	cancel_work_sync(&genpd->power_off_work);
+	kfree(genpd->free);
 	pr_debug("%s: removed %s\n", __func__, genpd->name);
 
 	return 0;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index a09fe5c009c8..de1d8f331b03 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -19,8 +19,6 @@
 /* Defines used for the flags field in the struct generic_pm_domain */
 #define GENPD_FLAG_PM_CLK	(1U << 0) /* PM domain uses PM clk */
 
-#define GENPD_MAX_NUM_STATES	8 /* Number of possible low power states */
-
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
 	GPD_STATE_POWER_OFF,	/* PM domain is off */
@@ -70,9 +68,10 @@ struct generic_pm_domain {
 	void (*detach_dev)(struct generic_pm_domain *domain,
 			   struct device *dev);
 	unsigned int flags;		/* Bit field of configs for genpd */
-	struct genpd_power_state states[GENPD_MAX_NUM_STATES];
+	struct genpd_power_state *states;
 	unsigned int state_count; /* number of states */
 	unsigned int state_idx; /* state that genpd will go to when off */
+	void *free; /* Free the state that was allocated for default */
 
 };
 
-- 
cgit v1.2.3


From 405f7226014093a2809f27ba32a8230e770ac876 Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:50 -0700
Subject: PM / Domains: Add residency property to genpd states

Residency of a domain's idle state indicates that the minimum idle time
for the domain's idle state to be beneficial for power. Add the
parameter to the state node. Future patches, will use the residency
value in the genpd governor to determine if it is worth while to enter
an idle state.

Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index de1d8f331b03..f4492eb71701 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -38,6 +38,7 @@ struct gpd_dev_ops {
 struct genpd_power_state {
 	s64 power_off_latency_ns;
 	s64 power_on_latency_ns;
+	s64 residency_ns;
 };
 
 struct generic_pm_domain {
-- 
cgit v1.2.3


From 30f604283e05d34cb10108c7ba017e5f4fc9d62c Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:51 -0700
Subject: PM / Domains: Allow domain power states to be read from DT

This patch allows domains to define idle states in the DT. SoC's can
define domain idle states in DT using the "domain-idle-states" property
of the domain provider. Add API to read the idle states from DT that can
be set in the genpd object.

This patch is based on the original patch by Marc Titinger.

Signed-off-by: Marc Titinger <mtitinger+renesas@baylibre.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 94 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_domain.h   |  8 ++++
 2 files changed, 102 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 37ab7f1ef178..9af75ba0472a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1916,6 +1916,100 @@ out:
 	return ret ? -EPROBE_DEFER : 0;
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
+
+static const struct of_device_id idle_state_match[] = {
+	{ .compatible = "arm,idle-state", },
+	{ }
+};
+
+static int genpd_parse_state(struct genpd_power_state *genpd_state,
+				    struct device_node *state_node)
+{
+	int err;
+	u32 residency;
+	u32 entry_latency, exit_latency;
+	const struct of_device_id *match_id;
+
+	match_id = of_match_node(idle_state_match, state_node);
+	if (!match_id)
+		return -EINVAL;
+
+	err = of_property_read_u32(state_node, "entry-latency-us",
+						&entry_latency);
+	if (err) {
+		pr_debug(" * %s missing entry-latency-us property\n",
+						state_node->full_name);
+		return -EINVAL;
+	}
+
+	err = of_property_read_u32(state_node, "exit-latency-us",
+						&exit_latency);
+	if (err) {
+		pr_debug(" * %s missing exit-latency-us property\n",
+						state_node->full_name);
+		return -EINVAL;
+	}
+
+	err = of_property_read_u32(state_node, "min-residency-us", &residency);
+	if (!err)
+		genpd_state->residency_ns = 1000 * residency;
+
+	genpd_state->power_on_latency_ns = 1000 * exit_latency;
+	genpd_state->power_off_latency_ns = 1000 * entry_latency;
+
+	return 0;
+}
+
+/**
+ * of_genpd_parse_idle_states: Return array of idle states for the genpd.
+ *
+ * @dn: The genpd device node
+ * @states: The pointer to which the state array will be saved.
+ * @n: The count of elements in the array returned from this function.
+ *
+ * Returns the device states parsed from the OF node. The memory for the states
+ * is allocated by this function and is the responsibility of the caller to
+ * free the memory after use.
+ */
+int of_genpd_parse_idle_states(struct device_node *dn,
+			struct genpd_power_state **states, int *n)
+{
+	struct genpd_power_state *st;
+	struct device_node *np;
+	int i = 0;
+	int err, ret;
+	int count;
+	struct of_phandle_iterator it;
+
+	count = of_count_phandle_with_args(dn, "domain-idle-states", NULL);
+	if (!count)
+		return -EINVAL;
+
+	st = kcalloc(count, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	/* Loop over the phandles until all the requested entry is found */
+	of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) {
+		np = it.node;
+		ret = genpd_parse_state(&st[i++], np);
+		if (ret) {
+			pr_err
+			("Parsing idle state node %s failed with err %d\n",
+							np->full_name, ret);
+			of_node_put(np);
+			kfree(st);
+			return ret;
+		}
+	}
+
+	*n = count;
+	*states = st;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states);
+
 #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f4492eb71701..b4894969fbec 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -205,6 +205,8 @@ extern int of_genpd_add_device(struct of_phandle_args *args,
 extern int of_genpd_add_subdomain(struct of_phandle_args *parent,
 				  struct of_phandle_args *new_subdomain);
 extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
+extern int of_genpd_parse_idle_states(struct device_node *dn,
+			struct genpd_power_state **states, int *n);
 
 int genpd_dev_pm_attach(struct device *dev);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
@@ -234,6 +236,12 @@ static inline int of_genpd_add_subdomain(struct of_phandle_args *parent,
 	return -ENODEV;
 }
 
+static inline int of_genpd_parse_idle_states(struct device_node *dn,
+			struct genpd_power_state **states, int *n)
+{
+	return -ENODEV;
+}
+
 static inline int genpd_dev_pm_attach(struct device *dev)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 0c9b694a8a7d4853318c4f2ce315afa2bd3664b6 Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:52 -0700
Subject: PM / Domains: Save the fwnode in genpd_power_state

Save the fwnode for the genpd state in the state node. PM Domain clients
may use the fwnode to read in the platform specific domain state
properties and associate them with the state.

Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 1 +
 include/linux/pm_domain.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9af75ba0472a..1a6073aaca0e 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1956,6 +1956,7 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
 
 	genpd_state->power_on_latency_ns = 1000 * exit_latency;
 	genpd_state->power_off_latency_ns = 1000 * entry_latency;
+	genpd_state->fwnode = &state_node->fwnode;
 
 	return 0;
 }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b4894969fbec..6a8988166899 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -39,6 +39,7 @@ struct genpd_power_state {
 	s64 power_off_latency_ns;
 	s64 power_on_latency_ns;
 	s64 residency_ns;
+	struct fwnode_handle *fwnode;
 };
 
 struct generic_pm_domain {
-- 
cgit v1.2.3


From 35241d12f750d2f1556a9c85f175ce7044716881 Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:54 -0700
Subject: PM / Domains: Abstract genpd locking

Abstract genpd lock/unlock calls, in preparation for domain specific
locks added in the following patches.

Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 121 +++++++++++++++++++++++++++++---------------
 include/linux/pm_domain.h   |   5 +-
 2 files changed, 85 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 1a6073aaca0e..4194012cdf86 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -39,6 +39,46 @@
 static LIST_HEAD(gpd_list);
 static DEFINE_MUTEX(gpd_list_lock);
 
+struct genpd_lock_ops {
+	void (*lock)(struct generic_pm_domain *genpd);
+	void (*lock_nested)(struct generic_pm_domain *genpd, int depth);
+	int (*lock_interruptible)(struct generic_pm_domain *genpd);
+	void (*unlock)(struct generic_pm_domain *genpd);
+};
+
+static void genpd_lock_mtx(struct generic_pm_domain *genpd)
+{
+	mutex_lock(&genpd->mlock);
+}
+
+static void genpd_lock_nested_mtx(struct generic_pm_domain *genpd,
+					int depth)
+{
+	mutex_lock_nested(&genpd->mlock, depth);
+}
+
+static int genpd_lock_interruptible_mtx(struct generic_pm_domain *genpd)
+{
+	return mutex_lock_interruptible(&genpd->mlock);
+}
+
+static void genpd_unlock_mtx(struct generic_pm_domain *genpd)
+{
+	return mutex_unlock(&genpd->mlock);
+}
+
+static const struct genpd_lock_ops genpd_mtx_ops = {
+	.lock = genpd_lock_mtx,
+	.lock_nested = genpd_lock_nested_mtx,
+	.lock_interruptible = genpd_lock_interruptible_mtx,
+	.unlock = genpd_unlock_mtx,
+};
+
+#define genpd_lock(p)			p->lock_ops->lock(p)
+#define genpd_lock_nested(p, d)		p->lock_ops->lock_nested(p, d)
+#define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
+#define genpd_unlock(p)			p->lock_ops->unlock(p)
+
 /*
  * Get the generic PM domain for a particular struct device.
  * This validates the struct device pointer, the PM domain pointer,
@@ -200,9 +240,9 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 
 		genpd_sd_counter_inc(master);
 
-		mutex_lock_nested(&master->lock, depth + 1);
+		genpd_lock_nested(master, depth + 1);
 		ret = genpd_poweron(master, depth + 1);
-		mutex_unlock(&master->lock);
+		genpd_unlock(master);
 
 		if (ret) {
 			genpd_sd_counter_dec(master);
@@ -255,9 +295,9 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 		spin_unlock_irq(&dev->power.lock);
 
 		if (!IS_ERR(genpd)) {
-			mutex_lock(&genpd->lock);
+			genpd_lock(genpd);
 			genpd->max_off_time_changed = true;
-			mutex_unlock(&genpd->lock);
+			genpd_unlock(genpd);
 		}
 
 		dev = dev->parent;
@@ -354,9 +394,9 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 
 	genpd = container_of(work, struct generic_pm_domain, power_off_work);
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 	genpd_poweroff(genpd, true);
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 }
 
 /**
@@ -472,9 +512,9 @@ static int genpd_runtime_suspend(struct device *dev)
 	if (dev->power.irq_safe)
 		return 0;
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 	genpd_poweroff(genpd, false);
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	return 0;
 }
@@ -509,9 +549,9 @@ static int genpd_runtime_resume(struct device *dev)
 		goto out;
 	}
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 	ret = genpd_poweron(genpd, 0);
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	if (ret)
 		return ret;
@@ -547,9 +587,9 @@ err_stop:
 	genpd_stop_dev(genpd, dev);
 err_poweroff:
 	if (!dev->power.irq_safe) {
-		mutex_lock(&genpd->lock);
+		genpd_lock(genpd);
 		genpd_poweroff(genpd, 0);
-		mutex_unlock(&genpd->lock);
+		genpd_unlock(genpd);
 	}
 
 	return ret;
@@ -732,20 +772,20 @@ static int pm_genpd_prepare(struct device *dev)
 	if (resume_needed(dev, genpd))
 		pm_runtime_resume(dev);
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 
 	if (genpd->prepared_count++ == 0)
 		genpd->suspended_count = 0;
 
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	ret = pm_generic_prepare(dev);
 	if (ret) {
-		mutex_lock(&genpd->lock);
+		genpd_lock(genpd);
 
 		genpd->prepared_count--;
 
-		mutex_unlock(&genpd->lock);
+		genpd_unlock(genpd);
 	}
 
 	return ret;
@@ -936,13 +976,13 @@ static void pm_genpd_complete(struct device *dev)
 
 	pm_generic_complete(dev);
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 
 	genpd->prepared_count--;
 	if (!genpd->prepared_count)
 		genpd_queue_power_off_work(genpd);
 
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 }
 
 /**
@@ -1071,7 +1111,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	if (IS_ERR(gpd_data))
 		return PTR_ERR(gpd_data);
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 
 	if (genpd->prepared_count > 0) {
 		ret = -EAGAIN;
@@ -1088,7 +1128,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
 
  out:
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	if (ret)
 		genpd_free_dev_data(dev, gpd_data);
@@ -1130,7 +1170,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
 	gpd_data = to_gpd_data(pdd);
 	dev_pm_qos_remove_notifier(dev, &gpd_data->nb);
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 
 	if (genpd->prepared_count > 0) {
 		ret = -EAGAIN;
@@ -1145,14 +1185,14 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
 
 	list_del_init(&pdd->list_node);
 
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	genpd_free_dev_data(dev, gpd_data);
 
 	return 0;
 
  out:
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 	dev_pm_qos_add_notifier(dev, &gpd_data->nb);
 
 	return ret;
@@ -1187,8 +1227,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	if (!link)
 		return -ENOMEM;
 
-	mutex_lock(&subdomain->lock);
-	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
+	genpd_lock(subdomain);
+	genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING);
 
 	if (genpd->status == GPD_STATE_POWER_OFF
 	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1211,8 +1251,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 		genpd_sd_counter_inc(genpd);
 
  out:
-	mutex_unlock(&genpd->lock);
-	mutex_unlock(&subdomain->lock);
+	genpd_unlock(genpd);
+	genpd_unlock(subdomain);
 	if (ret)
 		kfree(link);
 	return ret;
@@ -1250,8 +1290,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
 		return -EINVAL;
 
-	mutex_lock(&subdomain->lock);
-	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
+	genpd_lock(subdomain);
+	genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING);
 
 	if (!list_empty(&subdomain->master_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1275,8 +1315,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	}
 
 out:
-	mutex_unlock(&genpd->lock);
-	mutex_unlock(&subdomain->lock);
+	genpd_unlock(genpd);
+	genpd_unlock(subdomain);
 
 	return ret;
 }
@@ -1316,7 +1356,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 	INIT_LIST_HEAD(&genpd->master_links);
 	INIT_LIST_HEAD(&genpd->slave_links);
 	INIT_LIST_HEAD(&genpd->dev_list);
-	mutex_init(&genpd->lock);
+	mutex_init(&genpd->mlock);
+	genpd->lock_ops = &genpd_mtx_ops;
 	genpd->gov = gov;
 	INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn);
 	atomic_set(&genpd->sd_count, 0);
@@ -1364,16 +1405,16 @@ static int genpd_remove(struct generic_pm_domain *genpd)
 	if (IS_ERR_OR_NULL(genpd))
 		return -EINVAL;
 
-	mutex_lock(&genpd->lock);
+	genpd_lock(genpd);
 
 	if (genpd->has_provider) {
-		mutex_unlock(&genpd->lock);
+		genpd_unlock(genpd);
 		pr_err("Provider present, unable to remove %s\n", genpd->name);
 		return -EBUSY;
 	}
 
 	if (!list_empty(&genpd->master_links) || genpd->device_count) {
-		mutex_unlock(&genpd->lock);
+		genpd_unlock(genpd);
 		pr_err("%s: unable to remove %s\n", __func__, genpd->name);
 		return -EBUSY;
 	}
@@ -1385,7 +1426,7 @@ static int genpd_remove(struct generic_pm_domain *genpd)
 	}
 
 	list_del(&genpd->gpd_list_node);
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 	cancel_work_sync(&genpd->power_off_work);
 	kfree(genpd->free);
 	pr_debug("%s: removed %s\n", __func__, genpd->name);
@@ -1909,9 +1950,9 @@ int genpd_dev_pm_attach(struct device *dev)
 	dev->pm_domain->detach = genpd_dev_pm_detach;
 	dev->pm_domain->sync = genpd_dev_pm_sync;
 
-	mutex_lock(&pd->lock);
+	genpd_lock(pd);
 	ret = genpd_poweron(pd, 0);
-	mutex_unlock(&pd->lock);
+	genpd_unlock(pd);
 out:
 	return ret ? -EPROBE_DEFER : 0;
 }
@@ -2064,7 +2105,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
 	char state[16];
 	int ret;
 
-	ret = mutex_lock_interruptible(&genpd->lock);
+	ret = genpd_lock_interruptible(genpd);
 	if (ret)
 		return -ERESTARTSYS;
 
@@ -2101,7 +2142,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
 
 	seq_puts(s, "\n");
 exit:
-	mutex_unlock(&genpd->lock);
+	genpd_unlock(genpd);
 
 	return 0;
 }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 6a8988166899..811b968eb740 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -42,13 +42,14 @@ struct genpd_power_state {
 	struct fwnode_handle *fwnode;
 };
 
+struct genpd_lock_ops;
+
 struct generic_pm_domain {
 	struct dev_pm_domain domain;	/* PM domain operations */
 	struct list_head gpd_list_node;	/* Node in the global PM domains list */
 	struct list_head master_links;	/* Links with PM domain as a master */
 	struct list_head slave_links;	/* Links with PM domain as a slave */
 	struct list_head dev_list;	/* List of devices */
-	struct mutex lock;
 	struct dev_power_governor *gov;
 	struct work_struct power_off_work;
 	struct fwnode_handle *provider;	/* Identity of the domain provider */
@@ -74,6 +75,8 @@ struct generic_pm_domain {
 	unsigned int state_count; /* number of states */
 	unsigned int state_idx; /* state that genpd will go to when off */
 	void *free; /* Free the state that was allocated for default */
+	const struct genpd_lock_ops *lock_ops;
+	struct mutex mlock;
 
 };
 
-- 
cgit v1.2.3


From d716f4798ff8c65ace4a6ab291f9a4ff265df4ba Mon Sep 17 00:00:00 2001
From: Lina Iyer <lina.iyer@linaro.org>
Date: Fri, 14 Oct 2016 10:47:55 -0700
Subject: PM / Domains: Support IRQ safe PM domains

Generic Power Domains currently support turning on/off only in process
context. This prevents the usage of PM domains for domains that could be
powered on/off in a context where IRQs are disabled. Many such domains
exist today and do not get powered off, when the IRQ safe devices in
that domain are powered off, because of this limitation.

However, not all domains can operate in IRQ safe contexts. Genpd
therefore, has to support both cases where the domain may or may not
operate in IRQ safe contexts. Configuring genpd to use an appropriate
lock for that domain, would allow domains that have IRQ safe devices to
runtime suspend and resume, in atomic context.

To achieve domain specific locking, set the domain's ->flag to
GENPD_FLAG_IRQ_SAFE while defining the domain. This indicates that genpd
should use a spinlock instead of a mutex for locking the domain. Locking
is abstracted through genpd_lock() and genpd_unlock() functions that use
the flag to determine the appropriate lock to be used for that domain.

Domains that have lower latency to suspend and resume and can operate
with IRQs disabled may now be able to save power, when the component
devices and sub-domains are idle at runtime.

The restriction this imposes on the domain hierarchy is that non-IRQ
safe domains may not have IRQ-safe subdomains, but IRQ safe domains may
have IRQ safe and non-IRQ safe subdomains and devices.

Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 111 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/pm_domain.h   |  10 +++-
 2 files changed, 110 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 4194012cdf86..aac656a889dc 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -74,11 +74,70 @@ static const struct genpd_lock_ops genpd_mtx_ops = {
 	.unlock = genpd_unlock_mtx,
 };
 
+static void genpd_lock_spin(struct generic_pm_domain *genpd)
+	__acquires(&genpd->slock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&genpd->slock, flags);
+	genpd->lock_flags = flags;
+}
+
+static void genpd_lock_nested_spin(struct generic_pm_domain *genpd,
+					int depth)
+	__acquires(&genpd->slock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave_nested(&genpd->slock, flags, depth);
+	genpd->lock_flags = flags;
+}
+
+static int genpd_lock_interruptible_spin(struct generic_pm_domain *genpd)
+	__acquires(&genpd->slock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&genpd->slock, flags);
+	genpd->lock_flags = flags;
+	return 0;
+}
+
+static void genpd_unlock_spin(struct generic_pm_domain *genpd)
+	__releases(&genpd->slock)
+{
+	spin_unlock_irqrestore(&genpd->slock, genpd->lock_flags);
+}
+
+static const struct genpd_lock_ops genpd_spin_ops = {
+	.lock = genpd_lock_spin,
+	.lock_nested = genpd_lock_nested_spin,
+	.lock_interruptible = genpd_lock_interruptible_spin,
+	.unlock = genpd_unlock_spin,
+};
+
 #define genpd_lock(p)			p->lock_ops->lock(p)
 #define genpd_lock_nested(p, d)		p->lock_ops->lock_nested(p, d)
 #define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
 #define genpd_unlock(p)			p->lock_ops->unlock(p)
 
+#define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
+
+static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
+		struct generic_pm_domain *genpd)
+{
+	bool ret;
+
+	ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
+
+	/* Warn once for each IRQ safe dev in no sleep domain */
+	if (ret)
+		dev_warn_once(dev, "PM domain %s will not be powered off\n",
+				genpd->name);
+
+	return ret;
+}
+
 /*
  * Get the generic PM domain for a particular struct device.
  * This validates the struct device pointer, the PM domain pointer,
@@ -343,7 +402,12 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async)
 		if (stat > PM_QOS_FLAGS_NONE)
 			return -EBUSY;
 
-		if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe)
+		/*
+		 * Do not allow PM domain to be powered off, when an IRQ safe
+		 * device is part of a non-IRQ safe domain.
+		 */
+		if (!pm_runtime_suspended(pdd->dev) ||
+			irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd))
 			not_suspended++;
 	}
 
@@ -506,10 +570,10 @@ static int genpd_runtime_suspend(struct device *dev)
 	}
 
 	/*
-	 * If power.irq_safe is set, this routine will be run with interrupts
-	 * off, so it can't use mutexes.
+	 * If power.irq_safe is set, this routine may be run with
+	 * IRQs disabled, so suspend only if the PM domain also is irq_safe.
 	 */
-	if (dev->power.irq_safe)
+	if (irq_safe_dev_in_no_sleep_domain(dev, genpd))
 		return 0;
 
 	genpd_lock(genpd);
@@ -543,8 +607,11 @@ static int genpd_runtime_resume(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	/* If power.irq_safe, the PM domain is never powered off. */
-	if (dev->power.irq_safe) {
+	/*
+	 * As we don't power off a non IRQ safe domain, which holds
+	 * an IRQ safe device, we don't need to restore power to it.
+	 */
+	if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) {
 		timed = false;
 		goto out;
 	}
@@ -586,7 +653,8 @@ static int genpd_runtime_resume(struct device *dev)
 err_stop:
 	genpd_stop_dev(genpd, dev);
 err_poweroff:
-	if (!dev->power.irq_safe) {
+	if (!pm_runtime_is_irq_safe(dev) ||
+		(pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) {
 		genpd_lock(genpd);
 		genpd_poweroff(genpd, 0);
 		genpd_unlock(genpd);
@@ -1223,6 +1291,17 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	    || genpd == subdomain)
 		return -EINVAL;
 
+	/*
+	 * If the domain can be powered on/off in an IRQ safe
+	 * context, ensure that the subdomain can also be
+	 * powered on/off in that context.
+	 */
+	if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) {
+		WARN("Parent %s of subdomain %s must be IRQ safe\n",
+				genpd->name, subdomain->name);
+		return -EINVAL;
+	}
+
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link)
 		return -ENOMEM;
@@ -1337,6 +1416,17 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
 	return 0;
 }
 
+static void genpd_lock_init(struct generic_pm_domain *genpd)
+{
+	if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
+		spin_lock_init(&genpd->slock);
+		genpd->lock_ops = &genpd_spin_ops;
+	} else {
+		mutex_init(&genpd->mlock);
+		genpd->lock_ops = &genpd_mtx_ops;
+	}
+}
+
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1356,8 +1446,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 	INIT_LIST_HEAD(&genpd->master_links);
 	INIT_LIST_HEAD(&genpd->slave_links);
 	INIT_LIST_HEAD(&genpd->dev_list);
-	mutex_init(&genpd->mlock);
-	genpd->lock_ops = &genpd_mtx_ops;
+	genpd_lock_init(genpd);
 	genpd->gov = gov;
 	INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn);
 	atomic_set(&genpd->sd_count, 0);
@@ -2131,7 +2220,9 @@ static int pm_genpd_summary_one(struct seq_file *s,
 	}
 
 	list_for_each_entry(pm_data, &genpd->dev_list, list_node) {
-		kobj_path = kobject_get_path(&pm_data->dev->kobj, GFP_KERNEL);
+		kobj_path = kobject_get_path(&pm_data->dev->kobj,
+				genpd_is_irq_safe(genpd) ?
+				GFP_ATOMIC : GFP_KERNEL);
 		if (kobj_path == NULL)
 			continue;
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 811b968eb740..81ece61075df 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -15,9 +15,11 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/notifier.h>
+#include <linux/spinlock.h>
 
 /* Defines used for the flags field in the struct generic_pm_domain */
 #define GENPD_FLAG_PM_CLK	(1U << 0) /* PM domain uses PM clk */
+#define GENPD_FLAG_IRQ_SAFE	(1U << 1) /* PM domain operates in atomic */
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
@@ -76,7 +78,13 @@ struct generic_pm_domain {
 	unsigned int state_idx; /* state that genpd will go to when off */
 	void *free; /* Free the state that was allocated for default */
 	const struct genpd_lock_ops *lock_ops;
-	struct mutex mlock;
+	union {
+		struct mutex mlock;
+		struct {
+			spinlock_t slock;
+			unsigned long lock_flags;
+		};
+	};
 
 };
 
-- 
cgit v1.2.3


From 2d0e30c30f84d08dc16f0f2af41f1b8a85f0755e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Oct 2016 12:46:33 +0200
Subject: bpf: add helper for retrieving current numa node id

Use case is mainly for soreuseport to select sockets for the local
numa node, but since generic, lets also add this for other networking
and tracing program types.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  6 ++++++
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 12 ++++++++++++
 kernel/trace/bpf_trace.c |  2 ++
 net/core/filter.c        |  2 ++
 6 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c201017b5730..edcd96ded8aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -319,6 +319,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f09c70b97eca..374ef582ae18 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -426,6 +426,12 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_set_hash_invalid,
 
+	/**
+	 * bpf_get_numa_node_id()
+	 * Returns the id of the current NUMA node.
+	 */
+	BPF_FUNC_get_numa_node_id,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index aa6d98154106..82a04143368e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1043,6 +1043,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 39918402e6e9..045cbe673356 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -13,6 +13,7 @@
 #include <linux/rcupdate.h>
 #include <linux/random.h>
 #include <linux/smp.h>
+#include <linux/topology.h>
 #include <linux/ktime.h>
 #include <linux/sched.h>
 #include <linux/uidgid.h>
@@ -92,6 +93,17 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_get_numa_node_id)
+{
+	return numa_node_id();
+}
+
+const struct bpf_func_proto bpf_get_numa_node_id_proto = {
+	.func		= bpf_get_numa_node_id,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_ktime_get_ns)
 {
 	/* NMI safe access to clock monotonic */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 5dcb99281259..fa77311dadb2 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -422,6 +422,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return bpf_get_trace_printk_proto();
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_get_numa_node_id:
+		return &bpf_get_numa_node_id_proto;
 	case BPF_FUNC_perf_event_read:
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
diff --git a/net/core/filter.c b/net/core/filter.c
index 00351cdf7d0c..cd9e2ba66b0e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2492,6 +2492,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_raw_smp_processor_id_proto;
+	case BPF_FUNC_get_numa_node_id:
+		return &bpf_get_numa_node_id_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
-- 
cgit v1.2.3


From 304887041d953b6692c0d4a9f8fafb252d32e9a0 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Thu, 20 Oct 2016 16:28:01 +0000
Subject: platform/x86: Introduce support for Mellanox hotplug driver

Enable system support for the Mellanox Technologies hotplug platform
driver, which provides support for the next Mellanox basic systems:
"msx6710", "msx6720", "msb7700", "msn2700", "msx1410", "msn2410",
"msb7800", "msn2740", "msn2100" and also various number of derivative
systems from the above basic types.
This driver handles hot-plug events for the power suppliers, power
cables and fans for the above systems.

The Kconfig currently controlling compilation of this code is:
driver/platform/x86:config MLX_CPLD_PLATFORM
                       tristate "Mellanox platform hotplug driver support"

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
---
 MAINTAINERS                                   |   7 +
 drivers/platform/x86/Kconfig                  |  11 +
 drivers/platform/x86/Makefile                 |   1 +
 drivers/platform/x86/mlxcpld-hotplug.c        | 515 ++++++++++++++++++++++++++
 include/linux/platform_data/mlxcpld-hotplug.h |  99 +++++
 5 files changed, 633 insertions(+)
 create mode 100644 drivers/platform/x86/mlxcpld-hotplug.c
 create mode 100644 include/linux/platform_data/mlxcpld-hotplug.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 1cd38a7e0064..3e30399e715f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7881,6 +7881,13 @@ L:      platform-driver-x86@vger.kernel.org
 S:      Supported
 F:      arch/x86/platform/mellanox/mlx-platform.c
 
+MELLANOX MLX CPLD HOTPLUG DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Supported
+F:	drivers/platform/x86/mlxcpld-hotplug.c
+F:	include/linux/platform_data/mlxcpld-hotplug.h
+
 SOFT-ROCE DRIVER (rxe)
 M:	Moni Shoua <monis@mellanox.com>
 L:	linux-rdma@vger.kernel.org
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index b8a21d7b25d4..185376901d9c 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1027,4 +1027,15 @@ config INTEL_TELEMETRY
 	  used to get various SoC events and parameters
 	  directly via debugfs files. Various tools may use
 	  this interface for SoC state monitoring.
+
+config MLX_CPLD_PLATFORM
+	tristate "Mellanox platform hotplug driver support"
+	default n
+	depends on MLX_PLATFORM
+	select HWMON
+	select I2C
+	---help---
+	  This driver handles hot-plug events for the power suppliers, power
+	  cables and fans on the wide range Mellanox IB and Ethernet systems.
+
 endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 2efa86d2a1a7..1f06b6339cf7 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -71,3 +71,4 @@ obj-$(CONFIG_INTEL_TELEMETRY)	+= intel_telemetry_core.o \
 				   intel_telemetry_pltdrv.o \
 				   intel_telemetry_debugfs.o
 obj-$(CONFIG_INTEL_PMC_CORE)    += intel_pmc_core.o
+obj-$(CONFIG_MLX_CPLD_PLATFORM)	+= mlxcpld-hotplug.o
diff --git a/drivers/platform/x86/mlxcpld-hotplug.c b/drivers/platform/x86/mlxcpld-hotplug.c
new file mode 100644
index 000000000000..aff3686b3b37
--- /dev/null
+++ b/drivers/platform/x86/mlxcpld-hotplug.c
@@ -0,0 +1,515 @@
+/*
+ * drivers/platform/x86/mlxcpld-hotplug.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_data/mlxcpld-hotplug.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+/* Offset of event and mask registers from status register */
+#define MLXCPLD_HOTPLUG_EVENT_OFF	1
+#define MLXCPLD_HOTPLUG_MASK_OFF	2
+#define MLXCPLD_HOTPLUG_AGGR_MASK_OFF	1
+
+#define MLXCPLD_HOTPLUG_ATTRS_NUM	8
+
+/**
+ * enum mlxcpld_hotplug_attr_type - sysfs attributes for hotplug events:
+ * @MLXCPLD_HOTPLUG_ATTR_TYPE_PSU: power supply unit attribute;
+ * @MLXCPLD_HOTPLUG_ATTR_TYPE_PWR: power cable attribute;
+ * @MLXCPLD_HOTPLUG_ATTR_TYPE_FAN: FAN drawer attribute;
+ */
+enum mlxcpld_hotplug_attr_type {
+	MLXCPLD_HOTPLUG_ATTR_TYPE_PSU,
+	MLXCPLD_HOTPLUG_ATTR_TYPE_PWR,
+	MLXCPLD_HOTPLUG_ATTR_TYPE_FAN,
+};
+
+/**
+ * struct mlxcpld_hotplug_priv_data - platform private data:
+ * @irq: platform interrupt number;
+ * @pdev: platform device;
+ * @plat: platform data;
+ * @hwmon: hwmon device;
+ * @mlxcpld_hotplug_attr: sysfs attributes array;
+ * @mlxcpld_hotplug_dev_attr: sysfs sensor device attribute array;
+ * @group: sysfs attribute group;
+ * @groups: list of sysfs attribute group for hwmon registration;
+ * @dwork: delayed work template;
+ * @lock: spin lock;
+ * @aggr_cache: last value of aggregation register status;
+ * @psu_cache: last value of PSU register status;
+ * @pwr_cache: last value of power register status;
+ * @fan_cache: last value of FAN register status;
+ */
+struct mlxcpld_hotplug_priv_data {
+	int irq;
+	struct platform_device *pdev;
+	struct mlxcpld_hotplug_platform_data *plat;
+	struct device *hwmon;
+	struct attribute *mlxcpld_hotplug_attr[MLXCPLD_HOTPLUG_ATTRS_NUM + 1];
+	struct sensor_device_attribute_2
+			mlxcpld_hotplug_dev_attr[MLXCPLD_HOTPLUG_ATTRS_NUM];
+	struct attribute_group group;
+	const struct attribute_group *groups[2];
+	struct delayed_work dwork;
+	spinlock_t lock;
+	u8 aggr_cache;
+	u8 psu_cache;
+	u8 pwr_cache;
+	u8 fan_cache;
+};
+
+static ssize_t mlxcpld_hotplug_attr_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct mlxcpld_hotplug_priv_data *priv = platform_get_drvdata(pdev);
+	int index = to_sensor_dev_attr_2(attr)->index;
+	int nr = to_sensor_dev_attr_2(attr)->nr;
+	u8 reg_val = 0;
+
+	switch (nr) {
+	case MLXCPLD_HOTPLUG_ATTR_TYPE_PSU:
+		/* Bit = 0 : PSU is present. */
+		reg_val = !!!(inb(priv->plat->psu_reg_offset) & BIT(index));
+		break;
+
+	case MLXCPLD_HOTPLUG_ATTR_TYPE_PWR:
+		/* Bit = 1 : power cable is attached. */
+		reg_val = !!(inb(priv->plat->pwr_reg_offset) & BIT(index %
+						priv->plat->pwr_count));
+		break;
+
+	case MLXCPLD_HOTPLUG_ATTR_TYPE_FAN:
+		/* Bit = 0 : FAN is present. */
+		reg_val = !!!(inb(priv->plat->fan_reg_offset) & BIT(index %
+						priv->plat->fan_count));
+		break;
+	}
+
+	return sprintf(buf, "%u\n", reg_val);
+}
+
+#define PRIV_ATTR(i) priv->mlxcpld_hotplug_attr[i]
+#define PRIV_DEV_ATTR(i) priv->mlxcpld_hotplug_dev_attr[i]
+static int mlxcpld_hotplug_attr_init(struct mlxcpld_hotplug_priv_data *priv)
+{
+	int num_attrs = priv->plat->psu_count + priv->plat->pwr_count +
+			priv->plat->fan_count;
+	int i;
+
+	priv->group.attrs = devm_kzalloc(&priv->pdev->dev, num_attrs *
+					 sizeof(struct attribute *),
+					 GFP_KERNEL);
+	if (!priv->group.attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < num_attrs; i++) {
+		PRIV_ATTR(i) = &PRIV_DEV_ATTR(i).dev_attr.attr;
+
+		if (i < priv->plat->psu_count) {
+			PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev,
+						GFP_KERNEL, "psu%u", i + 1);
+			PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_PSU;
+		} else if (i < priv->plat->psu_count + priv->plat->pwr_count) {
+			PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev,
+						GFP_KERNEL, "pwr%u", i %
+						priv->plat->pwr_count + 1);
+			PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_PWR;
+		} else {
+			PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev,
+						GFP_KERNEL, "fan%u", i %
+						priv->plat->fan_count + 1);
+			PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_FAN;
+		}
+
+		if (!PRIV_ATTR(i)->name) {
+			dev_err(&priv->pdev->dev, "Memory allocation failed for sysfs attribute %d.\n",
+				i + 1);
+			return -ENOMEM;
+		}
+
+		PRIV_DEV_ATTR(i).dev_attr.attr.name = PRIV_ATTR(i)->name;
+		PRIV_DEV_ATTR(i).dev_attr.attr.mode = S_IRUGO;
+		PRIV_DEV_ATTR(i).dev_attr.show = mlxcpld_hotplug_attr_show;
+		PRIV_DEV_ATTR(i).index = i;
+		sysfs_attr_init(&PRIV_DEV_ATTR(i).dev_attr.attr);
+	}
+
+	priv->group.attrs = priv->mlxcpld_hotplug_attr;
+	priv->groups[0] = &priv->group;
+	priv->groups[1] = NULL;
+
+	return 0;
+}
+
+static int mlxcpld_hotplug_device_create(struct device *dev,
+					 struct mlxcpld_hotplug_device *item)
+{
+	item->adapter = i2c_get_adapter(item->bus);
+	if (!item->adapter) {
+		dev_err(dev, "Failed to get adapter for bus %d\n",
+			item->bus);
+		return -EFAULT;
+	}
+
+	item->client = i2c_new_device(item->adapter, &item->brdinfo);
+	if (!item->client) {
+		dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+			item->brdinfo.type, item->bus, item->brdinfo.addr);
+		i2c_put_adapter(item->adapter);
+		item->adapter = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void mlxcpld_hotplug_device_destroy(struct mlxcpld_hotplug_device *item)
+{
+	if (item->client) {
+		i2c_unregister_device(item->client);
+		item->client = NULL;
+	}
+
+	if (item->adapter) {
+		i2c_put_adapter(item->adapter);
+		item->adapter = NULL;
+	}
+}
+
+static inline void
+mlxcpld_hotplug_work_helper(struct device *dev,
+			    struct mlxcpld_hotplug_device *item, u8 is_inverse,
+			    u16 offset, u8 mask, u8 *cache)
+{
+	u8 val, asserted;
+	int bit;
+
+	/* Mask event. */
+	outb(0, offset + MLXCPLD_HOTPLUG_MASK_OFF);
+	/* Read status. */
+	val = inb(offset) & mask;
+	asserted = *cache ^ val;
+	*cache = val;
+
+	/*
+	 * Validate if item related to received signal type is valid.
+	 * It should never happen, excepted the situation when some
+	 * piece of hardware is broken. In such situation just produce
+	 * error message and return. Caller must continue to handle the
+	 * signals from other devices if any.
+	 */
+	if (unlikely(!item)) {
+		dev_err(dev, "False signal is received: register at offset 0x%02x, mask 0x%02x.\n",
+			offset, mask);
+		return;
+	}
+
+	for_each_set_bit(bit, (unsigned long *)&asserted, 8) {
+		if (val & BIT(bit)) {
+			if (is_inverse)
+				mlxcpld_hotplug_device_destroy(item + bit);
+			else
+				mlxcpld_hotplug_device_create(dev, item + bit);
+		} else {
+			if (is_inverse)
+				mlxcpld_hotplug_device_create(dev, item + bit);
+			else
+				mlxcpld_hotplug_device_destroy(item + bit);
+		}
+	}
+
+	/* Acknowledge event. */
+	outb(0, offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+	/* Unmask event. */
+	outb(mask, offset + MLXCPLD_HOTPLUG_MASK_OFF);
+}
+
+/*
+ * mlxcpld_hotplug_work_handler - performs traversing of CPLD interrupt
+ * registers according to the below hierarchy schema:
+ *
+ *                   Aggregation registers (status/mask)
+ * PSU registers:           *---*
+ * *-----------------*      |   |
+ * |status/event/mask|----->| * |
+ * *-----------------*      |   |
+ * Power registers:         |   |
+ * *-----------------*      |   |
+ * |status/event/mask|----->| * |---> CPU
+ * *-----------------*      |   |
+ * FAN registers:
+ * *-----------------*      |   |
+ * |status/event/mask|----->| * |
+ * *-----------------*      |   |
+ *                          *---*
+ * In case some system changed are detected: FAN in/out, PSU in/out, power
+ * cable attached/detached, relevant device is created or destroyed.
+ */
+static void mlxcpld_hotplug_work_handler(struct work_struct *work)
+{
+	struct mlxcpld_hotplug_priv_data *priv = container_of(work,
+				struct mlxcpld_hotplug_priv_data, dwork.work);
+	u8 val, aggr_asserted;
+	unsigned long flags;
+
+	/* Mask aggregation event. */
+	outb(0, priv->plat->top_aggr_offset + MLXCPLD_HOTPLUG_AGGR_MASK_OFF);
+	/* Read aggregation status. */
+	val = inb(priv->plat->top_aggr_offset) & priv->plat->top_aggr_mask;
+	aggr_asserted = priv->aggr_cache ^ val;
+	priv->aggr_cache = val;
+
+	/* Handle PSU configuration changes. */
+	if (aggr_asserted & priv->plat->top_aggr_psu_mask)
+		mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->psu,
+					    1, priv->plat->psu_reg_offset,
+					    priv->plat->psu_mask,
+					    &priv->psu_cache);
+
+	/* Handle power cable configuration changes. */
+	if (aggr_asserted & priv->plat->top_aggr_pwr_mask)
+		mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->pwr,
+					    0, priv->plat->pwr_reg_offset,
+					    priv->plat->pwr_mask,
+					    &priv->pwr_cache);
+
+	/* Handle FAN configuration changes. */
+	if (aggr_asserted & priv->plat->top_aggr_fan_mask)
+		mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->fan,
+					    1, priv->plat->fan_reg_offset,
+					    priv->plat->fan_mask,
+					    &priv->fan_cache);
+
+	if (aggr_asserted) {
+		spin_lock_irqsave(&priv->lock, flags);
+
+		/*
+		 * It is possible, that some signals have been inserted, while
+		 * interrupt has been masked by mlxcpld_hotplug_work_handler.
+		 * In this case such signals will be missed. In order to handle
+		 * these signals delayed work is canceled and work task
+		 * re-scheduled for immediate execution. It allows to handle
+		 * missed signals, if any. In other case work handler just
+		 * validates that no new signals have been received during
+		 * masking.
+		 */
+		cancel_delayed_work(&priv->dwork);
+		schedule_delayed_work(&priv->dwork, 0);
+
+		spin_unlock_irqrestore(&priv->lock, flags);
+
+		return;
+	}
+
+	/* Unmask aggregation event (no need acknowledge). */
+	outb(priv->plat->top_aggr_mask, priv->plat->top_aggr_offset +
+						MLXCPLD_HOTPLUG_AGGR_MASK_OFF);
+}
+
+static void mlxcpld_hotplug_set_irq(struct mlxcpld_hotplug_priv_data *priv)
+{
+	/* Clear psu presense event. */
+	outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+	/* Set psu initial status as mask and unmask psu event. */
+	priv->psu_cache = priv->plat->psu_mask;
+	outb(priv->plat->psu_mask, priv->plat->psu_reg_offset +
+						MLXCPLD_HOTPLUG_MASK_OFF);
+
+	/* Clear power cable event. */
+	outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+	/* Keep power initial status as zero and unmask power event. */
+	outb(priv->plat->pwr_mask, priv->plat->pwr_reg_offset +
+						MLXCPLD_HOTPLUG_MASK_OFF);
+
+	/* Clear fan presense event. */
+	outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+	/* Set fan initial status as mask and unmask fan event. */
+	priv->fan_cache = priv->plat->fan_mask;
+	outb(priv->plat->fan_mask, priv->plat->fan_reg_offset +
+						MLXCPLD_HOTPLUG_MASK_OFF);
+
+	/* Keep aggregation initial status as zero and unmask events. */
+	outb(priv->plat->top_aggr_mask, priv->plat->top_aggr_offset +
+						MLXCPLD_HOTPLUG_AGGR_MASK_OFF);
+
+	/* Invoke work handler for initializing hot plug devices setting. */
+	mlxcpld_hotplug_work_handler(&priv->dwork.work);
+
+	enable_irq(priv->irq);
+}
+
+static void mlxcpld_hotplug_unset_irq(struct mlxcpld_hotplug_priv_data *priv)
+{
+	int i;
+
+	disable_irq(priv->irq);
+	cancel_delayed_work_sync(&priv->dwork);
+
+	/* Mask aggregation event. */
+	outb(0, priv->plat->top_aggr_offset + MLXCPLD_HOTPLUG_AGGR_MASK_OFF);
+
+	/* Mask psu presense event. */
+	outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF);
+	/* Clear psu presense event. */
+	outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+
+	/* Mask power cable event. */
+	outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF);
+	/* Clear power cable event. */
+	outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+
+	/* Mask fan presense event. */
+	outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF);
+	/* Clear fan presense event. */
+	outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF);
+
+	/* Remove all the attached devices. */
+	for (i = 0; i < priv->plat->psu_count; i++)
+		mlxcpld_hotplug_device_destroy(priv->plat->psu + i);
+
+	for (i = 0; i < priv->plat->pwr_count; i++)
+		mlxcpld_hotplug_device_destroy(priv->plat->pwr + i);
+
+	for (i = 0; i < priv->plat->fan_count; i++)
+		mlxcpld_hotplug_device_destroy(priv->plat->fan + i);
+}
+
+static irqreturn_t mlxcpld_hotplug_irq_handler(int irq, void *dev)
+{
+	struct mlxcpld_hotplug_priv_data *priv =
+				(struct mlxcpld_hotplug_priv_data *)dev;
+
+	/* Schedule work task for immediate execution.*/
+	schedule_delayed_work(&priv->dwork, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int mlxcpld_hotplug_probe(struct platform_device *pdev)
+{
+	struct mlxcpld_hotplug_platform_data *pdata;
+	struct mlxcpld_hotplug_priv_data *priv;
+	int err;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "Failed to get platform data.\n");
+		return -EINVAL;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->pdev = pdev;
+	priv->plat = pdata;
+
+	priv->irq = platform_get_irq(pdev, 0);
+	if (priv->irq < 0) {
+		dev_err(&pdev->dev, "Failed to get platform irq: %d\n",
+			priv->irq);
+		return priv->irq;
+	}
+
+	err = devm_request_irq(&pdev->dev, priv->irq,
+				mlxcpld_hotplug_irq_handler, 0, pdev->name,
+				priv);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to request irq: %d\n", err);
+		return err;
+	}
+	disable_irq(priv->irq);
+
+	INIT_DELAYED_WORK(&priv->dwork, mlxcpld_hotplug_work_handler);
+	spin_lock_init(&priv->lock);
+
+	err = mlxcpld_hotplug_attr_init(priv);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to allocate attributes: %d\n", err);
+		return err;
+	}
+
+	priv->hwmon = devm_hwmon_device_register_with_groups(&pdev->dev,
+					"mlxcpld_hotplug", priv, priv->groups);
+	if (IS_ERR(priv->hwmon)) {
+		dev_err(&pdev->dev, "Failed to register hwmon device %ld\n",
+			PTR_ERR(priv->hwmon));
+		return PTR_ERR(priv->hwmon);
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	/* Perform initial interrupts setup. */
+	mlxcpld_hotplug_set_irq(priv);
+
+	return 0;
+}
+
+static int mlxcpld_hotplug_remove(struct platform_device *pdev)
+{
+	struct mlxcpld_hotplug_priv_data *priv = platform_get_drvdata(pdev);
+
+	/* Clean interrupts setup. */
+	mlxcpld_hotplug_unset_irq(priv);
+
+	return 0;
+}
+
+static struct platform_driver mlxcpld_hotplug_driver = {
+	.driver = {
+		.name = "mlxcpld-hotplug",
+	},
+	.probe = mlxcpld_hotplug_probe,
+	.remove = mlxcpld_hotplug_remove,
+};
+
+module_platform_driver(mlxcpld_hotplug_driver);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox CPLD hotplug platform driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("platform:mlxcpld-hotplug");
diff --git a/include/linux/platform_data/mlxcpld-hotplug.h b/include/linux/platform_data/mlxcpld-hotplug.h
new file mode 100644
index 000000000000..e4cfcffaa6f4
--- /dev/null
+++ b/include/linux/platform_data/mlxcpld-hotplug.h
@@ -0,0 +1,99 @@
+/*
+ * include/linux/platform_data/mlxcpld-hotplug.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H
+#define __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H
+
+/**
+ * struct mlxcpld_hotplug_device - I2C device data:
+ * @adapter: I2C device adapter;
+ * @client: I2C device client;
+ * @brdinfo: device board information;
+ * @bus: I2C bus, where device is attached;
+ *
+ * Structure represents I2C hotplug device static data (board topology) and
+ * dynamic data (related kernel objects handles).
+ */
+struct mlxcpld_hotplug_device {
+	struct i2c_adapter *adapter;
+	struct i2c_client *client;
+	struct i2c_board_info brdinfo;
+	u16 bus;
+};
+
+/**
+ * struct mlxcpld_hotplug_platform_data - device platform data:
+ * @top_aggr_offset: offset of top aggregation interrupt register;
+ * @top_aggr_mask: top aggregation interrupt common mask;
+ * @top_aggr_psu_mask: top aggregation interrupt PSU mask;
+ * @psu_reg_offset: offset of PSU interrupt register;
+ * @psu_mask: PSU interrupt mask;
+ * @psu_count: number of equipped replaceable PSUs;
+ * @psu: pointer to PSU devices data array;
+ * @top_aggr_pwr_mask: top aggregation interrupt power mask;
+ * @pwr_reg_offset: offset of power interrupt register
+ * @pwr_mask: power interrupt mask;
+ * @pwr_count: number of power sources;
+ * @pwr: pointer to power devices data array;
+ * @top_aggr_fan_mask: top aggregation interrupt FAN mask;
+ * @fan_reg_offset: offset of FAN interrupt register;
+ * @fan_mask: FAN interrupt mask;
+ * @fan_count: number of equipped replaceable FANs;
+ * @fan: pointer to FAN devices data array;
+ *
+ * Structure represents board platform data, related to system hotplug events,
+ * like FAN, PSU, power cable insertion and removing. This data provides the
+ * number of hot-pluggable devices and hardware description for event handling.
+ */
+struct mlxcpld_hotplug_platform_data {
+	u16 top_aggr_offset;
+	u8 top_aggr_mask;
+	u8 top_aggr_psu_mask;
+	u16 psu_reg_offset;
+	u8 psu_mask;
+	u8 psu_count;
+	struct mlxcpld_hotplug_device *psu;
+	u8 top_aggr_pwr_mask;
+	u16 pwr_reg_offset;
+	u8 pwr_mask;
+	u8 pwr_count;
+	struct mlxcpld_hotplug_device *pwr;
+	u8 top_aggr_fan_mask;
+	u16 fan_reg_offset;
+	u8 fan_mask;
+	u8 fan_count;
+	struct mlxcpld_hotplug_device *fan;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H */
-- 
cgit v1.2.3


From b440f1d90ec54fd2586537ea46e958343ad4b151 Mon Sep 17 00:00:00 2001
From: Tomas Novotny <tomas@novotny.cz>
Date: Tue, 11 Oct 2016 15:57:40 +0200
Subject: iio: dac: mcp4725: use regulator framework

Use a standard framework to get the reference voltage. It is done that way
in the iio subsystem and it will simplify extending of the driver.

Structure mcp4725_platform_data is left undeleted because it used in the
next patch.

This change breaks the current users of the driver, but there is no
mainline user of struct mcp4725_platform_data.

Signed-off-by: Tomas Novotny <tomas@novotny.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/dac/mcp4725.c       | 46 +++++++++++++++++++++++++++++++++--------
 include/linux/iio/dac/mcp4725.h |  1 -
 2 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index cca935c06f2b..2b28b1f5b3a2 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -18,6 +18,7 @@
 #include <linux/i2c.h>
 #include <linux/err.h>
 #include <linux/delay.h>
+#include <linux/regulator/consumer.h>
 
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
@@ -28,10 +29,10 @@
 
 struct mcp4725_data {
 	struct i2c_client *client;
-	u16 vref_mv;
 	u16 dac_value;
 	bool powerdown;
 	unsigned powerdown_mode;
+	struct regulator *vdd_reg;
 };
 
 static int mcp4725_suspend(struct device *dev)
@@ -283,13 +284,18 @@ static int mcp4725_read_raw(struct iio_dev *indio_dev,
 			   int *val, int *val2, long mask)
 {
 	struct mcp4725_data *data = iio_priv(indio_dev);
+	int ret;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		*val = data->dac_value;
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
-		*val = data->vref_mv;
+		ret = regulator_get_voltage(data->vdd_reg);
+		if (ret < 0)
+			return ret;
+
+		*val = ret / 1000;
 		*val2 = 12;
 		return IIO_VAL_FRACTIONAL_LOG2;
 	}
@@ -328,12 +334,12 @@ static int mcp4725_probe(struct i2c_client *client,
 {
 	struct mcp4725_data *data;
 	struct iio_dev *indio_dev;
-	struct mcp4725_platform_data *platform_data = client->dev.platform_data;
+	struct mcp4725_platform_data *pdata = dev_get_platdata(&client->dev);
 	u8 inbuf[3];
 	u8 pd;
 	int err;
 
-	if (!platform_data || !platform_data->vref_mv) {
+	if (!pdata) {
 		dev_err(&client->dev, "invalid platform data");
 		return -EINVAL;
 	}
@@ -345,6 +351,14 @@ static int mcp4725_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, indio_dev);
 	data->client = client;
 
+	data->vdd_reg = devm_regulator_get(&client->dev, "vdd");
+	if (IS_ERR(data->vdd_reg))
+		return PTR_ERR(data->vdd_reg);
+
+	err = regulator_enable(data->vdd_reg);
+	if (err)
+		return err;
+
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->name = id->name;
 	indio_dev->info = &mcp4725_info;
@@ -352,25 +366,39 @@ static int mcp4725_probe(struct i2c_client *client,
 	indio_dev->num_channels = 1;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
-	data->vref_mv = platform_data->vref_mv;
-
 	/* read current DAC value */
 	err = i2c_master_recv(client, inbuf, 3);
 	if (err < 0) {
 		dev_err(&client->dev, "failed to read DAC value");
-		return err;
+		goto err_disable_vdd_reg;
 	}
 	pd = (inbuf[0] >> 1) & 0x3;
 	data->powerdown = pd > 0 ? true : false;
 	data->powerdown_mode = pd ? pd - 1 : 2; /* largest register to gnd */
 	data->dac_value = (inbuf[1] << 4) | (inbuf[2] >> 4);
 
-	return iio_device_register(indio_dev);
+	err = iio_device_register(indio_dev);
+	if (err)
+		goto err_disable_vdd_reg;
+
+	return 0;
+
+
+err_disable_vdd_reg:
+	regulator_disable(data->vdd_reg);
+
+	return err;
 }
 
 static int mcp4725_remove(struct i2c_client *client)
 {
-	iio_device_unregister(i2c_get_clientdata(client));
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct mcp4725_data *data = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+
+	regulator_disable(data->vdd_reg);
+
 	return 0;
 }
 
diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h
index 91530e6611e9..7c062e8d2a48 100644
--- a/include/linux/iio/dac/mcp4725.h
+++ b/include/linux/iio/dac/mcp4725.h
@@ -10,7 +10,6 @@
 #define IIO_DAC_MCP4725_H_
 
 struct mcp4725_platform_data {
-	u16 vref_mv;
 };
 
 #endif /* IIO_DAC_MCP4725_H_ */
-- 
cgit v1.2.3


From 29157c6d601db8cb9f3bea93fc933b73db3bf869 Mon Sep 17 00:00:00 2001
From: Tomas Novotny <tomas@novotny.cz>
Date: Tue, 18 Oct 2016 19:43:08 +0200
Subject: iio: dac: mcp4725: support voltage reference selection

MCP47x6 chip supports selection of a voltage reference (VDD, VREF buffered
or unbuffered). MCP4725 doesn't have this feature thus the eventual setting
is ignored and user is warned.

The setting is stored only in the volatile memory of the chip. You need to
manually store it to the EEPROM of the chip via 'store_eeprom' sysfs entry.

Signed-off-by: Tomas Novotny <tomas@novotny.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/dac/mcp4725.c       | 99 ++++++++++++++++++++++++++++++++++++++---
 include/linux/iio/dac/mcp4725.h | 11 +++++
 2 files changed, 103 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 5b2dfa0a0d2c..1e9d8f387e00 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -27,12 +27,20 @@
 
 #define MCP4725_DRV_NAME "mcp4725"
 
+#define MCP472X_REF_VDD			0x00
+#define MCP472X_REF_VREF_UNBUFFERED	0x02
+#define MCP472X_REF_VREF_BUFFERED	0x03
+
 struct mcp4725_data {
 	struct i2c_client *client;
+	int id;
+	unsigned ref_mode;
+	bool vref_buffered;
 	u16 dac_value;
 	bool powerdown;
 	unsigned powerdown_mode;
 	struct regulator *vdd_reg;
+	struct regulator *vref_reg;
 };
 
 static int mcp4725_suspend(struct device *dev)
@@ -87,6 +95,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev,
 		return 0;
 
 	inoutbuf[0] = 0x60; /* write EEPROM */
+	inoutbuf[0] |= data->ref_mode << 3;
 	inoutbuf[1] = data->dac_value >> 4;
 	inoutbuf[2] = (data->dac_value & 0xf) << 4;
 
@@ -279,6 +288,28 @@ static int mcp4725_set_value(struct iio_dev *indio_dev, int val)
 		return 0;
 }
 
+static int mcp4726_set_cfg(struct iio_dev *indio_dev)
+{
+	struct mcp4725_data *data = iio_priv(indio_dev);
+	u8 outbuf[3];
+	int ret;
+
+	outbuf[0] = 0x40;
+	outbuf[0] |= data->ref_mode << 3;
+	if (data->powerdown)
+		outbuf[0] |= data->powerdown << 1;
+	outbuf[1] = data->dac_value >> 4;
+	outbuf[2] = (data->dac_value & 0xf) << 4;
+
+	ret = i2c_master_send(data->client, outbuf, 3);
+	if (ret < 0)
+		return ret;
+	else if (ret != 3)
+		return -EIO;
+	else
+		return 0;
+}
+
 static int mcp4725_read_raw(struct iio_dev *indio_dev,
 			   struct iio_chan_spec const *chan,
 			   int *val, int *val2, long mask)
@@ -291,7 +322,11 @@ static int mcp4725_read_raw(struct iio_dev *indio_dev,
 		*val = data->dac_value;
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
-		ret = regulator_get_voltage(data->vdd_reg);
+		if (data->ref_mode == MCP472X_REF_VDD)
+			ret = regulator_get_voltage(data->vdd_reg);
+		else
+			ret = regulator_get_voltage(data->vref_reg);
+
 		if (ret < 0)
 			return ret;
 
@@ -335,8 +370,9 @@ static int mcp4725_probe(struct i2c_client *client,
 	struct mcp4725_data *data;
 	struct iio_dev *indio_dev;
 	struct mcp4725_platform_data *pdata = dev_get_platdata(&client->dev);
-	u8 inbuf[3];
+	u8 inbuf[4];
 	u8 pd;
+	u8 ref;
 	int err;
 
 	if (!pdata) {
@@ -350,6 +386,26 @@ static int mcp4725_probe(struct i2c_client *client,
 	data = iio_priv(indio_dev);
 	i2c_set_clientdata(client, indio_dev);
 	data->client = client;
+	data->id = id->driver_data;
+
+	if (data->id == MCP4725 && pdata->use_vref) {
+		dev_err(&client->dev,
+			"external reference is unavailable on MCP4725");
+		return -EINVAL;
+	}
+
+	if (!pdata->use_vref && pdata->vref_buffered) {
+		dev_err(&client->dev,
+			"buffering is unavailable on the internal reference");
+		return -EINVAL;
+	}
+
+	if (!pdata->use_vref)
+		data->ref_mode = MCP472X_REF_VDD;
+	else
+		data->ref_mode = pdata->vref_buffered ?
+			MCP472X_REF_VREF_BUFFERED :
+			MCP472X_REF_VREF_UNBUFFERED;
 
 	data->vdd_reg = devm_regulator_get(&client->dev, "vdd");
 	if (IS_ERR(data->vdd_reg))
@@ -359,6 +415,18 @@ static int mcp4725_probe(struct i2c_client *client,
 	if (err)
 		return err;
 
+	if (pdata->use_vref) {
+		data->vref_reg = devm_regulator_get(&client->dev, "vref");
+		if (IS_ERR(data->vref_reg)) {
+			err = PTR_ERR(data->vdd_reg);
+			goto err_disable_vdd_reg;
+		}
+
+		err = regulator_enable(data->vref_reg);
+		if (err)
+			goto err_disable_vdd_reg;
+	}
+
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->name = id->name;
 	indio_dev->info = &mcp4725_info;
@@ -366,23 +434,38 @@ static int mcp4725_probe(struct i2c_client *client,
 	indio_dev->num_channels = 1;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
-	/* read current DAC value */
-	err = i2c_master_recv(client, inbuf, 3);
+	/* read current DAC value and settings */
+	err = i2c_master_recv(client, inbuf, data->id == MCP4725 ? 3 : 4);
+
 	if (err < 0) {
 		dev_err(&client->dev, "failed to read DAC value");
-		goto err_disable_vdd_reg;
+		goto err_disable_vref_reg;
 	}
 	pd = (inbuf[0] >> 1) & 0x3;
 	data->powerdown = pd > 0 ? true : false;
 	data->powerdown_mode = pd ? pd - 1 : 2; /* largest resistor to gnd */
 	data->dac_value = (inbuf[1] << 4) | (inbuf[2] >> 4);
-
+	if (data->id == MCP4726)
+		ref = (inbuf[3] >> 3) & 0x3;
+
+	if (data->id == MCP4726 && ref != data->ref_mode) {
+		dev_info(&client->dev,
+			"voltage reference mode differs (conf: %u, eeprom: %u), setting %u",
+			data->ref_mode, ref, data->ref_mode);
+		err = mcp4726_set_cfg(indio_dev);
+		if (err < 0)
+			goto err_disable_vref_reg;
+	}
+ 
 	err = iio_device_register(indio_dev);
 	if (err)
-		goto err_disable_vdd_reg;
+		goto err_disable_vref_reg;
 
 	return 0;
 
+err_disable_vref_reg:
+	if (data->vref_reg)
+		regulator_disable(data->vref_reg);
 
 err_disable_vdd_reg:
 	regulator_disable(data->vdd_reg);
@@ -397,6 +480,8 @@ static int mcp4725_remove(struct i2c_client *client)
 
 	iio_device_unregister(indio_dev);
 
+	if (data->vref_reg)
+		regulator_disable(data->vref_reg);
 	regulator_disable(data->vdd_reg);
 
 	return 0;
diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h
index 7c062e8d2a48..628b2cf54c50 100644
--- a/include/linux/iio/dac/mcp4725.h
+++ b/include/linux/iio/dac/mcp4725.h
@@ -9,7 +9,18 @@
 #ifndef IIO_DAC_MCP4725_H_
 #define IIO_DAC_MCP4725_H_
 
+/**
+ * struct mcp4725_platform_data - MCP4725/6 DAC specific data.
+ * @use_vref: Whether an external reference voltage on Vref pin should be used.
+ *            Additional vref-supply must be specified when used.
+ * @vref_buffered: Controls buffering of the external reference voltage.
+ *
+ * Vref related settings are available only on MCP4756. See
+ * Documentation/devicetree/bindings/iio/dac/mcp4725.txt for more information.
+ */
 struct mcp4725_platform_data {
+	bool use_vref;
+	bool vref_buffered;
 };
 
 #endif /* IIO_DAC_MCP4725_H_ */
-- 
cgit v1.2.3


From 7b889adbac8db4220e07c93fff0b0b235d08496b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <sthemmin@microsoft.com>
Date: Sun, 23 Oct 2016 09:30:44 -0700
Subject: doc: add missing docbook parameter for fence-array

Fixes 'make htmldocs' warning.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161023093044.324edfb6@xeon-e3
---
 include/linux/fence-array.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h
index a44794e508df..9ea2bde10ac1 100644
--- a/include/linux/fence-array.h
+++ b/include/linux/fence-array.h
@@ -53,6 +53,7 @@ extern const struct fence_ops fence_array_ops;
 
 /**
  * fence_is_array - check if a fence is from the array subsclass
+ * @fence: fence to test
  *
  * Return true if it is a fence_array and false otherwise.
  */
-- 
cgit v1.2.3


From 8c27ceff3604b249a9efafbd1bd8b141b79e619d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 18 Oct 2016 10:12:27 -0200
Subject: docs: fix locations of several documents that got moved

The previous patch renamed several files that are cross-referenced
along the Kernel documentation. Adjust the links to point to
the right places.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/00-INDEX                             | 54 +++++++++++-----------
 Documentation/ABI/README                           |  2 +-
 Documentation/ABI/testing/sysfs-kernel-slab        |  2 +-
 Documentation/DocBook/kernel-hacking.tmpl          |  4 +-
 Documentation/acpi/video_extension.txt             |  2 +-
 Documentation/admin-guide/README.rst               | 13 +++---
 Documentation/admin-guide/bad-memory.rst           |  2 +-
 Documentation/admin-guide/binfmt-misc.rst          |  4 +-
 Documentation/admin-guide/braille-console.rst      |  6 +--
 Documentation/admin-guide/bug-hunting.rst          |  7 +--
 Documentation/admin-guide/devices.rst              |  2 +-
 Documentation/admin-guide/kernel-parameters.rst    |  6 +--
 Documentation/admin-guide/oops-tracing.rst         |  2 +-
 Documentation/admin-guide/ramoops.rst              |  2 +-
 Documentation/admin-guide/reporting-bugs.rst       |  6 +--
 Documentation/admin-guide/security-bugs.rst        |  2 +-
 Documentation/admin-guide/unicode.rst              |  2 +-
 Documentation/arm/Booting                          |  2 +-
 Documentation/atomic_ops.txt                       |  2 +-
 Documentation/blockdev/ramdisk.txt                 |  2 +-
 Documentation/cgroup-v1/00-INDEX                   |  2 +-
 .../devicetree/bindings/rtc/maxim,ds3231.txt       |  2 +-
 Documentation/devicetree/bindings/rtc/pcf8563.txt  |  2 +-
 .../devicetree/bindings/submitting-patches.txt     |  2 +-
 Documentation/filesystems/locks.txt                |  2 +-
 Documentation/filesystems/nfs/nfsroot.txt          |  4 +-
 Documentation/frv/booting.txt                      |  2 +-
 Documentation/hwmon/submitting-patches             |  8 ++--
 Documentation/isdn/README                          |  2 +-
 Documentation/ja_JP/HOWTO                          | 24 +++++-----
 Documentation/ja_JP/SubmitChecklist                |  8 ++--
 Documentation/ja_JP/SubmittingPatches              | 18 ++++----
 Documentation/ja_JP/stable_api_nonsense.txt        |  4 +-
 Documentation/ja_JP/stable_kernel_rules.txt        |  6 +--
 Documentation/kernel-per-CPU-kthreads.txt          |  2 +-
 Documentation/ko_KR/HOWTO                          | 30 ++++++------
 Documentation/ko_KR/stable_api_nonsense.txt        |  4 +-
 Documentation/lockup-watchdogs.txt                 |  4 +-
 Documentation/m68k/kernel-options.txt              |  2 +-
 Documentation/media/uapi/v4l/diff-v4l.rst          |  4 +-
 Documentation/media/v4l-drivers/bttv.rst           |  4 +-
 Documentation/memory-hotplug.txt                   |  2 +-
 Documentation/networking/netconsole.txt            |  2 +-
 Documentation/networking/netdev-FAQ.txt            |  8 ++--
 Documentation/networking/vortex.txt                |  2 +-
 Documentation/power/00-INDEX                       |  2 +-
 Documentation/power/pci.txt                        | 10 ++--
 Documentation/power/runtime_pm.txt                 |  2 +-
 Documentation/power/swsusp-dmcrypt.txt             |  2 +-
 Documentation/process/4.Coding.rst                 |  4 +-
 Documentation/process/5.Posting.rst                | 12 ++---
 Documentation/process/8.Conclusion.rst             |  6 +--
 Documentation/process/adding-syscalls.rst          |  2 +-
 Documentation/process/coding-style.rst             |  2 +-
 Documentation/process/howto.rst                    | 24 +++++-----
 Documentation/process/management-style.rst         |  2 +-
 Documentation/process/stable-kernel-rules.rst      |  4 +-
 Documentation/process/submit-checklist.rst         |  6 +--
 Documentation/process/submitting-drivers.rst       |  8 ++--
 Documentation/process/submitting-patches.rst       | 14 +++---
 Documentation/rfkill.txt                           |  2 +-
 Documentation/scsi/scsi-parameters.txt             |  2 +-
 Documentation/scsi/scsi_mid_low_api.txt            |  2 +-
 Documentation/scsi/sym53c8xx_2.txt                 |  2 +-
 Documentation/sound/alsa/alsa-parameters.txt       |  2 +-
 Documentation/sound/oss/oss-parameters.txt         |  2 +-
 Documentation/sysctl/kernel.txt                    |  4 +-
 Documentation/virtual/kvm/review-checklist.txt     |  4 +-
 Documentation/vm/numa                              |  2 +-
 .../watchdog/convert_drivers_to_kernel_api.txt     |  2 +-
 Documentation/watchdog/watchdog-parameters.txt     |  2 +-
 Documentation/x86/boot.txt                         |  2 +-
 Documentation/zh_CN/CodingStyle                    |  6 +--
 Documentation/zh_CN/HOWTO                          | 30 ++++++------
 Documentation/zh_CN/SecurityBugs                   |  6 +--
 Documentation/zh_CN/SubmittingDrivers              | 12 ++---
 Documentation/zh_CN/SubmittingPatches              | 14 +++---
 Documentation/zh_CN/arm/Booting                    |  2 +-
 Documentation/zh_CN/email-clients.txt              |  4 +-
 Documentation/zh_CN/oops-tracing.txt               |  6 +--
 Documentation/zh_CN/stable_api_nonsense.txt        |  4 +-
 Documentation/zh_CN/stable_kernel_rules.txt        |  6 +--
 .../zh_CN/volatile-considered-harmful.txt          |  4 +-
 MAINTAINERS                                        | 10 ++--
 arch/x86/Kconfig                                   |  2 +-
 drivers/acpi/Kconfig                               |  2 +-
 drivers/ata/libata-core.c                          |  2 +-
 drivers/char/pcmcia/cm4000_cs.c                    |  4 +-
 drivers/net/can/grcan.c                            |  2 +-
 drivers/nvdimm/Kconfig                             |  2 +-
 drivers/staging/vme/devices/vme_user.c             |  2 +-
 drivers/video/fbdev/skeletonfb.c                   |  8 ++--
 drivers/virtio/Kconfig                             |  2 +-
 fs/Kconfig.binfmt                                  |  4 +-
 fs/pstore/Kconfig                                  |  2 +-
 include/linux/device.h                             |  2 +-
 include/linux/pm.h                                 |  2 +-
 include/uapi/linux/major.h                         |  2 +-
 init/Kconfig                                       |  2 +-
 init/main.c                                        |  2 +-
 lib/Kconfig.debug                                  |  2 +-
 scripts/checkpatch.pl                              |  6 +--
 tools/testing/selftests/futex/README               |  2 +-
 103 files changed, 280 insertions(+), 278 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index d07575a8499e..39caa6544d1f 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -15,11 +15,11 @@ Following translations are available on the WWW:
 ABI/
 	- info on kernel <-> userspace ABI and relative interface stability.
 
-BUG-HUNTING
+admin-guide/bug-hunting.rst
 	- brute force method of doing binary search of patches to find bug.
-Changes
+process/changes.rst
 	- list of changes that break older software packages.
-CodingStyle
+process/coding-style.rst
 	- how the maintainers expect the C code in the kernel to look.
 DMA-API.txt
 	- DMA API, pci_ API & extensions for non-consistent memory machines.
@@ -33,7 +33,7 @@ DocBook/
 	- directory with DocBook templates etc. for kernel documentation.
 EDID/
 	- directory with info on customizing EDID for broken gfx/displays.
-HOWTO
+process/howto.rst
 	- the process and procedures of how to do Linux kernel development.
 IPMI.txt
 	- info on Linux Intelligent Platform Management Interface (IPMI) Driver.
@@ -48,7 +48,7 @@ Intel-IOMMU.txt
 Makefile
 	- This file does nothing. Removing it breaks make htmldocs and
 	  make distclean.
-ManagementStyle
+process/management-style.rst
 	- how to (attempt to) manage kernel hackers.
 RCU/
 	- directory with info on RCU (read-copy update).
@@ -56,13 +56,13 @@ SAK.txt
 	- info on Secure Attention Keys.
 SM501.txt
 	- Silicon Motion SM501 multimedia companion chip
-SecurityBugs
+admin-guide/security-bugs.rst
 	- procedure for reporting security bugs found in the kernel.
-SubmitChecklist
+process/submit-checklist.rst
 	- Linux kernel patch submission checklist.
-SubmittingDrivers
+process/submitting-drivers.rst
 	- procedure to get a new driver source included into the kernel tree.
-SubmittingPatches
+process/submitting-patches.rst
 	- procedure to get a source patch included into the kernel tree.
 VGA-softcursor.txt
 	- how to change your VGA cursor from a blinking underscore.
@@ -72,7 +72,7 @@ acpi/
 	- info on ACPI-specific hooks in the kernel.
 aoe/
 	- description of AoE (ATA over Ethernet) along with config examples.
-applying-patches.txt
+process/applying-patches.rst
 	- description of various trees and how to apply their patches.
 arm/
 	- directory with info about Linux on the ARM architecture.
@@ -86,7 +86,7 @@ auxdisplay/
 	- misc. LCD driver documentation (cfag12864b, ks0108).
 backlight/
 	- directory with info on controlling backlights in flat panel displays
-bad_memory.txt
+admin-guide/bad-memory.rst
 	- how to use kernel parameters to exclude bad RAM regions.
 basic_profiling.txt
 	- basic instructions for those who wants to profile Linux kernel.
@@ -154,7 +154,7 @@ process/
 	- how to work with the mainline kernel development process.
 device-mapper/
 	- directory with info on Device Mapper.
-devices.txt
+admin-guide/devices.rst
 	- plain ASCII listing of all the nodes in /dev/ with major minor #'s.
 devicetree/
 	- directory with info on device tree files used by OF/PowerPC/ARM
@@ -178,7 +178,7 @@ efi-stub.txt
 	- How to use the EFI boot stub to bypass GRUB or elilo on EFI systems.
 eisa.txt
 	- info on EISA bus support.
-email-clients.txt
+process/email-clients.rst
 	- info on how to use e-mail to send un-mangled (git) patches.
 extcon/
 	- directory with porting guide for Android kernel switch driver.
@@ -226,9 +226,9 @@ ia64/
 	- directory with info about Linux on Intel 64 bit architecture.
 infiniband/
 	- directory with documents concerning Linux InfiniBand support.
-init.txt
+admin-guide/init.rst
 	- what to do when the kernel can't find the 1st process to run.
-initrd.txt
+admin-guide/initrd.rst
 	- how to use the RAM disk as an initial/temporary root filesystem.
 input/
 	- info on Linux input device support.
@@ -248,7 +248,7 @@ isapnp.txt
 	- info on Linux ISA Plug & Play support.
 isdn/
 	- directory with info on the Linux ISDN support, and supported cards.
-java.txt
+admin-guide/java.rst
 	- info on the in-kernel binary support for Java(tm).
 ja_JP/
 	- directory with Japanese translations of various documents
@@ -256,11 +256,11 @@ kbuild/
 	- directory with info about the kernel build process.
 kdump/
 	- directory with mini HowTo on getting the crash dump code to work.
-kernel-docs.txt
+process/kernel-docs.rst
 	- listing of various WWW + books that document kernel internals.
 kernel-documentation.rst
 	- how to write and format reStructuredText kernel documentation
-kernel-parameters.txt
+admin-guide/kernel-parameters.rst
 	- summary listing of command line / boot prompt args for the kernel.
 kernel-per-CPU-kthreads.txt
 	- List of all per-CPU kthreads and how they introduce jitter.
@@ -302,7 +302,7 @@ magic-number.txt
 	- list of magic numbers used to mark/protect kernel data structures.
 mailbox.txt
 	- How to write drivers for the common mailbox framework (IPC).
-md.txt
+admin-guide/md.rst
 	- info on boot arguments for the multiple devices driver.
 media-framework.txt
 	- info on media framework, its data structures, functions and usage.
@@ -326,7 +326,7 @@ module-signing.txt
 	- Kernel module signing for increased security when loading modules.
 mtd/
 	- directory with info about memory technology devices (flash)
-mono.txt
+admin-guide/mono.rst
 	- how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
 namespaces/
 	- directory with various information about namespaces
@@ -340,7 +340,7 @@ nommu-mmap.txt
 	- documentation about no-mmu memory mapping support.
 numastat.txt
 	- info on how to read Numa policy hit/miss statistics in sysfs.
-oops-tracing.txt
+admin-guide/oops-tracing.rst
 	- how to decode those nasty internal kernel error dump messages.
 padata.txt
 	- An introduction to the "padata" parallel execution API
@@ -378,7 +378,7 @@ ptp/
 	- directory with info on support for IEEE 1588 PTP clocks in Linux.
 pwm.txt
 	- info on the pulse width modulation driver subsystem
-ramoops.txt
+admin-guide/ramoops.rst
 	- documentation of the ramoops oops/panic logging module.
 rapidio/
 	- directory with info on RapidIO packet-based fabric interconnect
@@ -406,7 +406,7 @@ security/
 	- directory that contains security-related info
 serial/
 	- directory with info on the low level serial API.
-serial-console.txt
+admin-guide/serial-console.rst
 	- how to set up Linux with a serial line console as the default.
 sgi-ioc4.txt
 	- description of the SGI IOC4 PCI (multi function) device.
@@ -420,9 +420,9 @@ sparse.txt
 	- info on how to obtain and use the sparse tool for typechecking.
 spi/
 	- overview of Linux kernel Serial Peripheral Interface (SPI) support.
-stable_api_nonsense.txt
+process/stable-api-nonsense.rst
 	- info on why the kernel does not have a stable in-kernel api or abi.
-stable_kernel_rules.txt
+process/stable-kernel-rules.rst
 	- rules and procedures for the -stable kernel releases.
 static-keys.txt
 	- info on how static keys allow debug code in hotpaths via patching
@@ -444,7 +444,7 @@ trace/
 	- directory with info on tracing technologies within linux
 unaligned-memory-access.txt
 	- info on how to avoid arch breaking unaligned memory access in code.
-unicode.txt
+admin-guide/unicode.rst
 	- info on the Unicode character/font mapping used in Linux.
 unshare.txt
 	- description of the Linux unshare system call.
@@ -466,7 +466,7 @@ vm/
 	- directory with info on the Linux vm code.
 vme_api.txt
 	- file relating info on the VME bus API in linux
-volatile-considered-harmful.txt
+process/volatile-considered-harmful.rst
 	- Why the "volatile" type class should not be used
 w1/
 	- directory with documents regarding the 1-wire (w1) subsystem.
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
index 1fafc4b0753b..3121029dce21 100644
--- a/Documentation/ABI/README
+++ b/Documentation/ABI/README
@@ -84,4 +84,4 @@ stable:
 
 - Kernel-internal symbols.  Do not rely on the presence, absence, location, or
   type of any kernel symbol, either in System.map files or the kernel binary
-  itself.  See Documentation/stable_api_nonsense.txt.
+  itself.  See Documentation/process/stable-api-nonsense.rst.
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 91bd6ca5440f..2cc0a72b64be 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -347,7 +347,7 @@ Description:
 		because of fragmentation, SLUB will retry with the minimum order
 		possible depending on its characteristics.
 		When debug_guardpage_minorder=N (N > 0) parameter is specified
-		(see Documentation/kernel-parameters.txt), the minimum possible
+		(see Documentation/admin-guide/kernel-parameters.rst), the minimum possible
 		order is used and this sysfs entry can not be used to change
 		the order at run time.
 
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 2a272275c81b..da5c087462b1 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1208,8 +1208,8 @@ static struct block_device_operations opt_fops = {
    
    <listitem>
     <para>
-     Finally, don't forget to read <filename>Documentation/SubmittingPatches</filename>
-     and possibly <filename>Documentation/SubmittingDrivers</filename>.
+     Finally, don't forget to read <filename>Documentation/process/submitting-patches.rst</filename>
+     and possibly <filename>Documentation/process/submitting-drivers.rst</filename>.
     </para>
    </listitem>
   </itemizedlist>
diff --git a/Documentation/acpi/video_extension.txt b/Documentation/acpi/video_extension.txt
index 78b32ac02466..79bf6a4921be 100644
--- a/Documentation/acpi/video_extension.txt
+++ b/Documentation/acpi/video_extension.txt
@@ -101,6 +101,6 @@ received a notification, it will set the backlight level accordingly. This does
 not affect the sending of event to user space, they are always sent to user
 space regardless of whether or not the video module controls the backlight level
 directly. This behaviour can be controlled through the brightness_switch_enabled
-module parameter as documented in kernel-parameters.txt. It is recommended to
+module parameter as documented in admin-guide/kernel-parameters.rst. It is recommended to
 disable this behaviour once a GUI environment starts up and wants to have full
 control of the backlight level.
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 05aad8543340..1b6dfb2b3adb 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -50,7 +50,8 @@ Documentation
  - There are various README files in the Documentation/ subdirectory:
    these typically contain kernel-specific installation notes for some
    drivers for example. See Documentation/00-INDEX for a list of what
-   is contained in each file.  Please read the Changes file, as it
+   is contained in each file.  Please read the
+   :ref:`Documentation/process/changes.rst <changes>` file, as it
    contains information about the problems, which may result by upgrading
    your kernel.
 
@@ -96,7 +97,7 @@ Installing the kernel source
    and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and
    want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is,
    patch -R) **before** applying the 4.0.3 patch. You can read more on this in
-   :ref:`Documentation/applying-patches.txt <applying_patches>`.
+   :ref:`Documentation/process/applying-patches.rst <applying_patches>`.
 
    Alternatively, the script patch-kernel can be used to automate this
    process.  It determines the current kernel version and applies any
@@ -120,7 +121,7 @@ Software requirements
 
    Compiling and running the 4.x kernels requires up-to-date
    versions of various software packages.  Consult
-   :ref:`Documentation/Changes <changes>` for the minimum version numbers
+   :ref:`Documentation/process/changes.rst <changes>` for the minimum version numbers
    required and how to get updates for these packages.  Beware that using
    excessively old versions of these packages can cause indirect
    errors that are very difficult to track down, so don't assume that
@@ -254,7 +255,7 @@ Compiling the kernel
 --------------------
 
  - Make sure you have at least gcc 3.2 available.
-   For more information, refer to :ref:`Documentation/Changes <changes>`.
+   For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
 
    Please note that you can still run a.out user programs with this kernel.
 
@@ -355,7 +356,7 @@ If something goes wrong
    help debugging the problem.  The text above the dump is also
    important: it tells something about why the kernel dumped code (in
    the above example, it's due to a bad kernel pointer). More information
-   on making sense of the dump is in Documentation/oops-tracing.txt
+   on making sense of the dump is in Documentation/admin-guide/oops-tracing.rst
 
  - If you compiled the kernel with CONFIG_KALLSYMS you can send the dump
    as is, otherwise you will have to use the ``ksymoops`` program to make
@@ -393,7 +394,7 @@ If something goes wrong
 
    If you for some reason cannot do the above (you have a pre-compiled
    kernel image or similar), telling me as much about your setup as
-   possible will help.  Please read the :ref:`REPORTING-BUGS <reportingbugs>`
+   possible will help.  Please read the :ref:`admin-guide/reporting-bugs.rst <reportingbugs>`
    document for details.
 
  - Alternatively, you can use gdb on a running kernel. (read-only; i.e. you
diff --git a/Documentation/admin-guide/bad-memory.rst b/Documentation/admin-guide/bad-memory.rst
index 017fc86430c3..a5c0e25e496f 100644
--- a/Documentation/admin-guide/bad-memory.rst
+++ b/Documentation/admin-guide/bad-memory.rst
@@ -33,7 +33,7 @@ memmap is already in the kernel and usable as kernel-parameter at
 boot-time.  Its syntax is slightly strange and you may need to
 calculate the values by yourself!
 
-Syntax to exclude a memory area (see kernel-parameters.txt for details)::
+Syntax to exclude a memory area (see admin-guide/kernel-parameters.rst for details)::
 
 	memmap=<size>$<address>
 
diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst
index 9c5ff8f260bf..97b0d7927078 100644
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -124,7 +124,7 @@ A few examples (assumed you are in ``/proc/sys/fs/binfmt_misc``):
 
     echo ':DOSWin:M::MZ::/usr/local/bin/wine:' > register
 
-For java support see Documentation/java.txt
+For java support see Documentation/admin-guide/java.rst
 
 
 You can enable/disable binfmt_misc or one binary type by echoing 0 (to disable)
@@ -140,7 +140,7 @@ Hints
 -----
 
 If you want to pass special arguments to your interpreter, you can
-write a wrapper script for it. See Documentation/java.txt for an
+write a wrapper script for it. See Documentation/admin-guide/java.rst for an
 example.
 
 Your interpreter should NOT look in the PATH for the filename; the kernel
diff --git a/Documentation/admin-guide/braille-console.rst b/Documentation/admin-guide/braille-console.rst
index fa3702dc04ab..18e79337dcfd 100644
--- a/Documentation/admin-guide/braille-console.rst
+++ b/Documentation/admin-guide/braille-console.rst
@@ -3,7 +3,7 @@ Linux Braille Console
 
 To get early boot messages on a braille device (before userspace screen
 readers can start), you first need to compile the support for the usual serial
-console (see :ref:`Documentation/serial-console.txt <serial_console>`), and
+console (see :ref:`Documentation/admin-guide/serial-console.rst <serial_console>`), and
 for braille device
 (in :menuselection:`Device Drivers --> Accessibility support --> Console on braille device`).
 
@@ -13,7 +13,7 @@ format is::
 	console=brl,serial_options...
 
 where ``serial_options...`` are the same as described in
-:ref:`Documentation/serial-console.txt <serial_console>`.
+:ref:`Documentation/admin-guide/serial-console.rst <serial_console>`.
 
 So for instance you can use ``console=brl,ttyS0`` if the braille device is connected to the first serial port, and ``console=brl,ttyS0,115200`` to
 override the baud rate to 115200, etc.
@@ -31,7 +31,7 @@ parameter.
 For simplicity, only one braille console can be enabled, other uses of
 ``console=brl,...`` will be discarded.  Also note that it does not interfere with
 the console selection mechanism described in
-:ref:`Documentation/serial-console.txt <serial_console>`.
+:ref:`Documentation/admin-guide/serial-console.rst <serial_console>`.
 
 For now, only the VisioBraille device is supported.
 
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index a8ef794aadae..d35dd9fd1af0 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -15,7 +15,7 @@ give up. Report as much as you have found to the relevant maintainer. See
 MAINTAINERS for who that is for the subsystem you have worked on.
 
 Before you submit a bug report read
-:ref:`Documentation/REPORTING-BUGS <reportingbugs>`.
+:ref:`Documentation/admin-guide/reporting-bugs.rst <reportingbugs>`.
 
 Devices not appearing
 =====================
@@ -244,5 +244,6 @@ Once you have worked out a fix please submit it upstream. After all open
 source is about sharing what you do and don't you want to be recognised for
 your genius?
 
-Please do read :ref:`Documentation/SubmittingPatches <submittingpatches>`
-though to help your code get accepted.
+Please do read
+ref:`Documentation/process/submitting-patches.rst <submittingpatches>` though
+to help your code get accepted.
diff --git a/Documentation/admin-guide/devices.rst b/Documentation/admin-guide/devices.rst
index b29555041531..89db341fba7a 100644
--- a/Documentation/admin-guide/devices.rst
+++ b/Documentation/admin-guide/devices.rst
@@ -10,7 +10,7 @@ The LaTeX version of this document is no longer maintained, nor is
 the document that used to reside at lanana.org.  This version in the
 mainline Linux kernel is the master document.  Updates shall be sent
 as patches to the kernel maintainers (see the
-:ref:`Documentation/SubmittingPatches <submittingpatches>` document).
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` document).
 Specifically explore the sections titled "CHAR and MISC DRIVERS", and
 "BLOCK LAYER" in the MAINTAINERS file to find the right maintainers
 to involve for character and block devices.
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index b0804273b6e3..d2f2725f032e 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -815,7 +815,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted::
 			bits, and "f" is flow control ("r" for RTS or
 			omit it).  Default is "9600n8".
 
-			See Documentation/serial-console.txt for more
+			See Documentation/admin-guide/serial-console.rst for more
 			information.  See
 			Documentation/networking/netconsole.txt for an
 			alternative.
@@ -2239,7 +2239,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted::
 	mce=option	[X86-64] See Documentation/x86/x86_64/boot-options.txt
 
 	md=		[HW] RAID subsystems devices and level
-			See Documentation/md.txt.
+			See Documentation/admin-guide/md.rst.
 
 	mdacon=		[MDA]
 			Format: <first>,<last>
@@ -3322,7 +3322,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted::
 	r128=		[HW,DRM]
 
 	raid=		[HW,RAID]
-			See Documentation/md.txt.
+			See Documentation/admin-guide/md.rst.
 
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			See Documentation/blockdev/ramdisk.txt.
diff --git a/Documentation/admin-guide/oops-tracing.rst b/Documentation/admin-guide/oops-tracing.rst
index 3e25ea7349ee..13be8d7bcfe7 100644
--- a/Documentation/admin-guide/oops-tracing.rst
+++ b/Documentation/admin-guide/oops-tracing.rst
@@ -44,7 +44,7 @@ the disk is not available then you have three options :
     so won't help for 'early' oopses)
 
 (2) Boot with a serial console (see
-    :ref:`Documentation/serial-console.txt <serial_console>`),
+    :ref:`Documentation/admin-guide/serial-console.rst <serial_console>`),
     run a null modem to a second machine and capture the output there
     using your favourite communication program.  Minicom works well.
 
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 7eaf1e71c083..fe95c027e37c 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -61,7 +61,7 @@ Setting the ramoops parameters can be done in several different manners:
 	mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1
 
  B. Use Device Tree bindings, as described in
- ``Documentation/device-tree/bindings/reserved-memory/ramoops.txt``.
+ ``Documentation/device-tree/bindings/reserved-memory/admin-guide/ramoops.rst``.
  For example::
 
 	reserved-memory {
diff --git a/Documentation/admin-guide/reporting-bugs.rst b/Documentation/admin-guide/reporting-bugs.rst
index 05c53ac7fa76..0c0f2698ec5a 100644
--- a/Documentation/admin-guide/reporting-bugs.rst
+++ b/Documentation/admin-guide/reporting-bugs.rst
@@ -61,7 +61,7 @@ files to the get_maintainer.pl script::
 
 If it is a security bug, please copy the Security Contact listed in the
 MAINTAINERS file.  They can help coordinate bugfix and disclosure.  See
-:ref:`Documentation/SecurityBugs <securitybugs>` for more information.
+:ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>` for more information.
 
 If you can't figure out which subsystem caused the issue, you should file
 a bug in kernel.org bugzilla and send email to
@@ -94,7 +94,7 @@ step-by-step instructions for how a user can trigger the bug.
 
 If the failure includes an "OOPS:", take a picture of the screen, capture
 a netconsole trace, or type the message from your screen into the bug
-report.  Please read "Documentation/oops-tracing.txt" before posting your
+report.  Please read "Documentation/admin-guide/oops-tracing.rst" before posting your
 bug report. This explains what you should do with the "Oops" information
 to make it useful to the recipient.
 
@@ -120,7 +120,7 @@ summary from [1.]>" for easy identification by the developers::
   [4.2.] Kernel .config file:
   [5.] Most recent kernel version which did not have the bug:
   [6.] Output of Oops.. message (if applicable) with symbolic information
-       resolved (see Documentation/oops-tracing.txt)
+       resolved (see Documentation/admin-guide/oops-tracing.rst)
   [7.] A small shell script or example program which triggers the
        problem (if possible)
   [8.] Environment
diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst
index df795e22d08b..4f7414cad586 100644
--- a/Documentation/admin-guide/security-bugs.rst
+++ b/Documentation/admin-guide/security-bugs.rst
@@ -19,7 +19,7 @@ area maintainers to understand and fix the security vulnerability.
 
 As it is with any bug, the more information provided the easier it
 will be to diagnose and fix.  Please review the procedure outlined in
-REPORTING-BUGS if you are unclear about what information is helpful.
+admin-guide/reporting-bugs.rst if you are unclear about what information is helpful.
 Any exploit code is very helpful and will not be released without
 consent from the reporter unless it has already been made public.
 
diff --git a/Documentation/admin-guide/unicode.rst b/Documentation/admin-guide/unicode.rst
index 012e8e895842..4e5c3df9d55f 100644
--- a/Documentation/admin-guide/unicode.rst
+++ b/Documentation/admin-guide/unicode.rst
@@ -7,7 +7,7 @@ This file is maintained by H. Peter Anvin <unicode@lanana.org> as part
 of the Linux Assigned Names And Numbers Authority (LANANA) project.
 The current version can be found at:
 
-	    http://www.lanana.org/docs/unicode/unicode.txt
+	    http://www.lanana.org/docs/unicode/admin-guide/unicode.rst
 
 Introdution
 -----------
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
index 83c1df2fc758..259f00af3ab3 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/Booting
@@ -51,7 +51,7 @@ As an alternative, the boot loader can pass the relevant 'console='
 option to the kernel via the tagged lists specifying the port, and
 serial format options as described in
 
-       Documentation/kernel-parameters.txt.
+       Documentation/admin-guide/kernel-parameters.rst.
 
 
 3. Detect the machine type
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index c9d1cacb4395..7281bf939779 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -16,7 +16,7 @@ will fail.  Something like the following should suffice:
 	typedef struct { long counter; } atomic_long_t;
 
 Historically, counter has been declared volatile.  This is now discouraged.
-See Documentation/volatile-considered-harmful.txt for the complete rationale.
+See Documentation/process/volatile-considered-harmful.rst for the complete rationale.
 
 local_t is very similar to atomic_t. If the counter is per CPU and only
 updated by one CPU, local_t is probably more appropriate. Please see
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt
index fe2ef978d85a..501e12e0323e 100644
--- a/Documentation/blockdev/ramdisk.txt
+++ b/Documentation/blockdev/ramdisk.txt
@@ -14,7 +14,7 @@ Contents:
 
 The RAM disk driver is a way to use main system memory as a block device.  It
 is required for initrd, an initial filesystem used if you need to load modules
-in order to access the root filesystem (see Documentation/initrd.txt).  It can
+in order to access the root filesystem (see Documentation/admin-guide/initrd.rst).  It can
 also be used for a temporary filesystem for crypto work, since the contents
 are erased on reboot.
 
diff --git a/Documentation/cgroup-v1/00-INDEX b/Documentation/cgroup-v1/00-INDEX
index 106885ad670d..13e0c85e7b35 100644
--- a/Documentation/cgroup-v1/00-INDEX
+++ b/Documentation/cgroup-v1/00-INDEX
@@ -8,7 +8,7 @@ cpuacct.txt
 	- CPU Accounting Controller; account CPU usage for groups of tasks.
 cpusets.txt
 	- documents the cpusets feature; assign CPUs and Mem to a set of tasks.
-devices.txt
+admin-guide/devices.rst
 	- Device Whitelist Controller; description, interface and security.
 freezer-subsystem.txt
 	- checkpointing; rationale to not use signals, interface.
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
index ddef330d2709..1ad4c1c2b3b3 100644
--- a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
+++ b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
@@ -1,7 +1,7 @@
 * Maxim DS3231 Real Time Clock
 
 Required properties:
-see: Documentation/devicetree/bindings/i2c/trivial-devices.txt
+see: Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
 
 Optional property:
 - #clock-cells: Should be 1.
diff --git a/Documentation/devicetree/bindings/rtc/pcf8563.txt b/Documentation/devicetree/bindings/rtc/pcf8563.txt
index 72f6d2c9665e..086c998c5561 100644
--- a/Documentation/devicetree/bindings/rtc/pcf8563.txt
+++ b/Documentation/devicetree/bindings/rtc/pcf8563.txt
@@ -3,7 +3,7 @@
 Philips PCF8563/Epson RTC8564 Real Time Clock
 
 Required properties:
-see: Documentation/devicetree/bindings/i2c/trivial-devices.txt
+see: Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
 
 Optional property:
 - #clock-cells: Should be 0.
diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt
index 7d44eae7ab0b..274058c583dd 100644
--- a/Documentation/devicetree/bindings/submitting-patches.txt
+++ b/Documentation/devicetree/bindings/submitting-patches.txt
@@ -3,7 +3,7 @@
 
 I. For patch submitters
 
-  0) Normal patch submission rules from Documentation/SubmittingPatches
+  0) Normal patch submission rules from Documentation/process/submitting-patches.rst
      applies.
 
   1) The Documentation/ portion of the patch should be a separate patch.
diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.txt
index 2cf81082581d..5368690f412e 100644
--- a/Documentation/filesystems/locks.txt
+++ b/Documentation/filesystems/locks.txt
@@ -19,7 +19,7 @@ forever.
 
 This should not cause problems for anybody, since everybody using a
 2.1.x kernel should have updated their C library to a suitable version
-anyway (see the file "Documentation/Changes".)
+anyway (see the file "Documentation/process/changes.rst".)
 
 1.2 Allow Mixed Locks Again
 ---------------------------
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index 0b2883b17d4c..5efae00f6c7f 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -11,7 +11,7 @@ Updated 2006 by Horms <horms@verge.net.au>
 In order to use a diskless system, such as an X-terminal or printer server
 for example, it is necessary for the root filesystem to be present on a
 non-disk device. This may be an initramfs (see Documentation/filesystems/
-ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/admin-guide/initrd.rst) or a
 filesystem mounted via NFS. The following text describes on how to use NFS
 for the root filesystem. For the rest of this text 'client' means the
 diskless system, and 'server' means the NFS server.
@@ -284,7 +284,7 @@ They depend on various facilities being available:
 	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
 	are passed to the kernel by adding them to the "append" line.
 	It is common to use serial console in conjunction with pxeliunx,
-	see Documentation/serial-console.txt for more information.
+	see Documentation/admin-guide/serial-console.rst for more information.
 
 	For more information on isolinux, including how to create bootdisks
 	for prebuilt kernels, see http://syslinux.zytor.com/
diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt
index 9bdf4b46e741..cd9dc1dfb144 100644
--- a/Documentation/frv/booting.txt
+++ b/Documentation/frv/booting.txt
@@ -119,7 +119,7 @@ separated by spaces:
 	253:0		Device with major 253 and minor 0
 
       Authoritative information can be found in
-      "Documentation/kernel-parameters.txt".
+      "Documentation/admin-guide/kernel-parameters.rst".
 
   (*) rw
 
diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches
index 57f60307accc..f88221b46153 100644
--- a/Documentation/hwmon/submitting-patches
+++ b/Documentation/hwmon/submitting-patches
@@ -10,10 +10,10 @@ increase the chances of your change being accepted.
 ----------
 
 * It should be unnecessary to mention, but please read and follow
-    Documentation/SubmitChecklist
-    Documentation/SubmittingDrivers
-    Documentation/SubmittingPatches
-    Documentation/CodingStyle
+    Documentation/process/submit-checklist.rst
+    Documentation/process/submitting-drivers.rst
+    Documentation/process/submitting-patches.rst
+    Documentation/process/coding-style.rst
 
 * Please run your patch through 'checkpatch --strict'. There should be no
   errors, no warnings, and few if any check messages. If there are any
diff --git a/Documentation/isdn/README b/Documentation/isdn/README
index cfb1884342ee..32d4e80c2c03 100644
--- a/Documentation/isdn/README
+++ b/Documentation/isdn/README
@@ -332,7 +332,7 @@ README for the ISDN-subsystem
 4. Device-inodes
 
    The major and minor numbers and their names are described in
-   Documentation/devices.txt. The major numbers are:
+   Documentation/admin-guide/devices.rst. The major numbers are:
 
      43 for the ISDN-tty's.
      44 for the ISDN-callout-tty's.
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 581c14bdd7be..b03fc8047f03 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -127,15 +127,15 @@ linux-api@ver.kernel.org に送ることを勧めます。
      小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい
      ます。
 
-  Documentation/CodingStyle
+  Documentation/process/coding-style.rst
     これは Linux カーネルのコーディングスタイルと背景にある理由を記述
     しています。全ての新しいコードはこのドキュメントにあるガイドライン
     に従っていることを期待されています。大部分のメンテナはこれらのルー
     ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
     だけをレビューします。
 
-  Documentation/SubmittingPatches
-  Documentation/SubmittingDrivers
+  Documentation/process/submitting-patches.rst
+  Documentation/process/submitting-drivers.rst
      これらのファイルには、どうやってうまくパッチを作って投稿するかに
      ついて非常に詳しく書かれており、以下を含みます(これだけに限らない
      けれども)
@@ -153,7 +153,7 @@ linux-api@ver.kernel.org に送ることを勧めます。
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
-  Documentation/stable_api_nonsense.txt
+  Documentation/process/stable-api-nonsense.rst
      このファイルはカーネルの中に不変のAPIを持たないことにした意識的な
      決断の背景にある理由について書かれています。以下のようなことを含
      んでいます-
@@ -164,29 +164,29 @@ linux-api@ver.kernel.org に送ることを勧めます。
      このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
      そして、他のOSでの開発者が Linux に移る時にとても重要です。
 
-  Documentation/SecurityBugs
+  Documentation/admin-guide/security-bugs.rst
     もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
     のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
     支援してください。
 
-  Documentation/ManagementStyle
+  Documentation/process/management-style.rst
     このドキュメントは Linux カーネルのメンテナ達がどう行動するか、
     彼らの手法の背景にある共有されている精神について記述しています。こ
     れはカーネル開発の初心者なら（もしくは、単に興味があるだけの人でも）
     重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な
     行動についての多くの誤解や混乱を解消するからです。
 
-  Documentation/stable_kernel_rules.txt
+  Documentation/process/stable-kernel-rules.rst
     このファイルはどのように stable カーネルのリリースが行われるかのルー
     ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
     り入れてもらいたい場合に何をすれば良いかが示されています。
 
-  Documentation/kernel-docs.txt
+  Documentation/process/kernel-docs.rst
 　　カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
     探しているものがカーネル内のドキュメントでみつからなかった場合、
     このリストをあたってみてください。
 
-  Documentation/applying-patches.txt
+  Documentation/process/applying-patches.rst
     パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに
     適用するのかについて正確に記述した良い入門書です。
 
@@ -314,7 +314,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
 た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
 の場合はこれに対してだいたいの場合、すぐにリリースがされます。
 
-カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ
+カーネルツリーに入っている、Documentation/process/stable-kernel-rules.rst ファ
 イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
 リースプロセスがどう動くかが記述されています。
 
@@ -372,7 +372,7 @@ bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを
 場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。
 どう kernel bugzilla を使うかの詳細は、以下を参照してください-
 	http://bugzilla.kernel.org/page.cgi?id=faq.html
-メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ
+メインカーネルソースディレクトリにあるファイル admin-guide/reporting-bugs.rst はカーネ
 ルバグらしいものについてどうレポートするかの良いテンプレートであり、問
 題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳
 細が書かれています。
@@ -438,7 +438,7 @@ MAINTAINERS ファイルにリストがありますので参照してくださ
 メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで
 す。
 
-もしパッチをメールに付ける場合は、Documentation/SubmittingPatches に提
+もしパッチをメールに付ける場合は、Documentation/process/submitting-patches.rst に提
 示されているように、それは プレーンな可読テキストにすることを忘れない
 ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま
 せん-
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist
index cb5507b1ac81..60c7c35ac517 100644
--- a/Documentation/ja_JP/SubmitChecklist
+++ b/Documentation/ja_JP/SubmitChecklist
@@ -1,5 +1,5 @@
 NOTE:
-This is a version of Documentation/SubmitChecklist into Japanese.
+This is a version of Documentation/process/submit-checklist.rst into Japanese.
 This document is maintained by Takenori Nagano <t-nagano@ah.jp.nec.com>
 and the JF Project team <http://www.linux.or.jp/JF/>.
 If you find any difference between this document and the original file
@@ -14,7 +14,7 @@ to update the original English file first.
 Last Updated: 2008/07/14
 ==================================
 これは、
-linux-2.6.26/Documentation/SubmitChecklist の和訳です。
+linux-2.6.26/Documentation/process/submit-checklist.rst の和訳です。
 
 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
 翻訳日： 2008/07/14
@@ -27,7 +27,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 本書では、パッチをより素早く取り込んでもらいたい開発者が実践すべき基本的な事柄
-をいくつか紹介します。ここにある全ての事柄は、Documentation/SubmittingPatches
+をいくつか紹介します。ここにある全ての事柄は、Documentation/process/submitting-patches.rst
 などのLinuxカーネルパッチ投稿に際しての心得を補足するものです。
 
  1: 妥当なCONFIGオプションや変更されたCONFIGオプション、つまり =y, =m, =n
@@ -84,7 +84,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
     必ずドキュメントを追加してください。
 
 17: 新しいブートパラメータを追加した場合には、
-    必ずDocumentation/kernel-parameters.txt に説明を追加してください。
+    必ずDocumentation/admin-guide/kernel-parameters.rst に説明を追加してください。
 
 18: 新しくmoduleにパラメータを追加した場合には、MODULE_PARM_DESC()を
     利用して必ずその説明を記述してください。
diff --git a/Documentation/ja_JP/SubmittingPatches b/Documentation/ja_JP/SubmittingPatches
index 5d6ae639bfa0..02139656463e 100644
--- a/Documentation/ja_JP/SubmittingPatches
+++ b/Documentation/ja_JP/SubmittingPatches
@@ -1,5 +1,5 @@
 NOTE:
-This is a version of Documentation/SubmittingPatches into Japanese.
+This is a version of Documentation/process/submitting-patches.rst into Japanese.
 This document is maintained by Keiichi KII <k-keiichi@bx.jp.nec.com>
 and the JF Project team <http://www.linux.or.jp/JF/>.
 If you find any difference between this document and the original file
@@ -15,7 +15,7 @@ Last Updated: 2011/06/09
 
 ==================================
 これは、
-linux-2.6.39/Documentation/SubmittingPatches の和訳
+linux-2.6.39/Documentation/process/submitting-patches.rst の和訳
 です。
 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
 翻訳日： 2011/06/09
@@ -34,9 +34,9 @@ Linux カーネルに変更を加えたいと思っている個人又は会社
 おじけづかせることもあります。この文章はあなたの変更を大いに受け入れ
 てもらえやすくする提案を集めたものです。
 
-コードを投稿する前に、Documentation/SubmitChecklist の項目リストに目
+コードを投稿する前に、Documentation/process/submit-checklist.rst の項目リストに目
 を通してチェックしてください。もしあなたがドライバーを投稿しようとし
-ているなら、Documentation/SubmittingDrivers にも目を通してください。
+ているなら、Documentation/process/submitting-drivers.rst にも目を通してください。
 
 --------------------------------------------
 セクション1 パッチの作り方と送り方
@@ -148,7 +148,7 @@ http://savannah.nongnu.org/projects/quilt
 4) パッチのスタイルチェック
 
 あなたのパッチが基本的な( Linux カーネルの)コーディングスタイルに違反し
-ていないかをチェックして下さい。その詳細を Documentation/CodingStyle で
+ていないかをチェックして下さい。その詳細を Documentation/process/coding-style.rst で
 見つけることができます。コーディングスタイルの違反はレビューする人の
 時間を無駄にするだけなので、恐らくあなたのパッチは読まれることすらなく
 拒否されるでしょう。
@@ -246,7 +246,7 @@ MIME 形式の添付ファイルは Linus に手間を取らせることにな
 あれば、誰かが MIME 形式のパッチを再送するよう求めるかもしれません。
 
 余計な変更を加えずにあなたのパッチを送信するための電子メールクライアントの設定
-のヒントについては Documentation/email-clients.txt を参照してください。
+のヒントについては Documentation/process/email-clients.rst を参照してください。
 
 8) 電子メールのサイズ
 
@@ -609,7 +609,7 @@ diffstat の結果を生成するために「 git diff -M --stat --summary 」
 し例外を適用するには、本当に妥当な理由が不可欠です。あなたは恐らくこの
 セクションを Linus のコンピュータ・サイエンス101と呼ぶでしょう。
 
-1) Documentation/CodingStyleを参照
+1) Documentation/process/coding-style.rstを参照
 
 言うまでもなく、あなたのコードがこのコーディングスタイルからあまりに
 も逸脱していると、レビューやコメントなしに受け取ってもらえないかもし
@@ -704,8 +704,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lkml.org/lkml/2005/7/11/336>
 
-Kernel Documentation/CodingStyle:
-  <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
+Kernel Documentation/process/coding-style.rst:
+  <http://users.sosdg.org/~qiyong/lxr/source/Documentation/process/coding-style.rst>
 
 Linus Torvalds's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt
index 7653b5cbfed2..a3b40a4bdcfd 100644
--- a/Documentation/ja_JP/stable_api_nonsense.txt
+++ b/Documentation/ja_JP/stable_api_nonsense.txt
@@ -1,5 +1,5 @@
 NOTE:
-This is a version of Documentation/stable_api_nonsense.txt into Japanese.
+This is a version of Documentation/process/stable-api-nonsense.rst into Japanese.
 This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
 and the JF Project team <http://www.linux.or.jp/JF/>.
 If you find any difference between this document and the original file
@@ -14,7 +14,7 @@ to update the original English file first.
 Last Updated: 2007/07/18
 ==================================
 これは、
-linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
+linux-2.6.22-rc4/Documentation/process/stable-api-nonsense.rst の和訳
 です。
 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
 翻訳日 ： 2007/06/11
diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt
index 9dbda9b5d21e..f9249aecba64 100644
--- a/Documentation/ja_JP/stable_kernel_rules.txt
+++ b/Documentation/ja_JP/stable_kernel_rules.txt
@@ -1,5 +1,5 @@
 NOTE:
-This is Japanese translated version of "Documentation/stable_kernel_rules.txt".
+This is Japanese translated version of "Documentation/process/stable-kernel-rules.rst".
 This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
 and JF Project team <www.linux.or.jp/JF>.
 If you find difference with original file or problem in translation,
@@ -12,7 +12,7 @@ file at first.
 
 ==================================
 これは、
-linux-2.6.29/Documentation/stable_kernel_rules.txt
+linux-2.6.29/Documentation/process/stable-kernel-rules.rst
 の和訳です。
 
 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
@@ -43,7 +43,7 @@ linux-2.6.29/Documentation/stable_kernel_rules.txt
    "理論的には競合状態になる"ようなものは不可。
  - いかなる些細な修正も含めることはできない。(スペルの修正、空白のクリー
    ンアップなど)
- - Documentation/SubmittingPatches の規則に従ったものでなければならない。
+ - Documentation/process/submitting-patches.rst の規則に従ったものでなければならない。
  - パッチ自体か同等の修正が Linus のツリーに既に存在しなければならない。
 　 Linus のツリーでのコミットID を -stable へのパッチ投稿の際に引用す
    ること。
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index bbc3a8b8cff4..df31e30b6a02 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -264,7 +264,7 @@ To reduce its OS jitter, do at least one of the following:
 	kthreads from being created in the first place.
 2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
 	from being created.  Other related watchdog and softlockup boot
-	parameters may be found in Documentation/kernel-parameters.txt
+	parameters may be found in Documentation/admin-guide/kernel-parameters.rst
 	and Documentation/watchdog/watchdog-parameters.txt.
 3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
 	watchdog timer.
diff --git a/Documentation/ko_KR/HOWTO b/Documentation/ko_KR/HOWTO
index 9a3e65924d54..025252731af5 100644
--- a/Documentation/ko_KR/HOWTO
+++ b/Documentation/ko_KR/HOWTO
@@ -1,5 +1,5 @@
 NOTE:
-This is a version of Documentation/HOWTO translated into korean
+This is a version of Documentation/process/howto.rst translated into korean
 This document is maintained by Minchan Kim <minchan@kernel.org>
 If you find any difference between this document and the original file or
 a problem with the translation, please contact the maintainer of this file.
@@ -11,7 +11,7 @@ try to update the original English file first.
 
 ==================================
 이 문서는
-Documentation/HOWTO
+Documentation/process/howto.rst
 의 한글 번역입니다.
 
 역자： 김민찬 <minchan@kernel.org>
@@ -98,18 +98,18 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
     빌드하기 위해 필요한 것을 설명한다. 커널에 입문하는 사람들은 여기서
     시작해야 한다.
 
-  Documentation/Changes
+  Documentation/process/changes.rst
     이 파일은 커널을 성공적으로 빌드하고 실행시키기 위해 필요한 다양한
     소프트웨어 패키지들의 최소 버젼을 나열한다.
 
-  Documentation/CodingStyle
+  Documentation/process/coding-style.rst
     이 문서는 리눅스 커널 코딩 스타일과 그렇게 한 몇몇 이유를 설명한다.
     모든 새로운 코드는 이 문서에 가이드라인들을 따라야 한다. 대부분의
     메인테이너들은 이 규칙을 따르는 패치들만을 받아들일 것이고 많은 사람들이
     그 패치가 올바른 스타일일 경우만 코드를 검토할 것이다.
 
-  Documentation/SubmittingPatches
-  Documentation/SubmittingDrivers
+  Documentation/process/submitting-patches.rst
+  Documentation/process/submitting-drivers.rst
     이 파일들은 성공적으로 패치를 만들고 보내는 법을 다음의 내용들로
     굉장히 상세히 설명하고 있다(그러나 다음으로 한정되진 않는다).
        - Email 내용들
@@ -126,7 +126,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
     "Linux kernel patch submission format"
         http://linux.yyz.us/patch-format.html
 
-   Documentation/stable_api_nonsense.txt
+   Documentation/process/stable-api-nonsense.rst
     이 문서는 의도적으로 커널이 불변하는 API를 갖지 않도록 결정한
     이유를 설명하며 다음과 같은 것들을 포함한다.
        - 서브시스템 shim-layer(호환성을 위해?)
@@ -136,12 +136,12 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
     리눅스로 전향하는 사람들에게는 매우 중요하다.
 
 
-  Documentation/SecurityBugs
+  Documentation/admin-guide/security-bugs.rst
     여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
     나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
     도와 달라.
 
-  Documentation/ManagementStyle
+  Documentation/process/management-style.rst
     이 문서는 리눅스 커널 메인테이너들이 그들의 방법론에 녹아 있는
     정신을 어떻게 공유하고 운영하는지를 설명한다. 이것은 커널 개발에 입문하는
     모든 사람들(또는 커널 개발에 작은 호기심이라도 있는 사람들)이
@@ -149,17 +149,17 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
     독특한 행동에 관하여 흔히 있는 오해들과 혼란들을 해소하고 있기
     때문이다.
 
-  Documentation/stable_kernel_rules.txt
+  Documentation/process/stable-kernel-rules.rst
     이 문서는 안정적인 커널 배포가 이루어지는 규칙을 설명하고 있으며
     여러분들이 이러한 배포들 중 하나에 변경을 하길 원한다면
     무엇을 해야 하는지를 설명한다.
 
-  Documentation/kernel-docs.txt
+  Documentation/process/kernel-docs.rst
     커널 개발에 관계된 외부 문서의 리스트이다. 커널 내의 포함된 문서들
     중에 여러분이 찾고 싶은 문서를 발견하지 못할 경우 이 리스트를
     살펴보라.
 
-  Documentation/applying-patches.txt
+  Documentation/process/applying-patches.rst
     패치가 무엇이며 그것을 커널의 다른 개발 브랜치들에 어떻게
     적용하는지에 관하여 자세히 설명하고 있는 좋은 입문서이다.
 
@@ -276,7 +276,7 @@ Andrew Morton의 글이 있다.
 4.x.y는 "stable" 팀<stable@vger.kernel.org>에 의해 관리되며 거의 매번 격주로
 배포된다.
 
-커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤
+커널 트리 문서들 내에 Documentation/process/stable-kernel-rules.rst 파일은 어떤
 종류의 변경들이 -stable 트리로 들어왔는지와 배포 프로세스가 어떻게
 진행되는지를 설명한다.
 
@@ -328,7 +328,7 @@ bugzilla.kernel.org는 리눅스 커널 개발자들이 커널의 버그를 추
 kernel bugzilla를 사용하는 자세한 방법은 다음을 참조하라.
     http://test.kernel.org/bugzilla/faq.html
 
-메인 커널 소스 디렉토리에 있는 REPORTING-BUGS 파일은 커널 버그라고 생각되는
+메인 커널 소스 디렉토리에 있는 admin-guide/reporting-bugs.rst 파일은 커널 버그라고 생각되는
 것을 보고하는 방법에 관한 좋은 템플릿이며 문제를 추적하기 위해서 커널
 개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다.
 
@@ -391,7 +391,7 @@ bugme-janitor 메일링 리스트(bugzilla에 모든 변화들이 여기서 메
 "John 커널해커는 작성했다...."를 유지하며 여러분들의 의견을 그 메일의 윗부분에
 작성하지 말고 각 인용한 단락들 사이에 넣어라.
 
-여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/SubmittingPatches에
+여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/process/submitting-patches.rst에
 나와있는데로 명백히(plain) 읽을 수 있는 텍스트여야 한다. 커널 개발자들은
 첨부파일이나 압축된 패치들을 원하지 않는다. 그들은 여러분들의 패치의
 각 라인 단위로 코멘트를 하길 원하며 압축하거나 첨부하지 않고 보내는 것이
diff --git a/Documentation/ko_KR/stable_api_nonsense.txt b/Documentation/ko_KR/stable_api_nonsense.txt
index 3ba10b11d556..4d93af1efd61 100644
--- a/Documentation/ko_KR/stable_api_nonsense.txt
+++ b/Documentation/ko_KR/stable_api_nonsense.txt
@@ -1,5 +1,5 @@
 NOTE:
-This is a version of Documentation/stable_api_nonsense.txt translated
+This is a version of Documentation/process/stable-api-nonsense.rst translated
 into korean
 This document is maintained by Minchan Kim <minchan@kernel.org>
 If you find any difference between this document and the original file or
@@ -12,7 +12,7 @@ try to update the original English file first.
 
 ==================================
 이 문서는
-Documentation/stable_api_nonsense.txt
+Documentation/process/stable-api-nonsense.rst
 의 한글 번역입니다.
 
 역자： 김민찬 <minchan@kernel.org>
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
index 4a6e33e1af61..c8b8378513d6 100644
--- a/Documentation/lockup-watchdogs.txt
+++ b/Documentation/lockup-watchdogs.txt
@@ -11,7 +11,7 @@ details), without giving other tasks a chance to run. The current
 stack trace is displayed upon detection and, by default, the system
 will stay locked up. Alternatively, the kernel can be configured to
 panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
-"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+"softlockup_panic" (see "Documentation/admin-guide/kernel-parameters.rst" for
 details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
 provided for this.
 
@@ -23,7 +23,7 @@ upon detection and the system will stay locked up unless the default
 behavior is changed, which can be done through a sysctl,
 'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC",
 and a kernel parameter, "nmi_watchdog"
-(see "Documentation/kernel-parameters.txt" for details).
+(see "Documentation/admin-guide/kernel-parameters.rst" for details).
 
 The panic option can be used in combination with panic_timeout (this
 timeout is set through the confusingly named "kernel.panic" sysctl),
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
index eaf32a1fd0b1..79d21246c75a 100644
--- a/Documentation/m68k/kernel-options.txt
+++ b/Documentation/m68k/kernel-options.txt
@@ -139,7 +139,7 @@ follows:
   PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
 
 Authoritative information can be found in
-"Documentation/kernel-parameters.txt".
+"Documentation/admin-guide/kernel-parameters.rst".
 
 
 2.2) ro, rw
diff --git a/Documentation/media/uapi/v4l/diff-v4l.rst b/Documentation/media/uapi/v4l/diff-v4l.rst
index 76b2ecab8657..8209eeb63dd2 100644
--- a/Documentation/media/uapi/v4l/diff-v4l.rst
+++ b/Documentation/media/uapi/v4l/diff-v4l.rst
@@ -648,12 +648,12 @@ microcode programming. A new interface for MPEG compression and playback
 devices is documented in :ref:`extended-controls`.
 
 .. [#f1]
-   According to Documentation/devices.txt these should be symbolic links
+   According to Documentation/admin-guide/devices.rst these should be symbolic links
    to ``/dev/video0``. Note the original bttv interface is not
    compatible with V4L or V4L2.
 
 .. [#f2]
-   According to ``Documentation/devices.txt`` a symbolic link to
+   According to ``Documentation/admin-guide/devices.rst`` a symbolic link to
    ``/dev/radio0``.
 
 .. [#f3]
diff --git a/Documentation/media/v4l-drivers/bttv.rst b/Documentation/media/v4l-drivers/bttv.rst
index 7abc1c9a261b..bc63b12efafd 100644
--- a/Documentation/media/v4l-drivers/bttv.rst
+++ b/Documentation/media/v4l-drivers/bttv.rst
@@ -304,10 +304,10 @@ bug.  It is very helpful if you can tell where exactly it broke
 With a hard freeze you probably doesn't find anything in the logfiles.
 The only way to capture any kernel messages is to hook up a serial
 console and let some terminal application log the messages.  /me uses
-screen.  See Documentation/serial-console.txt for details on setting
+screen.  See Documentation/admin-guide/serial-console.rst for details on setting
 up a serial console.
 
-Read Documentation/oops-tracing.txt to learn how to get any useful
+Read Documentation/admin-guide/oops-tracing.rst to learn how to get any useful
 information out of a register+stack dump printed by the kernel on
 protection faults (so-called "kernel oops").
 
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 0d7cb955aa01..5de846d3ecc0 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -324,7 +324,7 @@ guarantee that the memory block contains only migratable pages.
 Now, a boot option for making a memory block which consists of migratable pages
 is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
 create ZONE_MOVABLE...a zone which is just used for movable pages.
-(See also Documentation/kernel-parameters.txt)
+(See also Documentation/admin-guide/kernel-parameters.rst)
 
 Assume the system has "TOTAL" amount of memory at boot time, this boot option
 creates ZONE_MOVABLE as following.
diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt
index 30409a36e95d..296ea00fd3eb 100644
--- a/Documentation/networking/netconsole.txt
+++ b/Documentation/networking/netconsole.txt
@@ -200,7 +200,7 @@ priority messages to the console. You can change this at runtime using:
 or by specifying "debug" on the kernel command line at boot, to send
 all kernel messages to the console. A specific value for this parameter
 can also be set using the "loglevel" kernel boot option. See the
-dmesg(8) man page and Documentation/kernel-parameters.txt for details.
+dmesg(8) man page and Documentation/admin-guide/kernel-parameters.rst for details.
 
 Netconsole was designed to be as instantaneous as possible, to
 enable the logging of even the most critical kernel bugs. It works
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index 0fe1c6e0dbcd..cdebc5c8705f 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -136,14 +136,14 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but
 
 Q: I see a network patch and I think it should be backported to stable.
    Should I request it via "stable@vger.kernel.org" like the references in
-   the kernel's Documentation/stable_kernel_rules.txt file say?
+   the kernel's Documentation/process/stable-kernel-rules.rst file say?
 
 A: No, not for networking.  Check the stable queues as per above 1st to see
    if it is already queued.  If not, then send a mail to netdev, listing
    the upstream commit ID and why you think it should be a stable candidate.
 
    Before you jump to go do the above, do note that the normal stable rules
-   in Documentation/stable_kernel_rules.txt still apply.  So you need to
+   in Documentation/process/stable-kernel-rules.rst still apply.  So you need to
    explicitly indicate why it is a critical fix and exactly what users are
    impacted.  In addition, you need to convince yourself that you _really_
    think it has been overlooked, vs. having been considered and rejected.
@@ -165,7 +165,7 @@ A: No.  See above answer.  In short, if you think it really belongs in
 
    If you think there is some valid information relating to it being in
    stable that does _not_ belong in the commit log, then use the three
-   dash marker line as described in Documentation/SubmittingPatches to
+   dash marker line as described in Documentation/process/submitting-patches.rst to
    temporarily embed that information into the patch that you send.
 
 Q: Someone said that the comment style and coding convention is different
@@ -220,5 +220,5 @@ A: Attention to detail.  Re-read your own work as if you were the
    If it is your first patch, mail it to yourself so you can test apply
    it to an unpatched tree to confirm infrastructure didn't mangle it.
 
-   Finally, go back and read Documentation/SubmittingPatches to be
+   Finally, go back and read Documentation/process/submitting-patches.rst to be
    sure you are not repeating some common mistake documented there.
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt
index 97282da82b75..ad3dead052a4 100644
--- a/Documentation/networking/vortex.txt
+++ b/Documentation/networking/vortex.txt
@@ -364,7 +364,7 @@ steps you should take:
 
 - The contents of your report will vary a lot depending upon the
   problem.  If it's a kernel crash then you should refer to the
-  REPORTING-BUGS file.
+  admin-guide/reporting-bugs.rst file.
 
   But for most problems it is useful to provide the following:
 
diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX
index ad04cc8097ed..7cb6085839f3 100644
--- a/Documentation/power/00-INDEX
+++ b/Documentation/power/00-INDEX
@@ -6,7 +6,7 @@ basic-pm-debugging.txt
 	- Debugging suspend and resume
 charger-manager.txt
 	- Battery charger management.
-devices.txt
+admin-guide/devices.rst
 	- How drivers interact with system-wide power management
 drivers-testing.txt
 	- Testing suspend and resume support in device drivers
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index 44558882aa60..85c746cbab2c 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -8,7 +8,7 @@ management.  Based on previous work by Patrick Mochel <mochel@transmeta.com>
 
 This document only covers the aspects of power management specific to PCI
 devices.  For general description of the kernel's interfaces related to device
-power management refer to Documentation/power/devices.txt and
+power management refer to Documentation/power/admin-guide/devices.rst and
 Documentation/power/runtime_pm.txt.
 
 ---------------------------------------------------------------------------
@@ -417,7 +417,7 @@ pm->runtime_idle() callback.
 2.4. System-Wide Power Transitions
 ----------------------------------
 There are a few different types of system-wide power transitions, described in
-Documentation/power/devices.txt.  Each of them requires devices to be handled
+Documentation/power/admin-guide/devices.rst.  Each of them requires devices to be handled
 in a specific way and the PM core executes subsystem-level power management
 callbacks for this purpose.  They are executed in phases such that each phase
 involves executing the same subsystem-level callback for every device belonging
@@ -623,7 +623,7 @@ System restore requires a hibernation image to be loaded into memory and the
 pre-hibernation memory contents to be restored before the pre-hibernation system
 activity can be resumed.
 
-As described in Documentation/power/devices.txt, the hibernation image is loaded
+As described in Documentation/power/admin-guide/devices.rst, the hibernation image is loaded
 into memory by a fresh instance of the kernel, called the boot kernel, which in
 turn is loaded and run by a boot loader in the usual way.  After the boot kernel
 has loaded the image, it needs to replace its own code and data with the code
@@ -677,7 +677,7 @@ controlling the runtime power management of their devices.
 
 At the time of this writing there are two ways to define power management
 callbacks for a PCI device driver, the recommended one, based on using a
-dev_pm_ops structure described in Documentation/power/devices.txt, and the
+dev_pm_ops structure described in Documentation/power/admin-guide/devices.rst, and the
 "legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
 .resume() callbacks from struct pci_driver are used.  The legacy approach,
 however, doesn't allow one to define runtime power management callbacks and is
@@ -1046,5 +1046,5 @@ PCI Local Bus Specification, Rev. 3.0
 PCI Bus Power Management Interface Specification, Rev. 1.2
 Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b
 PCI Express Base Specification, Rev. 2.0
-Documentation/power/devices.txt
+Documentation/power/admin-guide/devices.rst
 Documentation/power/runtime_pm.txt
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 1fd1fbe9ce95..4870980e967e 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -674,7 +674,7 @@ left in runtime suspend.  If that happens, the PM core will not execute any
 system suspend and resume callbacks for all of those devices, except for the
 complete callback, which is then entirely responsible for handling the device
 as appropriate.  This only applies to system suspend transitions that are not
-related to hibernation (see Documentation/power/devices.txt for more
+related to hibernation (see Documentation/power/admin-guide/devices.rst for more
 information).
 
 The PM core does its best to reduce the probability of race conditions between
diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.txt
index 59931b46ff7e..b802fbfd95ef 100644
--- a/Documentation/power/swsusp-dmcrypt.txt
+++ b/Documentation/power/swsusp-dmcrypt.txt
@@ -8,7 +8,7 @@ Some prerequisites:
 You know how dm-crypt works. If not, visit the following web page:
 http://www.saout.de/misc/dm-crypt/
 You have read Documentation/power/swsusp.txt and understand it.
-You did read Documentation/initrd.txt and know how an initrd works.
+You did read Documentation/admin-guide/initrd.rst and know how an initrd works.
 You know how to create or how to modify an initrd.
 
 Now your system is properly set up, your disk is encrypted except for
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 9d5cef996f7f..983d628c1112 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -22,7 +22,7 @@ Coding style
 ************
 
 The kernel has long had a standard coding style, described in
-Documentation/CodingStyle.  For much of that time, the policies described
+Documentation/process/coding-style.rst.  For much of that time, the policies described
 in that file were taken as being, at most, advisory.  As a result, there is
 a substantial amount of code in the kernel which does not meet the coding
 style guidelines.  The presence of that code leads to two independent
@@ -343,7 +343,7 @@ user-space developers to know what they are working with.  See
 Documentation/ABI/README for a description of how this documentation should
 be formatted and what information needs to be provided.
 
-The file Documentation/kernel-parameters.txt describes all of the kernel's
+The file Documentation/admin-guide/kernel-parameters.rst describes all of the kernel's
 boot-time parameters.  Any patch which adds new parameters should add the
 appropriate entries to this file.
 
diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst
index b511ddf7e82a..1b7728b19ea7 100644
--- a/Documentation/process/5.Posting.rst
+++ b/Documentation/process/5.Posting.rst
@@ -9,8 +9,8 @@ kernel.  Unsurprisingly, the kernel development community has evolved a set
 of conventions and procedures which are used in the posting of patches;
 following them will make life much easier for everybody involved.  This
 document will attempt to cover these expectations in reasonable detail;
-more information can also be found in the files SubmittingPatches,
-SubmittingDrivers, and SubmitChecklist in the kernel documentation
+more information can also be found in the files process/submitting-patches.rst,
+process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel documentation
 directory.
 
 
@@ -198,7 +198,7 @@ pass it to diff with the "-X" option.
 
 The tags mentioned above are used to describe how various developers have
 been associated with the development of this patch.  They are described in
-detail in the SubmittingPatches document; what follows here is a brief
+detail in the process/submitting-patches.rst document; what follows here is a brief
 summary.  Each of these lines has the format:
 
 ::
@@ -210,7 +210,7 @@ The tags in common use are:
  - Signed-off-by: this is a developer's certification that he or she has
    the right to submit the patch for inclusion into the kernel.  It is an
    agreement to the Developer's Certificate of Origin, the full text of
-   which can be found in Documentation/SubmittingPatches.  Code without a
+   which can be found in Documentation/process/submitting-patches.rst.  Code without a
    proper signoff cannot be merged into the mainline.
 
  - Acked-by: indicates an agreement by another developer (often a
@@ -221,7 +221,7 @@ The tags in common use are:
    it to work.
 
  - Reviewed-by: the named developer has reviewed the patch for correctness;
-   see the reviewer's statement in Documentation/SubmittingPatches for more
+   see the reviewer's statement in Documentation/process/submitting-patches.rst for more
    detail.
 
  - Reported-by: names a user who reported a problem which is fixed by this
@@ -248,7 +248,7 @@ take care of:
    be examined in any detail.  If there is any doubt at all, mail the patch
    to yourself and convince yourself that it shows up intact.
 
-   Documentation/email-clients.txt has some helpful hints on making
+   Documentation/process/email-clients.rst has some helpful hints on making
    specific mail clients work for sending patches.
 
  - Are you sure your patch is free of silly mistakes?  You should always
diff --git a/Documentation/process/8.Conclusion.rst b/Documentation/process/8.Conclusion.rst
index 23ec7cbc2d2b..1c7f54cd0261 100644
--- a/Documentation/process/8.Conclusion.rst
+++ b/Documentation/process/8.Conclusion.rst
@@ -5,9 +5,9 @@ For more information
 
 There are numerous sources of information on Linux kernel development and
 related topics.  First among those will always be the Documentation
-directory found in the kernel source distribution.  The top-level HOWTO
-file is an important starting point; SubmittingPatches and
-SubmittingDrivers are also something which all kernel developers should
+directory found in the kernel source distribution.  The top-level process/howto.rst
+file is an important starting point; process/submitting-patches.rst and
+process/submitting-drivers.rst are also something which all kernel developers should
 read.  Many internal kernel APIs are documented using the kerneldoc
 mechanism; "make htmldocs" or "make pdfdocs" can be used to generate those
 documents in HTML or PDF format (though the version of TeX shipped by some
diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst
index f5b5b1aa51b3..8cc25a06f353 100644
--- a/Documentation/process/adding-syscalls.rst
+++ b/Documentation/process/adding-syscalls.rst
@@ -3,7 +3,7 @@ Adding a New System Call
 
 This document describes what's involved in adding a new system call to the
 Linux kernel, over and above the normal submission advice in
-:ref:`Documentation/SubmittingPatches <submittingpatches>`.
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`.
 
 
 System Call Alternatives
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index 9c61c039ccd9..968808bec407 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -1058,5 +1058,5 @@ gcc internals and indent, all available from http://www.gnu.org/manual/
 WG14 is the international standardization working group for the programming
 language C, URL: http://www.open-std.org/JTC1/SC22/WG14/
 
-Kernel CodingStyle, by greg@kroah.com at OLS 2002:
+Kernel process/coding-style.rst, by greg@kroah.com at OLS 2002:
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index 5f042349f987..3f66a1980726 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -90,19 +90,19 @@ required reading:
     what is necessary to do to configure and build the kernel.  People
     who are new to the kernel should start here.
 
-  :ref:`Documentation/Changes <changes>`
+  :ref:`Documentation/process/changes.rst <changes>`
     This file gives a list of the minimum levels of various software
     packages that are necessary to build and run the kernel
     successfully.
 
-  :ref:`Documentation/CodingStyle <codingstyle>`
+  :ref:`Documentation/process/coding-style.rst <codingstyle>`
     This describes the Linux kernel coding style, and some of the
     rationale behind it. All new code is expected to follow the
     guidelines in this document. Most maintainers will only accept
     patches if these rules are followed, and many people will only
     review code if it is in the proper style.
 
-  :ref:`Documentation/SubmittingPatches <submittingpatches>` and :ref:`Documentation/SubmittingDrivers <submittingdrivers>`
+  :ref:`Documentation/process/submitting-patches.rst <submittingpatches>` and :ref:`Documentation/process/submitting-drivers.rst <submittingdrivers>`
     These files describe in explicit detail how to successfully create
     and send a patch, including (but not limited to):
 
@@ -122,7 +122,7 @@ required reading:
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
-  :ref:`Documentation/stable_api_nonsense.txt <stable_api_nonsense>`
+  :ref:`Documentation/process/stable-api-nonsense.rst <stable_api_nonsense>`
     This file describes the rationale behind the conscious decision to
     not have a stable API within the kernel, including things like:
 
@@ -135,29 +135,29 @@ required reading:
     philosophy and is very important for people moving to Linux from
     development on other Operating Systems.
 
-  :ref:`Documentation/SecurityBugs <securitybugs>`
+  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
     If you feel you have found a security problem in the Linux kernel,
     please follow the steps in this document to help notify the kernel
     developers, and help solve the issue.
 
-  :ref:`Documentation/ManagementStyle <managementstyle>`
+  :ref:`Documentation/process/management-style.rst <managementstyle>`
     This document describes how Linux kernel maintainers operate and the
     shared ethos behind their methodologies.  This is important reading
     for anyone new to kernel development (or anyone simply curious about
     it), as it resolves a lot of common misconceptions and confusion
     about the unique behavior of kernel maintainers.
 
-  :ref:`Documentation/stable_kernel_rules.txt <stable_kernel_rules>`
+  :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
     This file describes the rules on how the stable kernel releases
     happen, and what to do if you want to get a change into one of these
     releases.
 
-  :ref:`Documentation/kernel-docs.txt <kernel_docs>`
+  :ref:`Documentation/process/kernel-docs.rst <kernel_docs>`
     A list of external documentation that pertains to kernel
     development.  Please consult this list if you do not find what you
     are looking for within the in-kernel documentation.
 
-  :ref:`Documentation/applying-patches.txt <applying_patches>`
+  :ref:`Documentation/process/applying-patches.rst <applying_patches>`
     A good introduction describing exactly what a patch is and how to
     apply it to the different development branches of the kernel.
 
@@ -307,7 +307,7 @@ two weeks, but it can be longer if there are no pressing problems.  A
 security-related problem, instead, can cause a release to happen almost
 instantly.
 
-The file Documentation/stable_kernel_rules.txt in the kernel tree
+The file Documentation/process/stable-kernel-rules.rst in the kernel tree
 documents what kinds of changes are acceptable for the -stable tree, and
 how the release process works.
 
@@ -366,7 +366,7 @@ tool.  For details on how to use the kernel bugzilla, please see:
 
 	https://bugzilla.kernel.org/page.cgi?id=faq.html
 
-The file REPORTING-BUGS in the main kernel source directory has a good
+The file admin-guide/reporting-bugs.rst in the main kernel source directory has a good
 template for how to report a possible kernel bug, and details what kind
 of information is needed by the kernel developers to help track down the
 problem.
@@ -440,7 +440,7 @@ add your statements between the individual quoted sections instead of
 writing at the top of the mail.
 
 If you add patches to your mail, make sure they are plain readable text
-as stated in Documentation/SubmittingPatches.
+as stated in Documentation/process/submitting-patches.rst.
 Kernel developers don't want to deal with
 attachments or compressed patches; they may want to comment on
 individual lines of your patch, which works only that way. Make sure you
diff --git a/Documentation/process/management-style.rst b/Documentation/process/management-style.rst
index dea2e66c9a10..45595fd8a66b 100644
--- a/Documentation/process/management-style.rst
+++ b/Documentation/process/management-style.rst
@@ -5,7 +5,7 @@ Linux kernel management style
 
 This is a short document describing the preferred (or made up, depending
 on who you ask) management style for the linux kernel.  It's meant to
-mirror the CodingStyle document to some degree, and mainly written to
+mirror the process/coding-style.rst document to some degree, and mainly written to
 avoid answering [#f1]_  the same (or similar) questions over and over again.
 
 Management style is very personal and much harder to quantify than
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 4d82e31b7958..11ec2d93a5e0 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -27,7 +27,7 @@ Rules on what kind of patches are accepted, and which ones are not, into the
  - It cannot contain any "trivial" fixes in it (spelling changes,
    whitespace cleanups, etc).
  - It must follow the
-   :ref:`Documentation/SubmittingPatches <submittingpatches>`
+   :ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
    rules.
  - It or an equivalent fix must already exist in Linus' tree (upstream).
 
@@ -40,7 +40,7 @@ Procedure for submitting patches to the -stable tree
    Documentation/networking/netdev-FAQ.txt
  - Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
-   :ref:`Documentation/SecurityBugs <securitybugs>`.
+   :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
 
 For all other submissions, choose one of the following procedures
 -----------------------------------------------------------------
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 894289b22b15..a0d9d34bfb6d 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -7,7 +7,7 @@ Here are some basic things that developers should do if they want to see their
 kernel patch submissions accepted more quickly.
 
 These are all above and beyond the documentation that is provided in
-:ref:`Documentation/SubmittingPatches <submittingpatches>`
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
 and elsewhere regarding submitting Linux kernel patches.
 
 
@@ -31,7 +31,7 @@ and elsewhere regarding submitting Linux kernel patches.
    tends to use ``unsigned long`` for 64-bit quantities.
 
 5) Check your patch for general style as detailed in
-   :ref:`Documentation/CodingStyle <codingstyle>`.
+   :ref:`Documentation/process/coding-style.rst <codingstyle>`.
    Check for trivial violations with the patch style checker prior to
    submission (``scripts/checkpatch.pl``).
    You should be able to justify all violations that remain in
@@ -78,7 +78,7 @@ and elsewhere regarding submitting Linux kernel patches.
 16) All new ``/proc`` entries are documented under ``Documentation/``
 
 17) All new kernel boot parameters are documented in
-    ``Documentation/kernel-parameters.txt``.
+    ``Documentation/admin-guide/kernel-parameters.rst``.
 
 18) All new module parameters are documented with ``MODULE_PARM_DESC()``
 
diff --git a/Documentation/process/submitting-drivers.rst b/Documentation/process/submitting-drivers.rst
index 252b77a23fad..0939d018c289 100644
--- a/Documentation/process/submitting-drivers.rst
+++ b/Documentation/process/submitting-drivers.rst
@@ -8,7 +8,7 @@ various kernel trees. Note that if you are interested in video card drivers
 you should probably talk to XFree86 (http://www.xfree86.org/) and/or X.Org
 (http://x.org/) instead.
 
-Also read the Documentation/SubmittingPatches document.
+Also read the Documentation/process/submitting-patches.rst document.
 
 
 Allocating Device Numbers
@@ -19,7 +19,7 @@ by the Linux assigned name and number authority (currently this is
 Torben Mathiasen). The site is http://www.lanana.org/. This
 also deals with allocating numbers for devices that are not going to
 be submitted to the mainstream kernel.
-See Documentation/devices.txt for more information on this.
+See Documentation/admin-guide/devices.rst for more information on this.
 
 If you don't use assigned numbers then when your device is submitted it will
 be given an assigned number even if that is different from values you may
@@ -73,7 +73,7 @@ Interfaces:
 
 Code:
 		Please use the Linux style of code formatting as documented
-		in :ref:`Documentation/CodingStyle <codingStyle>`.
+		in :ref:`Documentation/process/coding-style.rst <codingStyle>`.
 		If you have sections of code
 		that need to be in other formats, for example because they
 		are shared with a windows driver kit and you want to
@@ -109,7 +109,7 @@ PM support:
 		anything.  For the driver testing instructions see
 		Documentation/power/drivers-testing.txt and for a relatively
 		complete overview of the power management issues related to
-		drivers see Documentation/power/devices.txt .
+		drivers see Documentation/power/admin-guide/devices.rst .
 
 Control:
 		In general if there is active maintenance of a driver by
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 4cc20b2c6df3..b4cf8f375184 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -11,10 +11,10 @@ can greatly increase the chances of your change being accepted.
 This document contains a large number of suggestions in a relatively terse
 format.  For detailed information on how the kernel development process
 works, see :ref:`Documentation/process <development_process_main>`.
-Also, read :ref:`Documentation/SubmitChecklist <submitchecklist>`
+Also, read :ref:`Documentation/process/submit-checklist.rst <submitchecklist>`
 for a list of items to check before
 submitting code.  If you are submitting a driver, also read
-:ref:`Documentation/SubmittingDrivers <submittingdrivers>`;
+:ref:`Documentation/process/submitting-drivers.rst <submittingdrivers>`;
 for device tree binding patches, read
 Documentation/devicetree/bindings/submitting-patches.txt.
 
@@ -238,7 +238,7 @@ then only post say 15 or so at a time and wait for review and integration.
 
 Check your patch for basic style violations, details of which can be
 found in
-:ref:`Documentation/CodingStyle <codingstyle>`.
+:ref:`Documentation/process/coding-style.rst <codingstyle>`.
 Failure to do so simply wastes
 the reviewers time and will get your patch rejected, probably
 without even being read.
@@ -305,7 +305,7 @@ toward the stable maintainers by putting a line like this::
 
 into the sign-off area of your patch (note, NOT an email recipient).  You
 should also read
-:ref:`Documentation/stable_kernel_rules.txt <stable_kernel_rules>`
+:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
 in addition to this file.
 
 Note, however, that some subsystem maintainers want to come to their own
@@ -363,7 +363,7 @@ decreasing the likelihood of your MIME-attached change being accepted.
 Exception:  If your mailer is mangling patches then someone may ask
 you to re-send them using MIME.
 
-See :ref:`Documentation/email-clients.txt <email_clients>`
+See :ref:`Documentation/process/email-clients.rst <email_clients>`
 for hints about configuring your e-mail client so that it sends your patches
 untouched.
 
@@ -828,8 +828,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lkml.org/lkml/2005/7/11/336>
 
-Kernel Documentation/CodingStyle:
-  :ref:`Documentation/CodingStyle <codingstyle>`
+Kernel Documentation/process/coding-style.rst:
+  :ref:`Documentation/process/coding-style.rst <codingstyle>`
 
 Linus Torvalds's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 1f0c27049340..8c174063b3f0 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -26,7 +26,7 @@ whether they can be changed or not:
                the system software.
 
 The rfkill subsystem has two parameters, rfkill.default_state and
-rfkill.master_switch_mode, which are documented in kernel-parameters.txt.
+rfkill.master_switch_mode, which are documented in admin-guide/kernel-parameters.rst.
 
 
 2. Implementation details
diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
index 8e66dafa41e1..8477655c0e46 100644
--- a/Documentation/scsi/scsi-parameters.txt
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -1,7 +1,7 @@
                           SCSI Kernel Parameters
                           ~~~~~~~~~~~~~~~~~~~~~~
 
-See Documentation/kernel-parameters.txt for general information on
+See Documentation/admin-guide/kernel-parameters.rst for general information on
 specifying module parameters.
 
 This document may not be entirely up to date and comprehensive. The command
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 255075157511..6338400eed73 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -336,7 +336,7 @@ in parallel by these functions.
 Conventions
 ===========
 First, Linus Torvalds's thoughts on C coding style can be found in the
-Documentation/CodingStyle file. 
+Documentation/process/coding-style.rst file.
 
 Next, there is a movement to "outlaw" typedefs introducing synonyms for 
 struct tags. Both can be still found in the SCSI subsystem, but
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt
index 6af8f7a7770f..d28186553fb0 100644
--- a/Documentation/scsi/sym53c8xx_2.txt
+++ b/Documentation/scsi/sym53c8xx_2.txt
@@ -427,7 +427,7 @@ Synchronous transfers frequency       (default answer: 80)
 10.1 Syntax
 
 Setup commands can be passed to the driver either at boot time or as
-parameters to modprobe, as described in Documentation/kernel-parameters.txt
+parameters to modprobe, as described in Documentation/admin-guide/kernel-parameters.rst
 
 Example of boot setup command under lilo prompt:
 
diff --git a/Documentation/sound/alsa/alsa-parameters.txt b/Documentation/sound/alsa/alsa-parameters.txt
index 0fa40679b080..72eced86f035 100644
--- a/Documentation/sound/alsa/alsa-parameters.txt
+++ b/Documentation/sound/alsa/alsa-parameters.txt
@@ -1,7 +1,7 @@
                           ALSA Kernel Parameters
                           ~~~~~~~~~~~~~~~~~~~~~~
 
-See Documentation/kernel-parameters.txt for general information on
+See Documentation/admin-guide/kernel-parameters.rst for general information on
 specifying module parameters.
 
 This document may not be entirely up to date and comprehensive. The command
diff --git a/Documentation/sound/oss/oss-parameters.txt b/Documentation/sound/oss/oss-parameters.txt
index 3ab391e7c295..cc675f25eee4 100644
--- a/Documentation/sound/oss/oss-parameters.txt
+++ b/Documentation/sound/oss/oss-parameters.txt
@@ -1,7 +1,7 @@
                           OSS Kernel Parameters
                           ~~~~~~~~~~~~~~~~~~~~~
 
-See Documentation/kernel-parameters.txt for general information on
+See Documentation/admin-guide/kernel-parameters.rst for general information on
 specifying module parameters.
 
 This document may not be entirely up to date and comprehensive. The command
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index ffab8b5caa60..6bb78f872929 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -71,7 +71,7 @@ show up in /proc/sys/kernel:
 - printk_ratelimit_burst
 - pty                         ==> Documentation/filesystems/devpts.txt
 - randomize_va_space
-- real-root-dev               ==> Documentation/initrd.txt
+- real-root-dev               ==> Documentation/admin-guide/initrd.rst
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
 - rtsig-nr
@@ -453,7 +453,7 @@ in a KVM virtual machine. This default can be overridden by adding
 
    nmi_watchdog=1
 
-to the guest kernel command line (see Documentation/kernel-parameters.txt).
+to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
 
 ==============================================================
 
diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt
index a850986ed684..a83b27635fdd 100644
--- a/Documentation/virtual/kvm/review-checklist.txt
+++ b/Documentation/virtual/kvm/review-checklist.txt
@@ -1,8 +1,8 @@
 Review checklist for kvm patches
 ================================
 
-1.  The patch must follow Documentation/CodingStyle and
-    Documentation/SubmittingPatches.
+1.  The patch must follow Documentation/process/coding-style.rst and
+    Documentation/process/submitting-patches.rst.
 
 2.  Patches should be against kvm.git master branch.
 
diff --git a/Documentation/vm/numa b/Documentation/vm/numa
index e0b58c0e6b49..a08f71647714 100644
--- a/Documentation/vm/numa
+++ b/Documentation/vm/numa
@@ -82,7 +82,7 @@ such as DMA or DMA32, represent relatively scarce resources.  Linux chooses
 a default zonelist order based on the sizes of the various zone types relative
 to the total memory of the node and the total memory of the system.  The
 default zonelist order may be overridden using the numa_zonelist_order kernel
-boot parameter or sysctl.  [see Documentation/kernel-parameters.txt and
+boot parameter or sysctl.  [see Documentation/admin-guide/kernel-parameters.rst and
 Documentation/sysctl/vm.txt]
 
 By default, Linux will attempt to satisfy memory allocation requests from the
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
index 271b8850dde7..9fffb2958d13 100644
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
@@ -213,6 +213,6 @@ The entry for the driver now needs to select WATCHDOG_CORE:
 Create a patch and send it to upstream
 --------------------------------------
 
-Make sure you understood Documentation/SubmittingPatches and send your patch to
+Make sure you understood Documentation/process/submitting-patches.rst and send your patch to
 linux-watchdog@vger.kernel.org. We are looking forward to it :)
 
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index a8d364227a77..e21850e270a0 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -4,7 +4,7 @@ be listed here unless the driver has its own driver-specific information
 file.
 
 
-See Documentation/kernel-parameters.txt for information on
+See Documentation/admin-guide/kernel-parameters.rst for information on
 providing kernel parameters for builtin drivers versus loadable
 modules.
 
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9da6f3512249..5e9b826b5f62 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -921,7 +921,7 @@ They should normally not be deleted from the kernel command line even
 though not all of them are actually meaningful to the kernel.  Boot
 loader authors who need additional command line options for the boot
 loader itself should get them registered in
-Documentation/kernel-parameters.txt to make sure they will not
+Documentation/admin-guide/kernel-parameters.rst to make sure they will not
 conflict with actual kernel options now or in the future.
 
   vga=<mode>
diff --git a/Documentation/zh_CN/CodingStyle b/Documentation/zh_CN/CodingStyle
index 12717791baac..b02738042799 100644
--- a/Documentation/zh_CN/CodingStyle
+++ b/Documentation/zh_CN/CodingStyle
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/CodingStyle
+Chinese translated version of Documentation/process/coding-style.rst
 
 If you have any comment or update to the content, please post to LKML directly.
 However, if you have problem communicating in English you can also ask the
@@ -7,7 +7,7 @@ translation is outdated or there is problem with translation.
 
 Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
 ---------------------------------------------------------------------
-Documentation/CodingStyle的中文翻译
+Documentation/process/coding-style.rst的中文翻译
 
 如果想评论或更新本文的内容，请直接发信到LKML。如果你使用英文交流有困难的话，也可
 以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题，请联系中文版维护者。
@@ -809,5 +809,5 @@ GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals and indent,
 
 WG14是C语言的国际标准化工作组，URL: http://www.open-std.org/JTC1/SC22/WG14/
 
-Kernel CodingStyle，作者 greg@kroah.com 发表于OLS 2002：
+Kernel process/coding-style.rst，作者 greg@kroah.com 发表于OLS 2002：
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
index f0613b92e0be..11be075ba5fa 100644
--- a/Documentation/zh_CN/HOWTO
+++ b/Documentation/zh_CN/HOWTO
@@ -1,4 +1,4 @@
-﻿Chinese translated version of Documentation/HOWTO
+﻿Chinese translated version of Documentation/process/howto.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
 Maintainer: Greg Kroah-Hartman <greg@kroah.com>
 Chinese maintainer: Li Yang <leoli@freescale.com>
 ---------------------------------------------------------------------
-Documentation/HOWTO 的中文翻译
+Documentation/process/howto.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -93,16 +93,16 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
     文件简要介绍了Linux内核的背景，并且描述了如何配置和编译内核。内核的
     新用户应该从这里开始。
 
-  Documentation/Changes
+  Documentation/process/changes.rst
     文件给出了用来编译和使用内核所需要的最小软件包列表。
 
-  Documentation/CodingStyle
+  Documentation/process/coding-style.rst
     描述Linux内核的代码风格和理由。所有新代码需要遵守这篇文档中定义的规
     范。大多数维护者只会接收符合规定的补丁，很多人也只会帮忙检查符合风格
     的代码。
 
-  Documentation/SubmittingPatches
-  Documentation/SubmittingDrivers
+  Documentation/process/submitting-patches.rst
+  Documentation/process/submitting-drivers.rst
     这两份文档明确描述如何创建和发送补丁，其中包括（但不仅限于)：
        - 邮件内容
        - 邮件格式
@@ -116,7 +116,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
     "Linux kernel patch submission format"
         http://linux.yyz.us/patch-format.html
 
-  Documentation/stable_api_nonsense.txt
+  Documentation/process/stable-api-nonsense.rst
     论证内核为什么特意不包括稳定的内核内部API，也就是说不包括像这样的特
     性：
        - 子系统中间层（为了兼容性？）
@@ -125,23 +125,23 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
     这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系
     统转移到Linux的人来说也很重要。
 
-  Documentation/SecurityBugs
+  Documentation/admin-guide/security-bugs.rst
     如果你认为自己发现了Linux内核的安全性问题，请根据这篇文档中的步骤来
     提醒其他内核开发者并帮助解决这个问题。
 
-  Documentation/ManagementStyle
+  Documentation/process/management-style.rst
     描述内核维护者的工作方法及其共有特点。这对于刚刚接触内核开发（或者对
     它感到好奇）的人来说很重要，因为它解释了很多对于内核维护者独特行为的
     普遍误解与迷惑。
 
-  Documentation/stable_kernel_rules.txt
+  Documentation/process/stable-kernel-rules.rst
     解释了稳定版内核发布的规则，以及如何将改动放入这些版本的步骤。
 
-  Documentation/kernel-docs.txt
+  Documentation/process/kernel-docs.rst
     有助于内核开发的外部文档列表。如果你在内核自带的文档中没有找到你想找
     的内容，可以查看这些文档。
 
-  Documentation/applying-patches.txt
+  Documentation/process/applying-patches.rst
     关于补丁是什么以及如何将它打在不同内核开发分支上的好介绍
 
 内核还拥有大量从代码自动生成的文档。它包含内核内部API的全面介绍以及如何
@@ -238,7 +238,7 @@ kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开发遵循
 2.6.x.y版本由“稳定版”小组（邮件地址<stable@vger.kernel.org>）维护，一般隔周发
 布新版本。
 
-内核源码中的Documentation/stable_kernel_rules.txt文件具体描述了可被稳定
+内核源码中的Documentation/process/stable-kernel-rules.rst文件具体描述了可被稳定
 版内核接受的修改类型以及发布的流程。
 
 
@@ -329,7 +329,7 @@ bugzilla.kernel.org是Linux内核开发者们用来跟踪内核Bug的网站。
 户在这个工具中报告找到的所有bug。如何使用内核bugzilla的细节请访问：
 	http://test.kernel.org/bugzilla/faq.html
 
-内核源码主目录中的REPORTING-BUGS文件里有一个很好的模板。它指导用户如何报
+内核源码主目录中的admin-guide/reporting-bugs.rst文件里有一个很好的模板。它指导用户如何报
 告可能的内核bug以及需要提供哪些信息来帮助内核开发者们找到问题的根源。
 
 
@@ -380,7 +380,7 @@ MAINTAINERS文件中可以找到不同话题对应的邮件列表。
 这几行。将你的评论加在被引用的段落之间而不要放在邮件的顶部。
 
 如果你在邮件中附带补丁，请确认它们是可以直接阅读的纯文本（如
-Documentation/SubmittingPatches文档中所述）。内核开发者们不希望遇到附件
+Documentation/process/submitting-patches.rst文档中所述）。内核开发者们不希望遇到附件
 或者被压缩了的补丁。只有这样才能保证他们可以直接评论你的每行代码。请确保
 你使用的邮件发送程序不会修改空格和制表符。一个防范性的测试方法是先将邮件
 发送给自己，然后自己尝试是否可以顺利地打上收到的补丁。如果测试不成功，请
diff --git a/Documentation/zh_CN/SecurityBugs b/Documentation/zh_CN/SecurityBugs
index d21eb07fe943..2d0fffd122ce 100644
--- a/Documentation/zh_CN/SecurityBugs
+++ b/Documentation/zh_CN/SecurityBugs
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/SecurityBugs
+Chinese translated version of Documentation/admin-guide/security-bugs.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
 ---------------------------------------------------------------------
-Documentation/SecurityBugs 的中文翻译
+Documentation/admin-guide/security-bugs.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -31,7 +31,7 @@ linux内核安全团队可以通过email<security@kernel.org>来联系。这是
 一组独立的安全工作人员，可以帮助改善漏洞报告并且公布和取消一个修复。安
 全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
 当遇到任何漏洞，所能提供的信息越多就越能诊断和修复。如果你不清楚什么
-是有帮助的信息，那就请重温一下REPORTING-BUGS文件中的概述过程。任
+是有帮助的信息，那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任
 何攻击性的代码都是非常有用的，未经报告者的同意不会被取消，除非它已经
 被公布于众。
 
diff --git a/Documentation/zh_CN/SubmittingDrivers b/Documentation/zh_CN/SubmittingDrivers
index d313f5d8448d..929385e4b194 100644
--- a/Documentation/zh_CN/SubmittingDrivers
+++ b/Documentation/zh_CN/SubmittingDrivers
@@ -1,4 +1,4 @@
-﻿Chinese translated version of Documentation/SubmittingDrivers
+﻿Chinese translated version of Documentation/process/submitting-drivers.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: Li Yang <leo@zh-kernel.org>
 ---------------------------------------------------------------------
-Documentation/SubmittingDrivers 的中文翻译
+Documentation/process/submitting-drivers.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -30,7 +30,7 @@ Documentation/SubmittingDrivers 的中文翻译
 兴趣的是显卡驱动程序，你也许应该访问 XFree86 项目(http://www.xfree86.org/)
 和／或 X.org 项目 (http://x.org)。
 
-另请参阅 Documentation/SubmittingPatches 文档。
+另请参阅 Documentation/process/submitting-patches.rst 文档。
 
 
 分配设备号
@@ -39,7 +39,7 @@ Documentation/SubmittingDrivers 的中文翻译
 块设备和字符设备的主设备号与从设备号是由 Linux 命名编号分配权威 LANANA（
 现在是 Torben Mathiasen）负责分配。申请的网址是 http://www.lanana.org/。
 即使不准备提交到主流内核的设备驱动也需要在这里分配设备号。有关详细信息，
-请参阅 Documentation/devices.txt。
+请参阅 Documentation/admin-guide/devices.rst。
 
 如果你使用的不是已经分配的设备号，那么当你提交设备驱动的时候，它将会被强
 制分配一个新的设备号，即便这个设备号和你之前发给客户的截然不同。
@@ -81,7 +81,7 @@ Linux 2.6:
 		如果你需要一个 Linux 和 NT 的通用驱动接口，那么请在用
 		户空间实现它。
 
-代码：		请使用 Documentation/CodingStyle 中所描述的 Linux 代码风
+代码：		请使用 Documentation/process/coding-style.rst 中所描述的 Linux 代码风
 		格。如果你的某些代码段（例如那些与 Windows 驱动程序包共
 		享的代码段）需要使用其他格式，而你却只希望维护一份代码，
 		那么请将它们很好地区分出来，并且注明原因。
@@ -107,7 +107,7 @@ Linux 2.6:
 		程序测试的指导，请参阅
 		Documentation/power/drivers-testing.txt。有关驱动程序电
 		源管理问题相对全面的概述，请参阅
-		Documentation/power/devices.txt。
+		Documentation/power/admin-guide/devices.rst。
 
 管理：		如果一个驱动程序的作者还在进行有效的维护，那么通常除了那
 		些明显正确且不需要任何检查的补丁以外，其他所有的补丁都会
diff --git a/Documentation/zh_CN/SubmittingPatches b/Documentation/zh_CN/SubmittingPatches
index 1d3a10f8746b..e9098da8f1a4 100644
--- a/Documentation/zh_CN/SubmittingPatches
+++ b/Documentation/zh_CN/SubmittingPatches
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/SubmittingPatches
+Chinese translated version of Documentation/process/submitting-patches.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: TripleX Chung <triplex@zh-kernel.org>
 ---------------------------------------------------------------------
-Documentation/SubmittingPatches 的中文翻译
+Documentation/process/submitting-patches.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -30,9 +30,9 @@ Documentation/SubmittingPatches 的中文翻译
 对于想要将改动提交到 Linux 内核的个人或者公司来说，如果不熟悉“规矩”，
 提交的流程会让人畏惧。本文档收集了一系列建议，这些建议可以大大的提高你
 的改动被接受的机会。
-阅读 Documentation/SubmitChecklist 来获得在提交代码前需要检查的项目的列
+阅读 Documentation/process/submit-checklist.rst 来获得在提交代码前需要检查的项目的列
 表。如果你在提交一个驱动程序，那么同时阅读一下
-Documentation/SubmittingDrivers 。
+Documentation/process/submitting-drivers.rst 。
 
 
 --------------------------
@@ -338,7 +338,7 @@ e-mail 标题中的“一句话概述”扼要的描述 e-mail 中的补丁。
 本节包含很多和提交到内核的代码有关的通常的"规则"。事情永远有例外...但是
 你必须真的有好的理由这样做。你可以把本节叫做Linus的计算机科学入门课。
 
-1) 读 Document/CodingStyle
+1) 读 Document/process/coding-style.rst
 
 Nuff 说过，如果你的代码和这个偏离太多，那么它有可能会被拒绝，没有更多的
 审查，没有更多的评价。
@@ -404,8 +404,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lkml.org/lkml/2005/7/11/336>
 
-Kernel Documentation/CodingStyle:
-  <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
+Kernel Documentation/process/coding-style.rst:
+  <http://sosdg.org/~coywolf/lxr/source/Documentation/process/coding-style.rst>
 
 Linus Torvalds's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
diff --git a/Documentation/zh_CN/arm/Booting b/Documentation/zh_CN/arm/Booting
index 6158a64df80c..1fe866f8218f 100644
--- a/Documentation/zh_CN/arm/Booting
+++ b/Documentation/zh_CN/arm/Booting
@@ -68,7 +68,7 @@ RAM，或可能使用对这个设备已知的 RAM 信息，还可能使用任何
 作为替代方案，引导加载程序也可以通过标签列表传递相关的'console='
 选项给内核以指定某个串口，而串口数据格式的选项在以下文档中描述：
 
-       Documentation/kernel-parameters.txt。
+       Documentation/admin-guide/kernel-parameters.rst。
 
 
 3、检测机器类型
diff --git a/Documentation/zh_CN/email-clients.txt b/Documentation/zh_CN/email-clients.txt
index b9a1a3e6c78d..ec31d97e8d0e 100644
--- a/Documentation/zh_CN/email-clients.txt
+++ b/Documentation/zh_CN/email-clients.txt
@@ -1,4 +1,4 @@
-﻿Chinese translated version of Documentation/email-clients.txt
+﻿Chinese translated version of Documentation/process/email-clients.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
 ---------------------------------------------------------------------
-Documentation/email-clients.txt 的中文翻译
+Documentation/process/email-clients.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/zh_CN/oops-tracing.txt b/Documentation/zh_CN/oops-tracing.txt
index 9312608ffb8d..41ab53cc0e83 100644
--- a/Documentation/zh_CN/oops-tracing.txt
+++ b/Documentation/zh_CN/oops-tracing.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/oops-tracing.txt
+Chinese translated version of Documentation/admin-guide/oops-tracing.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: Dave Young <hidave.darkstar@gmail.com>
 ---------------------------------------------------------------------
-Documentation/oops-tracing.txt 的中文翻译
+Documentation/admin-guide/oops-tracing.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -50,7 +50,7 @@ cat /proc/kmsg > file， 然而你必须介入中止传输， kmsg是一个“
 息滚动到了终端的上面，你会发现以高分辩率启动（比如，vga=791）会让你读到更多的文
 本。（注意：这需要vesafb，所以对‘早期’的oops没有帮助）
 
-（2）用串口终端启动（请参看Documentation/serial-console.txt），运行一个null
+（2）用串口终端启动（请参看Documentation/admin-guide/serial-console.rst），运行一个null
 modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
 
 （3）使用Kdump（请参看Documentation/kdump/kdump.txt），
diff --git a/Documentation/zh_CN/stable_api_nonsense.txt b/Documentation/zh_CN/stable_api_nonsense.txt
index c26a27d1ee7d..a2b27fab382c 100644
--- a/Documentation/zh_CN/stable_api_nonsense.txt
+++ b/Documentation/zh_CN/stable_api_nonsense.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/stable_api_nonsense.txt
+Chinese translated version of Documentation/process/stable-api-nonsense.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have problem
@@ -9,7 +9,7 @@ is problem with translation.
 Maintainer: Greg Kroah-Hartman <greg@kroah.com>
 Chinese maintainer: TripleX Chung <zhongyu@18mail.cn>
 ---------------------------------------------------------------------
-Documentation/stable_api_nonsense.txt 的中文翻译
+Documentation/process/stable-api-nonsense.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/zh_CN/stable_kernel_rules.txt b/Documentation/zh_CN/stable_kernel_rules.txt
index 26ea5ed7cd9c..db4ba5a0c39a 100644
--- a/Documentation/zh_CN/stable_kernel_rules.txt
+++ b/Documentation/zh_CN/stable_kernel_rules.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/stable_kernel_rules.txt
+Chinese translated version of Documentation/process/stable-kernel-rules.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
 
 Chinese maintainer: TripleX Chung <triplex@zh-kernel.org>
 ---------------------------------------------------------------------
-Documentation/stable_kernel_rules.txt 的中文翻译
+Documentation/process/stable-kernel-rules.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
@@ -38,7 +38,7 @@ Documentation/stable_kernel_rules.txt 的中文翻译
   - 没有“理论上的竞争条件”，除非能给出竞争条件如何被利用的解释。
   - 不能存在任何的“琐碎的”修正（拼写修正，去掉多余空格之类的）。
   - 必须被相关子系统的维护者接受。
-  - 必须遵循Documentation/SubmittingPatches里的规则。
+  - 必须遵循Documentation/process/submitting-patches.rst里的规则。
 
 向稳定版代码树提交补丁的过程：
 
diff --git a/Documentation/zh_CN/volatile-considered-harmful.txt b/Documentation/zh_CN/volatile-considered-harmful.txt
index ba8149d2233a..475125967197 100644
--- a/Documentation/zh_CN/volatile-considered-harmful.txt
+++ b/Documentation/zh_CN/volatile-considered-harmful.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/volatile-considered-harmful.txt
+Chinese translated version of Documentation/process/volatile-considered-harmful.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
 Maintainer: Jonathan Corbet <corbet@lwn.net>
 Chinese maintainer: Bryan Wu <bryan.wu@analog.com>
 ---------------------------------------------------------------------
-Documentation/volatile-considered-harmful.txt 的中文翻译
+Documentation/process/volatile-considered-harmful.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/MAINTAINERS b/MAINTAINERS
index de0451df542f..69820b75b2e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -35,13 +35,13 @@ trivial patch so apply some common sense.
 
 	PLEASE check your patch with the automated style checker
 	(scripts/checkpatch.pl) to catch trivial style violations.
-	See Documentation/CodingStyle for guidance here.
+	See Documentation/process/coding-style.rst for guidance here.
 
 	PLEASE CC: the maintainers and mailing lists that are generated
 	by scripts/get_maintainer.pl.  The results returned by the
 	script will be best if you have git installed and are making
 	your changes in a branch derived from Linus' latest git tree.
-	See Documentation/SubmittingPatches for details.
+	See Documentation/process/submitting-patches.rst for details.
 
 	PLEASE try to include any credit lines you want added with the
 	patch. It avoids people being missed off by mistake and makes
@@ -54,7 +54,7 @@ trivial patch so apply some common sense.
 	of the Linux Foundation certificate of contribution and should
 	include a Signed-off-by: line.  The current version of this
 	"Developer's Certificate of Origin" (DCO) is listed in the file
-	Documentation/SubmittingPatches.
+	Documentation/process/submitting-patches.rst.
 
 6.	Make sure you have the right to send any changes you make. If you
 	do changes at work you may find your employer owns the patch
@@ -2924,7 +2924,7 @@ CAPELLA MICROSYSTEMS LIGHT SENSOR DRIVER
 M:	Kevin Tsai <ktsai@capellamicro.com>
 S:	Maintained
 F:	drivers/iio/light/cm*
-F:	Documentation/devicetree/bindings/i2c/trivial-devices.txt
+F:	Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
 
 CAVIUM I2C DRIVER
 M:	Jan Glauber <jglauber@cavium.com>
@@ -11438,7 +11438,7 @@ STABLE BRANCH
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 L:	stable@vger.kernel.org
 S:	Supported
-F:	Documentation/stable_kernel_rules.txt
+F:	Documentation/process/stable-kernel-rules.rst
 
 STAGING SUBSYSTEM
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bada636d1065..19d237b0737d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1525,7 +1525,7 @@ config X86_CHECK_BIOS_CORRUPTION
 	  line.  By default it scans the low 64k of memory every 60
 	  seconds; see the memory_corruption_check_size and
 	  memory_corruption_check_period parameters in
-	  Documentation/kernel-parameters.txt to adjust this.
+	  Documentation/admin-guide/kernel-parameters.rst to adjust this.
 
 	  When enabled with the default parameters, this option has
 	  almost no overhead, as it reserves a relatively small amount
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 535e7828445a..c5f9cbe0ae21 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -342,7 +342,7 @@ config ACPI_DEBUG
 
 	  Use the acpi.debug_layer and acpi.debug_level kernel command-line
 	  parameters documented in Documentation/acpi/debug.txt and
-	  Documentation/kernel-parameters.txt to control the type and
+	  Documentation/admin-guide/kernel-parameters.rst to control the type and
 	  amount of debug output.
 
 config ACPI_PCI_SLOT
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 223a770f78f3..59ce0dd50701 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -129,7 +129,7 @@ static int ata_force_tbl_size;
 static char ata_force_param_buf[PAGE_SIZE] __initdata;
 /* param_buf is thrown away after initialization, disallow read */
 module_param_string(force, ata_force_param_buf, sizeof(ata_force_param_buf), 0);
-MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/kernel-parameters.txt for details)");
+MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/admin-guide/kernel-parameters.rst for details)");
 
 static int atapi_enabled = 1;
 module_param(atapi_enabled, int, 0444);
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index c115217c79ae..e051fc8aa7d7 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -14,7 +14,7 @@
   * (C) 2000,2001,2002,2003,2004 Omnikey AG
   *
   * (C) 2005-2006 Harald Welte <laforge@gnumonks.org>
-  * 	- Adhere to Kernel CodingStyle
+  * 	- Adhere to Kernel process/coding-style.rst
   * 	- Port to 2.6.13 "new" style PCMCIA
   * 	- Check for copy_{from,to}_user return values
   * 	- Use nonseekable_open()
@@ -151,7 +151,7 @@ static struct pcmcia_device *dev_table[CM4000_MAX_DEV];
 static struct class *cmm_class;
 
 /* This table doesn't use spaces after the comma between fields and thus
- * violates CodingStyle.  However, I don't really think wrapping it around will
+ * violates process/coding-style.rst.  However, I don't really think wrapping it around will
  * make it any clearer to read -HW */
 static unsigned char fi_di_table[10][14] = {
 /*FI     00   01   02   03   04   05   06   07   08   09   10   11   12   13 */
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index db9538d4b358..a7be12d9a139 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -15,7 +15,7 @@
  * See "Documentation/ABI/testing/sysfs-class-net-grcan" for information on the
  * sysfs interface.
  *
- * See "Documentation/kernel-parameters.txt" for information on the module
+ * See "Documentation/admin-guide/kernel-parameters.rst" for information on the module
  * parameters.
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 8b2b740d6679..b20ce7da1ee4 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -28,7 +28,7 @@ config BLK_DEV_PMEM
 	  non-standard OEM-specific E820 memory type (type-12, see
 	  CONFIG_X86_PMEM_LEGACY), or it is manually specified by the
 	  'memmap=nn[KMG]!ss[KMG]' kernel command line (see
-	  Documentation/kernel-parameters.txt).  This driver converts
+	  Documentation/admin-guide/kernel-parameters.rst).  This driver converts
 	  these persistent memory ranges into block devices that are
 	  capable of DAX (direct-access) file system mappings.  See
 	  Documentation/nvdimm/nvdimm.txt for more details.
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 5dd430f8f921..d84dffb894f4 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -47,7 +47,7 @@ static const char driver_name[] = "vme_user";
 static int bus[VME_USER_BUS_MAX];
 static unsigned int bus_num;
 
-/* Currently Documentation/devices.txt defines the following for VME:
+/* Currently Documentation/admin-guide/devices.rst defines the following for VME:
  *
  * 221 char	VME bus
  *		  0 = /dev/bus/vme/m0		First master image
diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c
index f948baa16d82..e219a0a22077 100644
--- a/drivers/video/fbdev/skeletonfb.c
+++ b/drivers/video/fbdev/skeletonfb.c
@@ -836,7 +836,7 @@ static void xxxfb_remove(struct pci_dev *dev)
  *	@dev: PCI device
  *	@msg: the suspend event code.
  *
- *      See Documentation/power/devices.txt for more information
+ *      See Documentation/power/admin-guide/devices.rst for more information
  */
 static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg)
 {
@@ -851,7 +851,7 @@ static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg)
  *	xxxfb_resume - Optional but recommended function. Resume the device.
  *	@dev: PCI device
  *
- *      See Documentation/power/devices.txt for more information
+ *      See Documentation/power/admin-guide/devices.rst for more information
  */
 static int xxxfb_resume(struct pci_dev *dev)
 {
@@ -915,7 +915,7 @@ static void __exit xxxfb_exit(void)
  *	@dev: platform device
  *	@msg: the suspend event code.
  *
- *      See Documentation/power/devices.txt for more information
+ *      See Documentation/power/admin-guide/devices.rst for more information
  */
 static int xxxfb_suspend(struct platform_device *dev, pm_message_t msg)
 {
@@ -930,7 +930,7 @@ static int xxxfb_suspend(struct platform_device *dev, pm_message_t msg)
  *	xxxfb_resume - Optional but recommended function. Resume the device.
  *	@dev: platform device
  *
- *      See Documentation/power/devices.txt for more information
+ *      See Documentation/power/admin-guide/devices.rst for more information
  */
 static int xxxfb_resume(struct platform_dev *dev)
 {
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 77590320d44c..623f72334fa5 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -75,7 +75,7 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
 	 Allow virtio-mmio devices instantiation via the kernel command line
 	 or module parameters. Be aware that using incorrect parameters (base
 	 address in particular) can crash your system - you have been warned.
-	 See Documentation/kernel-parameters.txt for details.
+	 See Documentation/admin-guide/kernel-parameters.rst for details.
 
 	 If unsure, say 'N'.
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4c09d93d9569..b2f82cf6bf86 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -170,8 +170,8 @@ config BINFMT_MISC
 
 	  You can do other nice things, too. Read the file
 	  <file:Documentation/binfmt_misc.txt> to learn how to use this
-	  feature, <file:Documentation/java.txt> for information about how
-	  to include Java support. and <file:Documentation/mono.txt> for
+	  feature, <file:Documentation/admin-guide/java.rst> for information about how
+	  to include Java support. and <file:Documentation/admin-guide/mono.rst> for
           information about how to include Mono-based .NET support.
 
           To use binfmt_misc, you will need to mount it:
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index be40813eff52..b42e5bd6d8ff 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -86,4 +86,4 @@ config PSTORE_RAM
 	  Note that for historical reasons, the module will be named
 	  "ramoops.ko".
 
-	  For more information, see Documentation/ramoops.txt.
+	  For more information, see Documentation/admin-guide/ramoops.rst.
diff --git a/include/linux/device.h b/include/linux/device.h
index bc41e87a969b..36d3a9867da9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -733,7 +733,7 @@ struct device_dma_parameters {
  * 		minimizes board-specific #ifdefs in drivers.
  * @driver_data: Private pointer for driver specific info.
  * @power:	For device power management.
- * 		See Documentation/power/devices.txt for details.
+ * 		See Documentation/power/admin-guide/devices.rst for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 06eb353182ab..efa67b2dfee9 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -258,7 +258,7 @@ typedef struct pm_message {
  * example, if it detects that a child was unplugged while the system was
  * asleep).
  *
- * Refer to Documentation/power/devices.txt for more information about the role
+ * Refer to Documentation/power/admin-guide/devices.rst for more information about the role
  * of the above callbacks in the system suspend process.
  *
  * There also are callbacks related to runtime power management of devices.
diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h
index 620252e69b44..19e195bee990 100644
--- a/include/uapi/linux/major.h
+++ b/include/uapi/linux/major.h
@@ -3,7 +3,7 @@
 
 /*
  * This file has definitions for major device numbers.
- * For the device number assignments, see Documentation/devices.txt.
+ * For the device number assignments, see Documentation/admin-guide/devices.rst.
  */
 
 #define UNNAMED_MAJOR		0
diff --git a/init/Kconfig b/init/Kconfig
index 34407f15e6d3..172f80ea0d58 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1306,7 +1306,7 @@ config BLK_DEV_INITRD
 	  boot loader (loadlin or lilo) and that is mounted as root
 	  before the normal boot procedure. It is typically used to
 	  load modules needed to mount the "real" root file system,
-	  etc. See <file:Documentation/initrd.txt> for details.
+	  etc. See <file:Documentation/admin-guide/initrd.rst> for details.
 
 	  If RAM disk support (BLK_DEV_RAM) is also included, this
 	  also enables initial RAM disk (initrd) support and adds
diff --git a/init/main.c b/init/main.c
index 2858be732f6d..691eb9351a83 100644
--- a/init/main.c
+++ b/init/main.c
@@ -980,7 +980,7 @@ static int __ref kernel_init(void *unused)
 		return 0;
 
 	panic("No working init found.  Try passing init= option to kernel. "
-	      "See Linux Documentation/init.txt for guidance.");
+	      "See Linux Documentation/admin-guide/init.rst for guidance.");
 }
 
 static noinline void __init kernel_init_freeable(void)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 33bc56cf60d7..d2df3a93284b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -13,7 +13,7 @@ config PRINTK_TIME
 	  be included, not that the timestamp is recorded.
 
 	  The behavior is also controlled by the kernel command line
-	  parameter printk.time=1. See Documentation/kernel-parameters.txt
+	  parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst
 
 config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a8368d1c4348..d0c729ccec20 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2187,7 +2187,7 @@ sub process {
 
 		if ($rawline=~/^\+\+\+\s+(\S+)/) {
 			$setup_docs = 0;
-			if ($1 =~ m@Documentation/kernel-parameters.txt$@) {
+			if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) {
 				$setup_docs = 1;
 			}
 			#next;
@@ -5102,7 +5102,7 @@ sub process {
 		my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
 		if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) {
 			WARN("VOLATILE",
-			     "Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
+			     "Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst\n" . $herecurr);
 		}
 
 # Check for user-visible strings broken across lines, which breaks the ability
@@ -5817,7 +5817,7 @@ sub process {
 
 			if (!grep(/$name/, @setup_docs)) {
 				CHK("UNDOCUMENTED_SETUP",
-				    "__setup appears un-documented -- check Documentation/kernel-parameters.txt\n" . $herecurr);
+				    "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr);
 			}
 		}
 
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
index 0558bb9ce0a6..f3926c33ed4c 100644
--- a/tools/testing/selftests/futex/README
+++ b/tools/testing/selftests/futex/README
@@ -59,4 +59,4 @@ o FIXME: decide on a sane test naming scheme.  Currently the tests are named
 Coding Style
 ------------
 o The Futex Test project adheres to the coding standards set forth by Linux
-  kernel as defined in the Linux source Documentation/CodingStyle.
+  kernel as defined in the Linux source Documentation/process/coding-style.rst.
-- 
cgit v1.2.3


From 864e2fe935228c5c551da8638b5fdd07f82d04a7 Mon Sep 17 00:00:00 2001
From: Amitesh Singh <singh.amitesh@gmail.com>
Date: Fri, 30 Sep 2016 22:40:57 +0530
Subject: usb: fix a typo in usb_class_driver documentation

replace usb_unregister_dev by usb_deregister_dev

Signed-off-by: Amitesh Singh <singh.amitesh@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index eba1f10e8cfd..7e68259360de 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1160,7 +1160,7 @@ extern struct bus_type usb_bus_type;
  * @minor_base: the start of the minor range for this driver.
  *
  * This structure is used for the usb_register_dev() and
- * usb_unregister_dev() functions, to consolidate a number of the
+ * usb_deregister_dev() functions, to consolidate a number of the
  * parameters used for them.
  */
 struct usb_class_driver {
-- 
cgit v1.2.3


From 08bcd3edec2559833aa0ed0213cc300fc9705dd6 Mon Sep 17 00:00:00 2001
From: Anthony Best <anthonybest@bestanthony.com>
Date: Tue, 4 Oct 2016 14:15:42 -0600
Subject: gpio: fix struct gpio_chip comment

It should have been @reg_clr instead of @reg_clk

Signed-off-by: Anthony Best <anthonybest@bestanthony.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 24e2cc56beb1..2dfcf25b1724 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -91,7 +91,7 @@ enum single_ended_mode {
  *	bit. This callback assigns the right bit mask.
  * @reg_dat: data (in) register for generic GPIO
  * @reg_set: output set register (out=high) for generic GPIO
- * @reg_clk: output clear register (out=low) for generic GPIO
+ * @reg_clr: output clear register (out=low) for generic GPIO
  * @reg_dir: direction setting register for generic GPIO
  * @bgpio_bits: number of register bits used for a generic GPIO i.e.
  *	<register width> * 8
-- 
cgit v1.2.3


From daa6e41ce2b594e7d622b4cd3978defca1139666 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 6 Sep 2016 15:18:29 -0700
Subject: soc: qcom: wcnss_ctrl: Stub wcnss_ctrl API

Stub the wcnss_ctrl API to allow compile testing wcnss function drivers.

Cc: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/soc/qcom/wcnss_ctrl.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/wcnss_ctrl.h b/include/linux/soc/qcom/wcnss_ctrl.h
index a37bc5538f19..eab64976a73b 100644
--- a/include/linux/soc/qcom/wcnss_ctrl.h
+++ b/include/linux/soc/qcom/wcnss_ctrl.h
@@ -3,6 +3,19 @@
 
 #include <linux/soc/qcom/smd.h>
 
+#if IS_ENABLED(CONFIG_QCOM_WCNSS_CTRL)
+
 struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb);
 
+#else
+
+static inline struct qcom_smd_channel*
+qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb)
+{
+	WARN_ON(1);
+	return ERR_PTR(-ENXIO);
+}
+
+#endif
+
 #endif
-- 
cgit v1.2.3


From c8d283ff8b0b6b2061dfc137afd6c56608a34bcb Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Thu, 20 Oct 2016 21:20:59 +0200
Subject: crypto: ccp - fix typo "CPP"

The abbreviation for Cryptographic Coprocessor is "CCP".

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/ccp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index a7653339fedb..c71dd8fa5764 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -11,8 +11,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef __CPP_H__
-#define __CPP_H__
+#ifndef __CCP_H__
+#define __CCP_H__
 
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
@@ -553,7 +553,7 @@ enum ccp_engine {
 #define CCP_CMD_PASSTHRU_NO_DMA_MAP	0x00000002
 
 /**
- * struct ccp_cmd - CPP operation request
+ * struct ccp_cmd - CCP operation request
  * @entry: list element (ccp driver use only)
  * @work: work element used for callbacks (ccp driver use only)
  * @ccp: CCP device to be run on (ccp driver use only)
-- 
cgit v1.2.3


From 2ebda74fd6c9d3fc3b9f0234fc519795e23025a5 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 21 Oct 2016 13:19:47 +0100
Subject: crypto: acomp - add asynchronous compression api

Add acomp, an asynchronous compression api that uses scatterlist
buffers.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig                      |  10 ++
 crypto/Makefile                     |   2 +
 crypto/acompress.c                  | 118 +++++++++++++++
 crypto/crypto_user.c                |  19 +++
 include/crypto/acompress.h          | 281 ++++++++++++++++++++++++++++++++++++
 include/crypto/internal/acompress.h |  66 +++++++++
 include/linux/crypto.h              |   1 +
 include/uapi/linux/cryptouser.h     |   5 +
 8 files changed, 502 insertions(+)
 create mode 100644 crypto/acompress.c
 create mode 100644 include/crypto/acompress.h
 create mode 100644 include/crypto/internal/acompress.h

(limited to 'include/linux')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index fd288053b1c5..9950c47c9d27 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -102,6 +102,15 @@ config CRYPTO_KPP
 	select CRYPTO_ALGAPI
 	select CRYPTO_KPP2
 
+config CRYPTO_ACOMP2
+	tristate
+	select CRYPTO_ALGAPI2
+
+config CRYPTO_ACOMP
+	tristate
+	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
+
 config CRYPTO_RSA
 	tristate "RSA algorithm"
 	select CRYPTO_AKCIPHER
@@ -138,6 +147,7 @@ config CRYPTO_MANAGER2
 	select CRYPTO_BLKCIPHER2
 	select CRYPTO_AKCIPHER2
 	select CRYPTO_KPP2
+	select CRYPTO_ACOMP2
 
 config CRYPTO_USER
 	tristate "Userspace cryptographic algorithm configuration"
diff --git a/crypto/Makefile b/crypto/Makefile
index 99cc64ac70ef..0933dc6bd24c 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -50,6 +50,8 @@ rsa_generic-y += rsa_helper.o
 rsa_generic-y += rsa-pkcs1pad.o
 obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
 
+obj-$(CONFIG_CRYPTO_ACOMP2) += acompress.o
+
 cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
diff --git a/crypto/acompress.c b/crypto/acompress.c
new file mode 100644
index 000000000000..4977279476d3
--- /dev/null
+++ b/crypto/acompress.c
@@ -0,0 +1,118 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <crypto/internal/acompress.h>
+#include "internal.h"
+
+#ifdef CONFIG_NET
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_acomp racomp;
+
+	strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+		    sizeof(struct crypto_report_acomp), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_puts(m, "type         : acomp\n");
+}
+
+static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+	struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+	alg->exit(acomp);
+}
+
+static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+	struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+	if (alg->exit)
+		acomp->base.exit = crypto_acomp_exit_tfm;
+
+	if (alg->init)
+		return alg->init(acomp);
+
+	return 0;
+}
+
+static const struct crypto_type crypto_acomp_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_acomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_acomp_show,
+#endif
+	.report = crypto_acomp_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_ACOMPRESS,
+	.tfmsize = offsetof(struct crypto_acomp, base),
+};
+
+struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
+					u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_acomp);
+
+int crypto_register_acomp(struct acomp_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	base->cra_type = &crypto_acomp_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS;
+
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_acomp);
+
+int crypto_unregister_acomp(struct acomp_alg *alg)
+{
+	return crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_acomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Asynchronous compression type");
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 1c5705481c69..a90404a0c5ff 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -112,6 +112,21 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_acomp racomp;
+
+	strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+		    sizeof(struct crypto_report_acomp), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_akcipher rakcipher;
@@ -186,7 +201,11 @@ static int crypto_report_one(struct crypto_alg *alg,
 			goto nla_put_failure;
 
 		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
 
+		break;
 	case CRYPTO_ALG_TYPE_AKCIPHER:
 		if (crypto_report_akcipher(skb, alg))
 			goto nla_put_failure;
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
new file mode 100644
index 000000000000..14c70d887160
--- /dev/null
+++ b/include/crypto/acompress.h
@@ -0,0 +1,281 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ACOMP_H
+#define _CRYPTO_ACOMP_H
+#include <linux/crypto.h>
+
+#define CRYPTO_ACOMP_ALLOC_OUTPUT	0x00000001
+
+/**
+ * struct acomp_req - asynchronous (de)compression request
+ *
+ * @base:	Common attributes for asynchronous crypto requests
+ * @src:	Source Data
+ * @dst:	Destination data
+ * @slen:	Size of the input buffer
+ * @dlen:	Size of the output buffer and number of bytes produced
+ * @flags:	Internal flags
+ * @__ctx:	Start of private context data
+ */
+struct acomp_req {
+	struct crypto_async_request base;
+	struct scatterlist *src;
+	struct scatterlist *dst;
+	unsigned int slen;
+	unsigned int dlen;
+	u32 flags;
+	void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+/**
+ * struct crypto_acomp - user-instantiated objects which encapsulate
+ * algorithms and core processing logic
+ *
+ * @base:	Common crypto API algorithm data structure
+ */
+struct crypto_acomp {
+	struct crypto_tfm base;
+};
+
+/**
+ * struct acomp_alg - asynchronous compression algorithm
+ *
+ * @compress:	Function performs a compress operation
+ * @decompress:	Function performs a de-compress operation
+ * @dst_free:	Frees destination buffer if allocated inside the algorithm
+ * @init:	Initialize the cryptographic transformation object.
+ *		This function is used to initialize the cryptographic
+ *		transformation object. This function is called only once at
+ *		the instantiation time, right after the transformation context
+ *		was allocated. In case the cryptographic hardware has some
+ *		special requirements which need to be handled by software, this
+ *		function shall check for the precise requirement of the
+ *		transformation and put any software fallbacks in place.
+ * @exit:	Deinitialize the cryptographic transformation object. This is a
+ *		counterpart to @init, used to remove various changes set in
+ *		@init.
+ *
+ * @reqsize:	Context size for (de)compression requests
+ * @base:	Common crypto API algorithm data structure
+ */
+struct acomp_alg {
+	int (*compress)(struct acomp_req *req);
+	int (*decompress)(struct acomp_req *req);
+	void (*dst_free)(struct scatterlist *dst);
+	int (*init)(struct crypto_acomp *tfm);
+	void (*exit)(struct crypto_acomp *tfm);
+	unsigned int reqsize;
+	struct crypto_alg base;
+};
+
+/**
+ * DOC: Asynchronous Compression API
+ *
+ * The Asynchronous Compression API is used with the algorithms of type
+ * CRYPTO_ALG_TYPE_ACOMPRESS (listed as type "acomp" in /proc/crypto)
+ */
+
+/**
+ * crypto_alloc_acomp() -- allocate ACOMPRESS tfm handle
+ * @alg_name:	is the cra_name / name or cra_driver_name / driver name of the
+ *		compression algorithm e.g. "deflate"
+ * @type:	specifies the type of the algorithm
+ * @mask:	specifies the mask for the algorithm
+ *
+ * Allocate a handle for a compression algorithm. The returned struct
+ * crypto_acomp is the handle that is required for any subsequent
+ * API invocation for the compression operations.
+ *
+ * Return:	allocated handle in case of success; IS_ERR() is true in case
+ *		of an error, PTR_ERR() returns the error code.
+ */
+struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
+					u32 mask);
+
+static inline struct crypto_tfm *crypto_acomp_tfm(struct crypto_acomp *tfm)
+{
+	return &tfm->base;
+}
+
+static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
+{
+	return container_of(alg, struct acomp_alg, base);
+}
+
+static inline struct crypto_acomp *__crypto_acomp_tfm(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_acomp, base);
+}
+
+static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm)
+{
+	return __crypto_acomp_alg(crypto_acomp_tfm(tfm)->__crt_alg);
+}
+
+static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm)
+{
+	return crypto_acomp_alg(tfm)->reqsize;
+}
+
+static inline void acomp_request_set_tfm(struct acomp_req *req,
+					 struct crypto_acomp *tfm)
+{
+	req->base.tfm = crypto_acomp_tfm(tfm);
+}
+
+static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
+{
+	return __crypto_acomp_tfm(req->base.tfm);
+}
+
+/**
+ * crypto_free_acomp() -- free ACOMPRESS tfm handle
+ *
+ * @tfm:	ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ */
+static inline void crypto_free_acomp(struct crypto_acomp *tfm)
+{
+	crypto_destroy_tfm(tfm, crypto_acomp_tfm(tfm));
+}
+
+static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_ACOMPRESS;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return crypto_has_alg(alg_name, type, mask);
+}
+
+/**
+ * acomp_request_alloc() -- allocates asynchronous (de)compression request
+ *
+ * @tfm:	ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ *
+ * Return:	allocated handle in case of success or NULL in case of an error
+ */
+static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm)
+{
+	struct acomp_req *req;
+
+	req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
+	if (likely(req))
+		acomp_request_set_tfm(req, tfm);
+
+	return req;
+}
+
+/**
+ * acomp_request_free() -- zeroize and free asynchronous (de)compression
+ *			   request as well as the output buffer if allocated
+ *			   inside the algorithm
+ *
+ * @req:	request to free
+ */
+static inline void acomp_request_free(struct acomp_req *req)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_alg *alg = crypto_acomp_alg(tfm);
+
+	if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
+		alg->dst_free(req->dst);
+		req->dst = NULL;
+	}
+	kzfree(req);
+}
+
+/**
+ * acomp_request_set_callback() -- Sets an asynchronous callback
+ *
+ * Callback will be called when an asynchronous operation on a given
+ * request is finished.
+ *
+ * @req:	request that the callback will be set for
+ * @flgs:	specify for instance if the operation may backlog
+ * @cmlp:	callback which will be called
+ * @data:	private data used by the caller
+ */
+static inline void acomp_request_set_callback(struct acomp_req *req,
+					      u32 flgs,
+					      crypto_completion_t cmpl,
+					      void *data)
+{
+	req->base.complete = cmpl;
+	req->base.data = data;
+	req->base.flags = flgs;
+}
+
+/**
+ * acomp_request_set_params() -- Sets request parameters
+ *
+ * Sets parameters required by an acomp operation
+ *
+ * @req:	asynchronous compress request
+ * @src:	pointer to input buffer scatterlist
+ * @dst:	pointer to output buffer scatterlist. If this is NULL, the
+ *		acomp layer will allocate the output memory
+ * @slen:	size of the input buffer
+ * @dlen:	size of the output buffer. If dst is NULL, this can be used by
+ *		the user to specify the maximum amount of memory to allocate
+ */
+static inline void acomp_request_set_params(struct acomp_req *req,
+					    struct scatterlist *src,
+					    struct scatterlist *dst,
+					    unsigned int slen,
+					    unsigned int dlen)
+{
+	req->src = src;
+	req->dst = dst;
+	req->slen = slen;
+	req->dlen = dlen;
+
+	if (!req->dst)
+		req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
+}
+
+/**
+ * crypto_acomp_compress() -- Invoke asynchronous compress operation
+ *
+ * Function invokes the asynchronous compress operation
+ *
+ * @req:	asynchronous compress request
+ *
+ * Return:	zero on success; error code in case of error
+ */
+static inline int crypto_acomp_compress(struct acomp_req *req)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_alg *alg = crypto_acomp_alg(tfm);
+
+	return alg->compress(req);
+}
+
+/**
+ * crypto_acomp_decompress() -- Invoke asynchronous decompress operation
+ *
+ * Function invokes the asynchronous decompress operation
+ *
+ * @req:	asynchronous compress request
+ *
+ * Return:	zero on success; error code in case of error
+ */
+static inline int crypto_acomp_decompress(struct acomp_req *req)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_alg *alg = crypto_acomp_alg(tfm);
+
+	return alg->decompress(req);
+}
+
+#endif
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
new file mode 100644
index 000000000000..a9a9000d1aea
--- /dev/null
+++ b/include/crypto/internal/acompress.h
@@ -0,0 +1,66 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ACOMP_INT_H
+#define _CRYPTO_ACOMP_INT_H
+#include <crypto/acompress.h>
+
+/*
+ * Transform internal helpers.
+ */
+static inline void *acomp_request_ctx(struct acomp_req *req)
+{
+	return req->__ctx;
+}
+
+static inline void *acomp_tfm_ctx(struct crypto_acomp *tfm)
+{
+	return tfm->base.__crt_ctx;
+}
+
+static inline void acomp_request_complete(struct acomp_req *req,
+					  int err)
+{
+	req->base.complete(&req->base, err);
+}
+
+static inline const char *acomp_alg_name(struct crypto_acomp *tfm)
+{
+	return crypto_acomp_tfm(tfm)->__crt_alg->cra_name;
+}
+
+/**
+ * crypto_register_acomp() -- Register asynchronous compression algorithm
+ *
+ * Function registers an implementation of an asynchronous
+ * compression algorithm
+ *
+ * @alg:	algorithm definition
+ *
+ * Return:	zero on success; error code in case of error
+ */
+int crypto_register_acomp(struct acomp_alg *alg);
+
+/**
+ * crypto_unregister_acomp() -- Unregister asynchronous compression algorithm
+ *
+ * Function unregisters an implementation of an asynchronous
+ * compression algorithm
+ *
+ * @alg:	algorithm definition
+ *
+ * Return:	zero on success; error code in case of error
+ */
+int crypto_unregister_acomp(struct acomp_alg *alg);
+
+#endif
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7cee5551625b..dc57a0505505 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -50,6 +50,7 @@
 #define CRYPTO_ALG_TYPE_SKCIPHER	0x00000005
 #define CRYPTO_ALG_TYPE_GIVCIPHER	0x00000006
 #define CRYPTO_ALG_TYPE_KPP		0x00000008
+#define CRYPTO_ALG_TYPE_ACOMPRESS	0x0000000a
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
 #define CRYPTO_ALG_TYPE_AKCIPHER	0x0000000d
 #define CRYPTO_ALG_TYPE_DIGEST		0x0000000e
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 79b5ded2001a..11d21fce14d6 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -46,6 +46,7 @@ enum crypto_attr_type_t {
 	CRYPTOCFGA_REPORT_CIPHER,	/* struct crypto_report_cipher */
 	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
 	CRYPTOCFGA_REPORT_KPP,		/* struct crypto_report_kpp */
+	CRYPTOCFGA_REPORT_ACOMP,	/* struct crypto_report_acomp */
 	__CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -112,5 +113,9 @@ struct crypto_report_kpp {
 	char type[CRYPTO_MAX_NAME];
 };
 
+struct crypto_report_acomp {
+	char type[CRYPTO_MAX_NAME];
+};
+
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
 			       sizeof(struct crypto_report_blkcipher))
-- 
cgit v1.2.3


From 1ab53a77b772bf7369464a0e4fa6fd6499acf8f1 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 21 Oct 2016 13:19:48 +0100
Subject: crypto: acomp - add driver-side scomp interface

Add a synchronous back-end (scomp) to acomp. This allows to easily
expose the already present compression algorithms in LKCF via acomp.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Makefile                     |   1 +
 crypto/acompress.c                  |  55 +++++-
 crypto/scompress.c                  | 356 ++++++++++++++++++++++++++++++++++++
 include/crypto/acompress.h          |  42 ++---
 include/crypto/internal/acompress.h |  15 ++
 include/crypto/internal/scompress.h | 136 ++++++++++++++
 include/linux/crypto.h              |   2 +
 7 files changed, 578 insertions(+), 29 deletions(-)
 create mode 100644 crypto/scompress.c
 create mode 100644 include/crypto/internal/scompress.h

(limited to 'include/linux')

diff --git a/crypto/Makefile b/crypto/Makefile
index 0933dc6bd24c..5c83f3dea119 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -51,6 +51,7 @@ rsa_generic-y += rsa-pkcs1pad.o
 obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
 
 obj-$(CONFIG_CRYPTO_ACOMP2) += acompress.o
+obj-$(CONFIG_CRYPTO_ACOMP2) += scompress.o
 
 cryptomgr-y := algboss.o testmgr.o
 
diff --git a/crypto/acompress.c b/crypto/acompress.c
index 4977279476d3..887783d8e9a9 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -22,8 +22,11 @@
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
 #include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
 #include "internal.h"
 
+static const struct crypto_type crypto_acomp_type;
+
 #ifdef CONFIG_NET
 static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -67,6 +70,14 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
 	struct acomp_alg *alg = crypto_acomp_alg(acomp);
 
+	if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+		return crypto_init_scomp_ops_async(tfm);
+
+	acomp->compress = alg->compress;
+	acomp->decompress = alg->decompress;
+	acomp->dst_free = alg->dst_free;
+	acomp->reqsize = alg->reqsize;
+
 	if (alg->exit)
 		acomp->base.exit = crypto_acomp_exit_tfm;
 
@@ -76,15 +87,25 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static unsigned int crypto_acomp_extsize(struct crypto_alg *alg)
+{
+	int extsize = crypto_alg_extsize(alg);
+
+	if (alg->cra_type != &crypto_acomp_type)
+		extsize += sizeof(struct crypto_scomp *);
+
+	return extsize;
+}
+
 static const struct crypto_type crypto_acomp_type = {
-	.extsize = crypto_alg_extsize,
+	.extsize = crypto_acomp_extsize,
 	.init_tfm = crypto_acomp_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_acomp_show,
 #endif
 	.report = crypto_acomp_report,
 	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
-	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK,
 	.type = CRYPTO_ALG_TYPE_ACOMPRESS,
 	.tfmsize = offsetof(struct crypto_acomp, base),
 };
@@ -96,6 +117,36 @@ struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_acomp);
 
+struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
+{
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct acomp_req *req;
+
+	req = __acomp_request_alloc(acomp);
+	if (req && (tfm->__crt_alg->cra_type != &crypto_acomp_type))
+		return crypto_acomp_scomp_alloc_ctx(req);
+
+	return req;
+}
+EXPORT_SYMBOL_GPL(acomp_request_alloc);
+
+void acomp_request_free(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+
+	if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+		crypto_acomp_scomp_free_ctx(req);
+
+	if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
+		acomp->dst_free(req->dst);
+		req->dst = NULL;
+	}
+
+	__acomp_request_free(req);
+}
+EXPORT_SYMBOL_GPL(acomp_request_free);
+
 int crypto_register_acomp(struct acomp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
diff --git a/crypto/scompress.c b/crypto/scompress.c
new file mode 100644
index 000000000000..35e396d154b7
--- /dev/null
+++ b/crypto/scompress.c
@@ -0,0 +1,356 @@
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
+#include "internal.h"
+
+static const struct crypto_type crypto_scomp_type;
+static void * __percpu *scomp_src_scratches;
+static void * __percpu *scomp_dst_scratches;
+static int scomp_scratch_users;
+static DEFINE_MUTEX(scomp_lock);
+
+#ifdef CONFIG_NET
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_comp rscomp;
+
+	strncpy(rscomp.type, "scomp", sizeof(rscomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
+		    sizeof(struct crypto_report_comp), &rscomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_puts(m, "type         : scomp\n");
+}
+
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+	return 0;
+}
+
+static void crypto_scomp_free_scratches(void * __percpu *scratches)
+{
+	int i;
+
+	if (!scratches)
+		return;
+
+	for_each_possible_cpu(i)
+		vfree(*per_cpu_ptr(scratches, i));
+
+	free_percpu(scratches);
+}
+
+static void * __percpu *crypto_scomp_alloc_scratches(void)
+{
+	void * __percpu *scratches;
+	int i;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	for_each_possible_cpu(i) {
+		void *scratch;
+
+		scratch = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
+		if (!scratch)
+			goto error;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+
+error:
+	crypto_scomp_free_scratches(scratches);
+	return NULL;
+}
+
+static void crypto_scomp_free_all_scratches(void)
+{
+	if (!--scomp_scratch_users) {
+		crypto_scomp_free_scratches(scomp_src_scratches);
+		crypto_scomp_free_scratches(scomp_dst_scratches);
+		scomp_src_scratches = NULL;
+		scomp_dst_scratches = NULL;
+	}
+}
+
+static int crypto_scomp_alloc_all_scratches(void)
+{
+	if (!scomp_scratch_users++) {
+		scomp_src_scratches = crypto_scomp_alloc_scratches();
+		if (!scomp_src_scratches)
+			return -ENOMEM;
+		scomp_dst_scratches = crypto_scomp_alloc_scratches();
+		if (!scomp_dst_scratches)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static void crypto_scomp_sg_free(struct scatterlist *sgl)
+{
+	int i, n;
+	struct page *page;
+
+	if (!sgl)
+		return;
+
+	n = sg_nents(sgl);
+	for_each_sg(sgl, sgl, n, i) {
+		page = sg_page(sgl);
+		if (page)
+			__free_page(page);
+	}
+
+	kfree(sgl);
+}
+
+static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp)
+{
+	struct scatterlist *sgl;
+	struct page *page;
+	int i, n;
+
+	n = ((size - 1) >> PAGE_SHIFT) + 1;
+
+	sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, n);
+
+	for (i = 0; i < n; i++) {
+		page = alloc_page(gfp);
+		if (!page)
+			goto err;
+		sg_set_page(sgl + i, page, PAGE_SIZE, 0);
+	}
+
+	return sgl;
+
+err:
+	sg_mark_end(sgl + i);
+	crypto_scomp_sg_free(sgl);
+	return NULL;
+}
+
+static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	void **tfm_ctx = acomp_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void **ctx = acomp_request_ctx(req);
+	const int cpu = get_cpu();
+	u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu);
+	u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu);
+	int ret;
+
+	if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (req->dst && !req->dlen) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE)
+		req->dlen = SCOMP_SCRATCH_SIZE;
+
+	scatterwalk_map_and_copy(scratch_src, req->src, 0, req->slen, 0);
+	if (dir)
+		ret = crypto_scomp_compress(scomp, scratch_src, req->slen,
+					    scratch_dst, &req->dlen, *ctx);
+	else
+		ret = crypto_scomp_decompress(scomp, scratch_src, req->slen,
+					      scratch_dst, &req->dlen, *ctx);
+	if (!ret) {
+		if (!req->dst) {
+			req->dst = crypto_scomp_sg_alloc(req->dlen,
+				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+				   GFP_KERNEL : GFP_ATOMIC);
+			if (!req->dst)
+				goto out;
+		}
+		scatterwalk_map_and_copy(scratch_dst, req->dst, 0, req->dlen,
+					 1);
+	}
+out:
+	put_cpu();
+	return ret;
+}
+
+static int scomp_acomp_compress(struct acomp_req *req)
+{
+	return scomp_acomp_comp_decomp(req, 1);
+}
+
+static int scomp_acomp_decompress(struct acomp_req *req)
+{
+	return scomp_acomp_comp_decomp(req, 0);
+}
+
+static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_scomp(*ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *calg = tfm->__crt_alg;
+	struct crypto_acomp *crt = __crypto_acomp_tfm(tfm);
+	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp;
+
+	if (!crypto_mod_get(calg))
+		return -EAGAIN;
+
+	scomp = crypto_create_tfm(calg, &crypto_scomp_type);
+	if (IS_ERR(scomp)) {
+		crypto_mod_put(calg);
+		return PTR_ERR(scomp);
+	}
+
+	*ctx = scomp;
+	tfm->exit = crypto_exit_scomp_ops_async;
+
+	crt->compress = scomp_acomp_compress;
+	crt->decompress = scomp_acomp_decompress;
+	crt->dst_free = crypto_scomp_sg_free;
+	crt->reqsize = sizeof(void *);
+
+	return 0;
+}
+
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void *ctx;
+
+	ctx = crypto_scomp_alloc_ctx(scomp);
+	if (IS_ERR(ctx)) {
+		kfree(req);
+		return NULL;
+	}
+
+	*req->__ctx = ctx;
+
+	return req;
+}
+
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void *ctx = *req->__ctx;
+
+	if (ctx)
+		crypto_scomp_free_ctx(scomp, ctx);
+}
+
+static const struct crypto_type crypto_scomp_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_scomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_scomp_show,
+#endif
+	.report = crypto_scomp_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_SCOMPRESS,
+	.tfmsize = offsetof(struct crypto_scomp, base),
+};
+
+int crypto_register_scomp(struct scomp_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+	int ret = -ENOMEM;
+
+	mutex_lock(&scomp_lock);
+	if (crypto_scomp_alloc_all_scratches())
+		goto error;
+
+	base->cra_type = &crypto_scomp_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
+
+	ret = crypto_register_alg(base);
+	if (ret)
+		goto error;
+
+	mutex_unlock(&scomp_lock);
+	return ret;
+
+error:
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_scomp);
+
+int crypto_unregister_scomp(struct scomp_alg *alg)
+{
+	int ret;
+
+	mutex_lock(&scomp_lock);
+	ret = crypto_unregister_alg(&alg->base);
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Synchronous compression type");
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 14c70d887160..e328b52425a8 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -42,9 +42,18 @@ struct acomp_req {
  * struct crypto_acomp - user-instantiated objects which encapsulate
  * algorithms and core processing logic
  *
- * @base:	Common crypto API algorithm data structure
+ * @compress:		Function performs a compress operation
+ * @decompress:		Function performs a de-compress operation
+ * @dst_free:		Frees destination buffer if allocated inside the
+ *			algorithm
+ * @reqsize:		Context size for (de)compression requests
+ * @base:		Common crypto API algorithm data structure
  */
 struct crypto_acomp {
+	int (*compress)(struct acomp_req *req);
+	int (*decompress)(struct acomp_req *req);
+	void (*dst_free)(struct scatterlist *dst);
+	unsigned int reqsize;
 	struct crypto_tfm base;
 };
 
@@ -125,7 +134,7 @@ static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm)
 
 static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm)
 {
-	return crypto_acomp_alg(tfm)->reqsize;
+	return tfm->reqsize;
 }
 
 static inline void acomp_request_set_tfm(struct acomp_req *req,
@@ -165,16 +174,7 @@ static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask)
  *
  * Return:	allocated handle in case of success or NULL in case of an error
  */
-static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm)
-{
-	struct acomp_req *req;
-
-	req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
-	if (likely(req))
-		acomp_request_set_tfm(req, tfm);
-
-	return req;
-}
+struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm);
 
 /**
  * acomp_request_free() -- zeroize and free asynchronous (de)compression
@@ -183,17 +183,7 @@ static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm)
  *
  * @req:	request to free
  */
-static inline void acomp_request_free(struct acomp_req *req)
-{
-	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
-	struct acomp_alg *alg = crypto_acomp_alg(tfm);
-
-	if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
-		alg->dst_free(req->dst);
-		req->dst = NULL;
-	}
-	kzfree(req);
-}
+void acomp_request_free(struct acomp_req *req);
 
 /**
  * acomp_request_set_callback() -- Sets an asynchronous callback
@@ -256,9 +246,8 @@ static inline void acomp_request_set_params(struct acomp_req *req,
 static inline int crypto_acomp_compress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
-	struct acomp_alg *alg = crypto_acomp_alg(tfm);
 
-	return alg->compress(req);
+	return tfm->compress(req);
 }
 
 /**
@@ -273,9 +262,8 @@ static inline int crypto_acomp_compress(struct acomp_req *req)
 static inline int crypto_acomp_decompress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
-	struct acomp_alg *alg = crypto_acomp_alg(tfm);
 
-	return alg->decompress(req);
+	return tfm->decompress(req);
 }
 
 #endif
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index a9a9000d1aea..1de2b5af12d7 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -39,6 +39,21 @@ static inline const char *acomp_alg_name(struct crypto_acomp *tfm)
 	return crypto_acomp_tfm(tfm)->__crt_alg->cra_name;
 }
 
+static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm)
+{
+	struct acomp_req *req;
+
+	req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
+	if (likely(req))
+		acomp_request_set_tfm(req, tfm);
+	return req;
+}
+
+static inline void __acomp_request_free(struct acomp_req *req)
+{
+	kzfree(req);
+}
+
 /**
  * crypto_register_acomp() -- Register asynchronous compression algorithm
  *
diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h
new file mode 100644
index 000000000000..3fda3c5655a0
--- /dev/null
+++ b/include/crypto/internal/scompress.h
@@ -0,0 +1,136 @@
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_SCOMP_INT_H
+#define _CRYPTO_SCOMP_INT_H
+#include <linux/crypto.h>
+
+#define SCOMP_SCRATCH_SIZE	131072
+
+struct crypto_scomp {
+	struct crypto_tfm base;
+};
+
+/**
+ * struct scomp_alg - synchronous compression algorithm
+ *
+ * @alloc_ctx:	Function allocates algorithm specific context
+ * @free_ctx:	Function frees context allocated with alloc_ctx
+ * @compress:	Function performs a compress operation
+ * @decompress:	Function performs a de-compress operation
+ * @init:	Initialize the cryptographic transformation object.
+ *		This function is used to initialize the cryptographic
+ *		transformation object. This function is called only once at
+ *		the instantiation time, right after the transformation context
+ *		was allocated. In case the cryptographic hardware has some
+ *		special requirements which need to be handled by software, this
+ *		function shall check for the precise requirement of the
+ *		transformation and put any software fallbacks in place.
+ * @exit:	Deinitialize the cryptographic transformation object. This is a
+ *		counterpart to @init, used to remove various changes set in
+ *		@init.
+ * @base:	Common crypto API algorithm data structure
+ */
+struct scomp_alg {
+	void *(*alloc_ctx)(struct crypto_scomp *tfm);
+	void (*free_ctx)(struct crypto_scomp *tfm, void *ctx);
+	int (*compress)(struct crypto_scomp *tfm, const u8 *src,
+			unsigned int slen, u8 *dst, unsigned int *dlen,
+			void *ctx);
+	int (*decompress)(struct crypto_scomp *tfm, const u8 *src,
+			  unsigned int slen, u8 *dst, unsigned int *dlen,
+			  void *ctx);
+	struct crypto_alg base;
+};
+
+static inline struct scomp_alg *__crypto_scomp_alg(struct crypto_alg *alg)
+{
+	return container_of(alg, struct scomp_alg, base);
+}
+
+static inline struct crypto_scomp *__crypto_scomp_tfm(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_scomp, base);
+}
+
+static inline struct crypto_tfm *crypto_scomp_tfm(struct crypto_scomp *tfm)
+{
+	return &tfm->base;
+}
+
+static inline void crypto_free_scomp(struct crypto_scomp *tfm)
+{
+	crypto_destroy_tfm(tfm, crypto_scomp_tfm(tfm));
+}
+
+static inline struct scomp_alg *crypto_scomp_alg(struct crypto_scomp *tfm)
+{
+	return __crypto_scomp_alg(crypto_scomp_tfm(tfm)->__crt_alg);
+}
+
+static inline void *crypto_scomp_alloc_ctx(struct crypto_scomp *tfm)
+{
+	return crypto_scomp_alg(tfm)->alloc_ctx(tfm);
+}
+
+static inline void crypto_scomp_free_ctx(struct crypto_scomp *tfm,
+					 void *ctx)
+{
+	return crypto_scomp_alg(tfm)->free_ctx(tfm, ctx);
+}
+
+static inline int crypto_scomp_compress(struct crypto_scomp *tfm,
+					const u8 *src, unsigned int slen,
+					u8 *dst, unsigned int *dlen, void *ctx)
+{
+	return crypto_scomp_alg(tfm)->compress(tfm, src, slen, dst, dlen, ctx);
+}
+
+static inline int crypto_scomp_decompress(struct crypto_scomp *tfm,
+					  const u8 *src, unsigned int slen,
+					  u8 *dst, unsigned int *dlen,
+					  void *ctx)
+{
+	return crypto_scomp_alg(tfm)->decompress(tfm, src, slen, dst, dlen,
+						 ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm);
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req);
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req);
+
+/**
+ * crypto_register_scomp() -- Register synchronous compression algorithm
+ *
+ * Function registers an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:	algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_register_scomp(struct scomp_alg *alg);
+
+/**
+ * crypto_unregister_scomp() -- Unregister synchronous compression algorithm
+ *
+ * Function unregisters an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:	algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_unregister_scomp(struct scomp_alg *alg);
+
+#endif
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index dc57a0505505..8348d83d8b5e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -51,6 +51,7 @@
 #define CRYPTO_ALG_TYPE_GIVCIPHER	0x00000006
 #define CRYPTO_ALG_TYPE_KPP		0x00000008
 #define CRYPTO_ALG_TYPE_ACOMPRESS	0x0000000a
+#define CRYPTO_ALG_TYPE_SCOMPRESS	0x0000000b
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
 #define CRYPTO_ALG_TYPE_AKCIPHER	0x0000000d
 #define CRYPTO_ALG_TYPE_DIGEST		0x0000000e
@@ -61,6 +62,7 @@
 #define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
 #define CRYPTO_ALG_TYPE_AHASH_MASK	0x0000000e
 #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK	0x0000000c
+#define CRYPTO_ALG_TYPE_ACOMPRESS_MASK	0x0000000e
 
 #define CRYPTO_ALG_LARVAL		0x00000010
 #define CRYPTO_ALG_DEAD			0x00000020
-- 
cgit v1.2.3


From a225023828038a1aaea876a65313c863ec23fa44 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 19 Oct 2016 15:45:27 +0200
Subject: sched/core: Explain sleep/wakeup in a better way

There were a few questions wrt. how sleep-wakeup works. Try and explain
it more.

Requested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 52 +++++++++++++++++++++++++++++++++++----------------
 kernel/sched/core.c   | 17 +++++++++--------
 2 files changed, 45 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..3762fe4e3a80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_task_state(tsk, state_value)			\
 	do {							\
 		(tsk)->task_state_change = _THIS_IP_;		\
-		smp_store_mb((tsk)->state, (state_value));		\
+		smp_store_mb((tsk)->state, (state_value));	\
 	} while (0)
 
-/*
- * set_current_state() includes a barrier so that the write of current->state
- * is correctly serialised wrt the caller's subsequent test of whether to
- * actually sleep:
- *
- *	set_current_state(TASK_UNINTERRUPTIBLE);
- *	if (do_i_need_to_sleep())
- *		schedule();
- *
- * If the caller does not need such serialisation then use __set_current_state()
- */
 #define __set_current_state(state_value)			\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
@@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_current_state(state_value)				\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
-		smp_store_mb(current->state, (state_value));		\
+		smp_store_mb(current->state, (state_value));	\
 	} while (0)
 
 #else
 
+/*
+ * @tsk had better be current, or you get to keep the pieces.
+ *
+ * The only reason is that computing current can be more expensive than
+ * using a pointer that's already available.
+ *
+ * Therefore, see set_current_state().
+ */
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
@@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!(
  * is correctly serialised wrt the caller's subsequent test of whether to
  * actually sleep:
  *
+ *   for (;;) {
  *	set_current_state(TASK_UNINTERRUPTIBLE);
- *	if (do_i_need_to_sleep())
- *		schedule();
+ *	if (!need_sleep)
+ *		break;
+ *
+ *	schedule();
+ *   }
+ *   __set_current_state(TASK_RUNNING);
+ *
+ * If the caller does not need such serialisation (because, for instance, the
+ * condition test and condition change and wakeup are under the same lock) then
+ * use __set_current_state().
+ *
+ * The above is typically ordered against the wakeup, which does:
+ *
+ *	need_sleep = false;
+ *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *
+ * Where wake_up_state() (and all other wakeup primitives) imply enough
+ * barriers to order the store of the variable against wakeup.
+ *
+ * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
+ * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
+ *
+ * This is obviously fine, since they both store the exact same value.
  *
- * If the caller does not need such serialisation then use __set_current_state()
+ * Also see the comments of try_to_wake_up().
  */
 #define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..b8c86ba44ca9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1995,14 +1995,15 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * @state: the mask of task states that can be woken
  * @wake_flags: wake modifier flags (WF_*)
  *
- * Put it on the run-queue if it's not already there. The "current"
- * thread is always on the run-queue (except when the actual
- * re-schedule is in progress), and as such you're allowed to do
- * the simpler "current->state = TASK_RUNNING" to mark yourself
- * runnable without the overhead of this.
- *
- * Return: %true if @p was woken up, %false if it was already running.
- * or @state didn't match @p's state.
+ * If (@state & @p->state) @p->state = TASK_RUNNING.
+ *
+ * If the task was not queued/runnable, also place it back on a runqueue.
+ *
+ * Atomic against schedule() which would dequeue a task, also see
+ * set_current_state().
+ *
+ * Return: %true if @p->state changes (an actual wakeup was done),
+ *	   %false otherwise.
  */
 static int
 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
-- 
cgit v1.2.3


From 3ca0ff571b092ee4d807f1168caa428d95b0173b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 23 Aug 2016 13:36:04 +0200
Subject: locking/mutex: Rework mutex::owner

The current mutex implementation has an atomic lock word and a
non-atomic owner field.

This disparity leads to a number of issues with the current mutex code
as it means that we can have a locked mutex without an explicit owner
(because the owner field has not been set, or already cleared).

This leads to a number of weird corner cases, esp. between the
optimistic spinning and debug code. Where the optimistic spinning
code needs the owner field updated inside the lock region, the debug
code is more relaxed because the whole lock is serialized by the
wait_lock.

Also, the spinning code itself has a few corner cases where we need to
deal with a held lock without an owner field.

Furthermore, it becomes even more of a problem when trying to fix
starvation cases in the current code. We end up stacking special case
on special case.

To solve this rework the basic mutex implementation to be a single
atomic word that contains the owner and uses the low bits for extra
state.

This matches how PI futexes and rt_mutex already work. By having the
owner an integral part of the lock state a lot of the problems
dissapear and we get a better option to deal with starvation cases,
direct owner handoff.

Changing the basic mutex does however invalidate all the arch specific
mutex code; this patch leaves that unused in-place, a later patch will
remove that.

Tested-by: Jason Low <jason.low2@hpe.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex-debug.h  |  24 ---
 include/linux/mutex.h        |  46 ++++--
 kernel/locking/mutex-debug.c |  13 --
 kernel/locking/mutex-debug.h |  10 --
 kernel/locking/mutex.c       | 371 ++++++++++++++++++-------------------------
 kernel/locking/mutex.h       |  26 ---
 kernel/sched/core.c          |   2 +-
 7 files changed, 187 insertions(+), 305 deletions(-)
 delete mode 100644 include/linux/mutex-debug.h

(limited to 'include/linux')

diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
deleted file mode 100644
index 4ac8b1977b73..000000000000
--- a/include/linux/mutex-debug.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __LINUX_MUTEX_DEBUG_H
-#define __LINUX_MUTEX_DEBUG_H
-
-#include <linux/linkage.h>
-#include <linux/lockdep.h>
-#include <linux/debug_locks.h>
-
-/*
- * Mutexes - debugging helpers:
- */
-
-#define __DEBUG_MUTEX_INITIALIZER(lockname)				\
-	, .magic = &lockname
-
-#define mutex_init(mutex)						\
-do {									\
-	static struct lock_class_key __key;				\
-									\
-	__mutex_init((mutex), #mutex, &__key);				\
-} while (0)
-
-extern void mutex_destroy(struct mutex *lock);
-
-#endif
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2cb7531e7d7a..4d3bccabbea5 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -18,6 +18,7 @@
 #include <linux/atomic.h>
 #include <asm/processor.h>
 #include <linux/osq_lock.h>
+#include <linux/debug_locks.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -48,16 +49,12 @@
  *   locks and tasks (and only those tasks)
  */
 struct mutex {
-	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
-	atomic_t		count;
+	atomic_long_t		owner;
 	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
-	struct task_struct	*owner;
-#endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
 #endif
+	struct list_head	wait_list;
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
 #endif
@@ -66,6 +63,11 @@ struct mutex {
 #endif
 };
 
+static inline struct task_struct *__mutex_owner(struct mutex *lock)
+{
+	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+}
+
 /*
  * This is the control structure for tasks blocked on mutex,
  * which resides on the blocked task's kernel stack:
@@ -79,9 +81,20 @@ struct mutex_waiter {
 };
 
 #ifdef CONFIG_DEBUG_MUTEXES
-# include <linux/mutex-debug.h>
+
+#define __DEBUG_MUTEX_INITIALIZER(lockname)				\
+	, .magic = &lockname
+
+extern void mutex_destroy(struct mutex *lock);
+
 #else
+
 # define __DEBUG_MUTEX_INITIALIZER(lockname)
+
+static inline void mutex_destroy(struct mutex *lock) {}
+
+#endif
+
 /**
  * mutex_init - initialize the mutex
  * @mutex: the mutex to be initialized
@@ -90,14 +103,12 @@ struct mutex_waiter {
  *
  * It is not allowed to initialize an already locked mutex.
  */
-# define mutex_init(mutex) \
-do {							\
-	static struct lock_class_key __key;		\
-							\
-	__mutex_init((mutex), #mutex, &__key);		\
+#define mutex_init(mutex)						\
+do {									\
+	static struct lock_class_key __key;				\
+									\
+	__mutex_init((mutex), #mutex, &__key);				\
 } while (0)
-static inline void mutex_destroy(struct mutex *lock) {}
-#endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
@@ -107,7 +118,7 @@ static inline void mutex_destroy(struct mutex *lock) {}
 #endif
 
 #define __MUTEX_INITIALIZER(lockname) \
-		{ .count = ATOMIC_INIT(1) \
+		{ .owner = ATOMIC_LONG_INIT(0) \
 		, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
 		, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
 		__DEBUG_MUTEX_INITIALIZER(lockname) \
@@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *lock, const char *name,
  */
 static inline int mutex_is_locked(struct mutex *lock)
 {
-	return atomic_read(&lock->count) != 1;
+	/*
+	 * XXX think about spin_is_locked
+	 */
+	return __mutex_owner(lock) != NULL;
 }
 
 /*
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 9c951fade415..9aa713629387 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lock)
 {
 	if (likely(debug_locks)) {
 		DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-
-		if (!lock->owner)
-			DEBUG_LOCKS_WARN_ON(!lock->owner);
-		else
-			DEBUG_LOCKS_WARN_ON(lock->owner != current);
-
 		DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	}
-
-	/*
-	 * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
-	 * mutexes so that we can do it here after we've verified state.
-	 */
-	mutex_clear_owner(lock);
-	atomic_set(&lock->count, 1);
 }
 
 void debug_mutex_init(struct mutex *lock, const char *name,
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 57a871ae3c81..a459faa48987 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -27,16 +27,6 @@ extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
 			     struct lock_class_key *key);
 
-static inline void mutex_set_owner(struct mutex *lock)
-{
-	WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-	WRITE_ONCE(lock->owner, NULL);
-}
-
 #define spin_lock_mutex(lock, flags)			\
 	do {						\
 		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a70b90db3909..de1ce0bae0d5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -27,41 +27,113 @@
 #include <linux/debug_locks.h>
 #include <linux/osq_lock.h>
 
-/*
- * In the DEBUG case we are using the "NULL fastpath" for mutexes,
- * which forces all calls into the slowpath:
- */
 #ifdef CONFIG_DEBUG_MUTEXES
 # include "mutex-debug.h"
-# include <asm-generic/mutex-null.h>
-/*
- * Must be 0 for the debug case so we do not do the unlock outside of the
- * wait_lock region. debug_mutex_unlock() will do the actual unlock in this
- * case.
- */
-# undef __mutex_slowpath_needs_to_unlock
-# define  __mutex_slowpath_needs_to_unlock()	0
 #else
 # include "mutex.h"
-# include <asm/mutex.h>
 #endif
 
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
-	atomic_set(&lock->count, 1);
+	atomic_long_set(&lock->owner, 0);
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
-	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
 #endif
 
 	debug_mutex_init(lock, name, key);
 }
-
 EXPORT_SYMBOL(__mutex_init);
 
+/*
+ * @owner: contains: 'struct task_struct *' to the current lock owner,
+ * NULL means not owned. Since task_struct pointers are aligned at
+ * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
+ * bits to store extra state.
+ *
+ * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
+ */
+#define MUTEX_FLAG_WAITERS	0x01
+
+#define MUTEX_FLAGS		0x03
+
+static inline struct task_struct *__owner_task(unsigned long owner)
+{
+	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+static inline unsigned long __owner_flags(unsigned long owner)
+{
+	return owner & MUTEX_FLAGS;
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+	unsigned long owner, curr = (unsigned long)current;
+
+	owner = atomic_long_read(&lock->owner);
+	for (;;) { /* must loop, can race against a flag */
+		unsigned long old;
+
+		if (__owner_task(owner))
+			return false;
+
+		old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
+						  curr | __owner_flags(owner));
+		if (old == owner)
+			return true;
+
+		owner = old;
+	}
+}
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * Lockdep annotations are contained to the slow paths for simplicity.
+ * There is nothing that would stop spreading the lockdep annotations outwards
+ * except more code.
+ */
+
+/*
+ * Optimistic trylock that only works in the uncontended case. Make sure to
+ * follow with a __mutex_trylock() before failing.
+ */
+static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
+{
+	unsigned long curr = (unsigned long)current;
+
+	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+		return true;
+
+	return false;
+}
+
+static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
+{
+	unsigned long curr = (unsigned long)current;
+
+	if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr)
+		return true;
+
+	return false;
+}
+#endif
+
+static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
+{
+	atomic_long_or(flag, &lock->owner);
+}
+
+static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
+{
+	atomic_long_andnot(flag, &lock->owner);
+}
+
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
@@ -69,7 +141,7 @@ EXPORT_SYMBOL(__mutex_init);
  * We also put the fastpath first in the kernel image, to make sure the
  * branch is predicted by the CPU as default-untaken.
  */
-__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
+static void __sched __mutex_lock_slowpath(struct mutex *lock);
 
 /**
  * mutex_lock - acquire the mutex
@@ -95,14 +167,10 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 void __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
-	/*
-	 * The locking fastpath is the 1->0 transition from
-	 * 'unlocked' into 'locked' state.
-	 */
-	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
-	mutex_set_owner(lock);
-}
 
+	if (!__mutex_trylock_fast(lock))
+		__mutex_lock_slowpath(lock);
+}
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
@@ -149,9 +217,6 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
 /*
  * After acquiring lock with fastpath or when we lost out in contested
  * slowpath, set ctx and wake up any waiters so they can recheck.
- *
- * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
- * as the fastpath and opportunistic spinning are disabled in that case.
  */
 static __always_inline void
 ww_mutex_set_context_fastpath(struct ww_mutex *lock,
@@ -176,7 +241,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
 	/*
 	 * Check if lock is contended, if not there is nobody to wake up
 	 */
-	if (likely(atomic_read(&lock->base.count) == 0))
+	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
 		return;
 
 	/*
@@ -227,7 +292,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 	bool ret = true;
 
 	rcu_read_lock();
-	while (lock->owner == owner) {
+	while (__mutex_owner(lock) == owner) {
 		/*
 		 * Ensure we emit the owner->on_cpu, dereference _after_
 		 * checking lock->owner still matches owner. If that fails,
@@ -260,26 +325,19 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
 		return 0;
 
 	rcu_read_lock();
-	owner = READ_ONCE(lock->owner);
+	owner = __mutex_owner(lock);
 	if (owner)
 		retval = owner->on_cpu;
 	rcu_read_unlock();
+
 	/*
-	 * if lock->owner is not set, the mutex owner may have just acquired
-	 * it and not set the owner yet or the mutex has been released.
+	 * If lock->owner is not set, the mutex has been released. Return true
+	 * such that we'll trylock in the spin path, which is a faster option
+	 * than the blocking slow path.
 	 */
 	return retval;
 }
 
-/*
- * Atomically try to take the lock when it is available
- */
-static inline bool mutex_try_to_acquire(struct mutex *lock)
-{
-	return !mutex_is_locked(lock) &&
-		(atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
-}
-
 /*
  * Optimistic spinning.
  *
@@ -288,13 +346,6 @@ static inline bool mutex_try_to_acquire(struct mutex *lock)
  * need to reschedule. The rationale is that if the lock owner is
  * running, it is likely to release the lock soon.
  *
- * Since this needs the lock owner, and this mutex implementation
- * doesn't track the owner atomically in the lock field, we need to
- * track it non-atomically.
- *
- * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
- * to serialize everything.
- *
  * The mutex spinners are queued up using MCS lock so that only one
  * spinner can compete for the mutex. However, if mutex spinning isn't
  * going to happen, there is no point in going through the lock/unlock
@@ -342,35 +393,16 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = READ_ONCE(lock->owner);
+		owner = __mutex_owner(lock);
 		if (owner && !mutex_spin_on_owner(lock, owner))
 			break;
 
 		/* Try to acquire the mutex if it is unlocked. */
-		if (mutex_try_to_acquire(lock)) {
-			lock_acquired(&lock->dep_map, ip);
-
-			if (use_ww_ctx) {
-				struct ww_mutex *ww;
-				ww = container_of(lock, struct ww_mutex, base);
-
-				ww_mutex_set_context_fastpath(ww, ww_ctx);
-			}
-
-			mutex_set_owner(lock);
+		if (__mutex_trylock(lock)) {
 			osq_unlock(&lock->osq);
 			return true;
 		}
 
-		/*
-		 * When there's no owner, we might have preempted between the
-		 * owner acquiring the lock and setting the owner field. If
-		 * we're an RT task that will live-lock because we won't let
-		 * the owner complete.
-		 */
-		if (!owner && (need_resched() || rt_task(task)))
-			break;
-
 		/*
 		 * The cpu_relax() call is a compiler barrier which forces
 		 * everything in this loop to be re-loaded. We don't need
@@ -406,8 +438,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 }
 #endif
 
-__visible __used noinline
-void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip);
 
 /**
  * mutex_unlock - release the mutex
@@ -422,21 +453,12 @@ void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
  */
 void __sched mutex_unlock(struct mutex *lock)
 {
-	/*
-	 * The unlocking fastpath is the 0->1 transition from 'locked'
-	 * into 'unlocked' state:
-	 */
-#ifndef CONFIG_DEBUG_MUTEXES
-	/*
-	 * When debugging is enabled we must not clear the owner before time,
-	 * the slow path will always be taken, and that clears the owner field
-	 * after verifying that it was indeed current.
-	 */
-	mutex_clear_owner(lock);
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+	if (__mutex_unlock_fast(lock))
+		return;
 #endif
-	__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
+	__mutex_unlock_slowpath(lock, _RET_IP_);
 }
-
 EXPORT_SYMBOL(mutex_unlock);
 
 /**
@@ -465,15 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
 		lock->ctx = NULL;
 	}
 
-#ifndef CONFIG_DEBUG_MUTEXES
-	/*
-	 * When debugging is enabled we must not clear the owner before time,
-	 * the slow path will always be taken, and that clears the owner field
-	 * after verifying that it was indeed current.
-	 */
-	mutex_clear_owner(&lock->base);
-#endif
-	__mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
+	mutex_unlock(&lock->base);
 }
 EXPORT_SYMBOL(ww_mutex_unlock);
 
@@ -520,20 +534,24 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-	if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
+	if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
 		/* got the lock, yay! */
+		lock_acquired(&lock->dep_map, ip);
+		if (use_ww_ctx) {
+			struct ww_mutex *ww;
+			ww = container_of(lock, struct ww_mutex, base);
+
+			ww_mutex_set_context_fastpath(ww, ww_ctx);
+		}
 		preempt_enable();
 		return 0;
 	}
 
 	spin_lock_mutex(&lock->wait_lock, flags);
-
 	/*
-	 * Once more, try to acquire the lock. Only try-lock the mutex if
-	 * it is unlocked to reduce unnecessary xchg() operations.
+	 * After waiting to acquire the wait_lock, try again.
 	 */
-	if (!mutex_is_locked(lock) &&
-	    (atomic_xchg_acquire(&lock->count, 0) == 1))
+	if (__mutex_trylock(lock))
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
@@ -543,21 +561,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	list_add_tail(&waiter.list, &lock->wait_list);
 	waiter.task = task;
 
+	if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter)
+		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+
 	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
-		/*
-		 * Lets try to take the lock again - this is needed even if
-		 * we get here for the first time (shortly after failing to
-		 * acquire the lock), to make sure that we get a wakeup once
-		 * it's unlocked. Later on, if we sleep, this is the
-		 * operation that gives us the lock. We xchg it to -1, so
-		 * that when we release the lock, we properly wake up the
-		 * other waiters. We only attempt the xchg if the count is
-		 * non-negative in order to avoid unnecessary xchg operations:
-		 */
-		if (atomic_read(&lock->count) >= 0 &&
-		    (atomic_xchg_acquire(&lock->count, -1) == 1))
+		if (__mutex_trylock(lock))
 			break;
 
 		/*
@@ -585,15 +595,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	__set_task_state(task, TASK_RUNNING);
 
 	mutex_remove_waiter(lock, &waiter, task);
-	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
-		atomic_set(&lock->count, 0);
+		__mutex_clear_flag(lock, MUTEX_FLAG_WAITERS);
+
 	debug_mutex_free_waiter(&waiter);
 
 skip_wait:
 	/* got the lock - cleanup and rejoice! */
 	lock_acquired(&lock->dep_map, ip);
-	mutex_set_owner(lock);
 
 	if (use_ww_ctx) {
 		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -631,7 +640,6 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
 			    0, nest, _RET_IP_, NULL, 0);
 }
-
 EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
 
 int __sched
@@ -650,7 +658,6 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
 				   subclass, NULL, _RET_IP_, NULL, 0);
 }
-
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
 
 static inline int
@@ -715,29 +722,22 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
 /*
  * Release the lock, slowpath:
  */
-static inline void
-__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
 {
-	unsigned long flags;
+	unsigned long owner, flags;
 	WAKE_Q(wake_q);
 
+	mutex_release(&lock->dep_map, 1, ip);
+
 	/*
-	 * As a performance measurement, release the lock before doing other
-	 * wakeup related duties to follow. This allows other tasks to acquire
-	 * the lock sooner, while still handling cleanups in past unlock calls.
-	 * This can be done as we do not enforce strict equivalence between the
-	 * mutex counter and wait_list.
-	 *
-	 *
-	 * Some architectures leave the lock unlocked in the fastpath failure
-	 * case, others need to leave it locked. In the later case we have to
-	 * unlock it here - as the lock counter is currently 0 or negative.
+	 * Release the lock before (potentially) taking the spinlock
+	 * such that other contenders can get on with things ASAP.
 	 */
-	if (__mutex_slowpath_needs_to_unlock())
-		atomic_set(&lock->count, 1);
+	owner = atomic_long_fetch_and_release(MUTEX_FLAGS, &lock->owner);
+	if (!__owner_flags(owner))
+		return;
 
 	spin_lock_mutex(&lock->wait_lock, flags);
-	mutex_release(&lock->dep_map, nested, _RET_IP_);
 	debug_mutex_unlock(lock);
 
 	if (!list_empty(&lock->wait_list)) {
@@ -754,17 +754,6 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 	wake_up_q(&wake_q);
 }
 
-/*
- * Release the lock, slowpath:
- */
-__visible void
-__mutex_unlock_slowpath(atomic_t *lock_count)
-{
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-
-	__mutex_unlock_common_slowpath(lock, 1);
-}
-
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
@@ -789,38 +778,30 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-	int ret;
-
 	might_sleep();
-	ret =  __mutex_fastpath_lock_retval(&lock->count);
-	if (likely(!ret)) {
-		mutex_set_owner(lock);
+
+	if (__mutex_trylock_fast(lock))
 		return 0;
-	} else
-		return __mutex_lock_interruptible_slowpath(lock);
+
+	return __mutex_lock_interruptible_slowpath(lock);
 }
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-	int ret;
-
 	might_sleep();
-	ret = __mutex_fastpath_lock_retval(&lock->count);
-	if (likely(!ret)) {
-		mutex_set_owner(lock);
+
+	if (__mutex_trylock_fast(lock))
 		return 0;
-	} else
-		return __mutex_lock_killable_slowpath(lock);
+
+	return __mutex_lock_killable_slowpath(lock);
 }
 EXPORT_SYMBOL(mutex_lock_killable);
 
-__visible void __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
+static noinline void __sched
+__mutex_lock_slowpath(struct mutex *lock)
 {
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
 			    NULL, _RET_IP_, NULL, 0);
 }
@@ -856,37 +837,6 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
 
 #endif
 
-/*
- * Spinlock based trylock, we take the spinlock and check whether we
- * can get the lock:
- */
-static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
-{
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-	unsigned long flags;
-	int prev;
-
-	/* No need to trylock if the mutex is locked. */
-	if (mutex_is_locked(lock))
-		return 0;
-
-	spin_lock_mutex(&lock->wait_lock, flags);
-
-	prev = atomic_xchg_acquire(&lock->count, -1);
-	if (likely(prev == 1)) {
-		mutex_set_owner(lock);
-		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-	}
-
-	/* Set it back to 0 if there are no waiters: */
-	if (likely(list_empty(&lock->wait_list)))
-		atomic_set(&lock->count, 0);
-
-	spin_unlock_mutex(&lock->wait_lock, flags);
-
-	return prev == 1;
-}
-
 /**
  * mutex_trylock - try to acquire the mutex, without waiting
  * @lock: the mutex to be acquired
@@ -903,13 +853,12 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-	int ret;
+	bool locked = __mutex_trylock(lock);
 
-	ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
-	if (ret)
-		mutex_set_owner(lock);
+	if (locked)
+		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 
-	return ret;
+	return locked;
 }
 EXPORT_SYMBOL(mutex_trylock);
 
@@ -917,36 +866,28 @@ EXPORT_SYMBOL(mutex_trylock);
 int __sched
 __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
-
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
-	if (likely(!ret)) {
+	if (__mutex_trylock_fast(&lock->base)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
-		mutex_set_owner(&lock->base);
-	} else
-		ret = __ww_mutex_lock_slowpath(lock, ctx);
-	return ret;
+		return 0;
+	}
+
+	return __ww_mutex_lock_slowpath(lock, ctx);
 }
 EXPORT_SYMBOL(__ww_mutex_lock);
 
 int __sched
 __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
-
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
-	if (likely(!ret)) {
+	if (__mutex_trylock_fast(&lock->base)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
-		mutex_set_owner(&lock->base);
-	} else
-		ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
-	return ret;
+		return 0;
+	}
+
+	return __ww_mutex_lock_interruptible_slowpath(lock, ctx);
 }
 EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
 
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 6cd6b8e9efd7..4410a4af42a3 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -16,32 +16,6 @@
 #define mutex_remove_waiter(lock, waiter, task) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-/*
- * The mutex owner can get read and written to locklessly.
- * We should use WRITE_ONCE when writing the owner value to
- * avoid store tearing, otherwise, a thread could potentially
- * read a partially written and incomplete owner value.
- */
-static inline void mutex_set_owner(struct mutex *lock)
-{
-	WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-	WRITE_ONCE(lock->owner, NULL);
-}
-#else
-static inline void mutex_set_owner(struct mutex *lock)
-{
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-}
-#endif
-
 #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..8912aafd09e1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,11 +75,11 @@
 #include <linux/compiler.h>
 #include <linux/frame.h>
 #include <linux/prefetch.h>
+#include <linux/mutex.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
-#include <asm/mutex.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
-- 
cgit v1.2.3


From f54d1867005c3323f5d8ad83eed823e84226c429 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 25 Oct 2016 13:00:45 +0100
Subject: dma-buf: Rename struct fence to dma_fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I plan to usurp the short name of struct fence for a core kernel struct,
and so I need to rename the specialised fence/timeline for DMA
operations to make room.

A consensus was reached in
https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html
that making clear this fence applies to DMA operations was a good thing.
Since then the patch has grown a bit as usage increases, so hopefully it
remains a good thing!

(v2...: rebase, rerun spatch)
v3: Compile on msm, spotted a manual fixup that I broke.
v4: Try again for msm, sorry Daniel

coccinelle script:
@@

@@
- struct fence
+ struct dma_fence
@@

@@
- struct fence_ops
+ struct dma_fence_ops
@@

@@
- struct fence_cb
+ struct dma_fence_cb
@@

@@
- struct fence_array
+ struct dma_fence_array
@@

@@
- enum fence_flag_bits
+ enum dma_fence_flag_bits
@@

@@
(
- fence_init
+ dma_fence_init
|
- fence_release
+ dma_fence_release
|
- fence_free
+ dma_fence_free
|
- fence_get
+ dma_fence_get
|
- fence_get_rcu
+ dma_fence_get_rcu
|
- fence_put
+ dma_fence_put
|
- fence_signal
+ dma_fence_signal
|
- fence_signal_locked
+ dma_fence_signal_locked
|
- fence_default_wait
+ dma_fence_default_wait
|
- fence_add_callback
+ dma_fence_add_callback
|
- fence_remove_callback
+ dma_fence_remove_callback
|
- fence_enable_sw_signaling
+ dma_fence_enable_sw_signaling
|
- fence_is_signaled_locked
+ dma_fence_is_signaled_locked
|
- fence_is_signaled
+ dma_fence_is_signaled
|
- fence_is_later
+ dma_fence_is_later
|
- fence_later
+ dma_fence_later
|
- fence_wait_timeout
+ dma_fence_wait_timeout
|
- fence_wait_any_timeout
+ dma_fence_wait_any_timeout
|
- fence_wait
+ dma_fence_wait
|
- fence_context_alloc
+ dma_fence_context_alloc
|
- fence_array_create
+ dma_fence_array_create
|
- to_fence_array
+ to_dma_fence_array
|
- fence_is_array
+ dma_fence_is_array
|
- trace_fence_emit
+ trace_dma_fence_emit
|
- FENCE_TRACE
+ DMA_FENCE_TRACE
|
- FENCE_WARN
+ DMA_FENCE_WARN
|
- FENCE_ERR
+ DMA_FENCE_ERR
)
 (
 ...
 )

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Acked-by: Sumit Semwal <sumit.semwal@linaro.org>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
---
 Documentation/sync_file.txt                     |  14 +-
 drivers/base/Kconfig                            |   6 +-
 drivers/dma-buf/Kconfig                         |   2 +-
 drivers/dma-buf/Makefile                        |   2 +-
 drivers/dma-buf/dma-buf.c                       |  28 +-
 drivers/dma-buf/dma-fence-array.c               | 146 +++++++
 drivers/dma-buf/dma-fence.c                     | 537 ++++++++++++++++++++++++
 drivers/dma-buf/fence-array.c                   | 145 -------
 drivers/dma-buf/fence.c                         | 534 -----------------------
 drivers/dma-buf/reservation.c                   |  94 +++--
 drivers/dma-buf/seqno-fence.c                   |  18 +-
 drivers/dma-buf/sw_sync.c                       |  48 +--
 drivers/dma-buf/sync_debug.c                    |  13 +-
 drivers/dma-buf/sync_debug.h                    |   9 +-
 drivers/dma-buf/sync_file.c                     |  63 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu.h             |  54 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c          |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c         |  22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c      |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c     |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c       |  58 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c          |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c         |  22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c      |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h      |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c          |  24 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c        |  48 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_test.c        |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h       |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c         |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h         |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c         |  26 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h         |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c         |  26 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h         |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c          |  79 ++--
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c           |   6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c           |   6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c           |   6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c           |  12 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c          |   6 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c          |   6 +-
 drivers/gpu/drm/amd/amdgpu/si_dma.c             |   6 +-
 drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |   4 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   |  67 +--
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  26 +-
 drivers/gpu/drm/amd/scheduler/sched_fence.c     |  48 ++-
 drivers/gpu/drm/drm_atomic.c                    |   2 +-
 drivers/gpu/drm/drm_atomic_helper.c             |   8 +-
 drivers/gpu/drm/drm_fops.c                      |   6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c           |   6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c           |  46 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h           |   4 +-
 drivers/gpu/drm/i915/i915_gem_request.c         |  32 +-
 drivers/gpu/drm/i915/i915_gem_request.h         |  18 +-
 drivers/gpu/drm/i915/i915_sw_fence.c            |  41 +-
 drivers/gpu/drm/i915/i915_sw_fence.h            |   8 +-
 drivers/gpu/drm/i915/i915_trace.h               |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c        |   4 +-
 drivers/gpu/drm/i915/intel_engine_cs.c          |   2 +-
 drivers/gpu/drm/msm/msm_drv.h                   |   2 +-
 drivers/gpu/drm/msm/msm_fence.c                 |  28 +-
 drivers/gpu/drm/msm/msm_fence.h                 |   2 +-
 drivers/gpu/drm/msm/msm_gem.c                   |  14 +-
 drivers/gpu/drm/msm/msm_gem.h                   |   2 +-
 drivers/gpu/drm/msm/msm_gem_submit.c            |   8 +-
 drivers/gpu/drm/msm/msm_gpu.c                   |   2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c            |   6 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c         |  80 ++--
 drivers/gpu/drm/nouveau/nouveau_fence.h         |   6 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c           |   2 +-
 drivers/gpu/drm/nouveau/nv04_fence.c            |   2 +-
 drivers/gpu/drm/nouveau/nv10_fence.c            |   2 +-
 drivers/gpu/drm/nouveau/nv17_fence.c            |   2 +-
 drivers/gpu/drm/nouveau/nv50_fence.c            |   2 +-
 drivers/gpu/drm/nouveau/nv84_fence.c            |   2 +-
 drivers/gpu/drm/qxl/qxl_drv.h                   |   4 +-
 drivers/gpu/drm/qxl/qxl_release.c               |  35 +-
 drivers/gpu/drm/radeon/radeon.h                 |  10 +-
 drivers/gpu/drm/radeon/radeon_device.c          |   2 +-
 drivers/gpu/drm/radeon/radeon_display.c         |   8 +-
 drivers/gpu/drm/radeon/radeon_fence.c           |  56 +--
 drivers/gpu/drm/radeon/radeon_sync.c            |   6 +-
 drivers/gpu/drm/radeon/radeon_uvd.c             |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c                    |  24 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c               |  22 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c                 |   8 +-
 drivers/gpu/drm/ttm/ttm_execbuf_util.c          |   3 +-
 drivers/gpu/drm/vgem/vgem_fence.c               |  53 +--
 drivers/gpu/drm/virtio/virtgpu_drv.h            |   2 +-
 drivers/gpu/drm/virtio/virtgpu_fence.c          |  26 +-
 drivers/gpu/drm/virtio/virtgpu_ioctl.c          |  12 +-
 drivers/gpu/drm/virtio/virtgpu_kms.c            |   2 +-
 drivers/gpu/drm/virtio/virtgpu_plane.c          |   2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c           |  44 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.h           |   8 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c        |   2 +-
 include/drm/drmP.h                              |   4 +-
 include/drm/drm_crtc.h                          |   2 +-
 include/drm/drm_plane.h                         |   2 +-
 include/drm/ttm/ttm_bo_api.h                    |   2 +-
 include/drm/ttm/ttm_bo_driver.h                 |   6 +-
 include/drm/ttm/ttm_execbuf_util.h              |   2 +-
 include/linux/dma-buf.h                         |   4 +-
 include/linux/dma-fence-array.h                 |  86 ++++
 include/linux/dma-fence.h                       | 437 +++++++++++++++++++
 include/linux/fence-array.h                     |  84 ----
 include/linux/fence.h                           | 424 -------------------
 include/linux/reservation.h                     |  28 +-
 include/linux/seqno-fence.h                     |  20 +-
 include/linux/sync_file.h                       |  14 +-
 include/trace/events/dma_fence.h                | 128 ++++++
 include/trace/events/fence.h                    | 128 ------
 114 files changed, 2206 insertions(+), 2168 deletions(-)
 create mode 100644 drivers/dma-buf/dma-fence-array.c
 create mode 100644 drivers/dma-buf/dma-fence.c
 delete mode 100644 drivers/dma-buf/fence-array.c
 delete mode 100644 drivers/dma-buf/fence.c
 create mode 100644 include/linux/dma-fence-array.h
 create mode 100644 include/linux/dma-fence.h
 delete mode 100644 include/linux/fence-array.h
 delete mode 100644 include/linux/fence.h
 create mode 100644 include/trace/events/dma_fence.h
 delete mode 100644 include/trace/events/fence.h

(limited to 'include/linux')

diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt
index b63a68531afd..269681a6faec 100644
--- a/Documentation/sync_file.txt
+++ b/Documentation/sync_file.txt
@@ -6,7 +6,7 @@
 
 This document serves as a guide for device drivers writers on what the
 sync_file API is, and how drivers can support it. Sync file is the carrier of
-the fences(struct fence) that are needed to synchronize between drivers or
+the fences(struct dma_fence) that are needed to synchronize between drivers or
 across process boundaries.
 
 The sync_file API is meant to be used to send and receive fence information
@@ -32,9 +32,9 @@ in-fences and out-fences
 Sync files can go either to or from userspace. When a sync_file is sent from
 the driver to userspace we call the fences it contains 'out-fences'. They are
 related to a buffer that the driver is processing or is going to process, so
-the driver creates an out-fence to be able to notify, through fence_signal(),
-when it has finished using (or processing) that buffer. Out-fences are fences
-that the driver creates.
+the driver creates an out-fence to be able to notify, through
+dma_fence_signal(), when it has finished using (or processing) that buffer.
+Out-fences are fences that the driver creates.
 
 On the other hand if the driver receives fence(s) through a sync_file from
 userspace we call these fence(s) 'in-fences'. Receiveing in-fences means that
@@ -47,7 +47,7 @@ Creating Sync Files
 When a driver needs to send an out-fence userspace it creates a sync_file.
 
 Interface:
-	struct sync_file *sync_file_create(struct fence *fence);
+	struct sync_file *sync_file_create(struct dma_fence *fence);
 
 The caller pass the out-fence and gets back the sync_file. That is just the
 first step, next it needs to install an fd on sync_file->file. So it gets an
@@ -72,11 +72,11 @@ of the Sync File to the kernel. The kernel can then retrieve the fences
 from it.
 
 Interface:
-	struct fence *sync_file_get_fence(int fd);
+	struct dma_fence *sync_file_get_fence(int fd);
 
 
 The returned reference is owned by the caller and must be disposed of
-afterwards using fence_put(). In case of error, a NULL is returned instead.
+afterwards using dma_fence_put(). In case of error, a NULL is returned instead.
 
 References:
 [1] struct sync_file in include/linux/sync_file.h
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fdf44cac08e6..37bf25c6b4a6 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -248,11 +248,11 @@ config DMA_SHARED_BUFFER
 	  APIs extension; the file's descriptor can then be passed on to other
 	  driver.
 
-config FENCE_TRACE
-	bool "Enable verbose FENCE_TRACE messages"
+config DMA_FENCE_TRACE
+	bool "Enable verbose DMA_FENCE_TRACE messages"
 	depends on DMA_SHARED_BUFFER
 	help
-	  Enable the FENCE_TRACE printks. This will add extra
+	  Enable the DMA_FENCE_TRACE printks. This will add extra
 	  spam to the console log, but will make it easier to diagnose
 	  lockup related problems for dma-buffers shared across multiple
 	  devices.
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index 2585821b24ab..ed3b785bae37 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -7,7 +7,7 @@ config SYNC_FILE
 	select DMA_SHARED_BUFFER
 	---help---
 	  The Sync File Framework adds explicit syncronization via
-	  userspace. It enables send/receive 'struct fence' objects to/from
+	  userspace. It enables send/receive 'struct dma_fence' objects to/from
 	  userspace via Sync File fds for synchronization between drivers via
 	  userspace components. It has been ported from Android.
 
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 210a10bfad2b..c33bf8863147 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,3 +1,3 @@
-obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-array.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index cf04d249a6a4..e72e64484131 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -25,7 +25,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/dma-buf.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/anon_inodes.h>
 #include <linux/export.h>
 #include <linux/debugfs.h>
@@ -124,7 +124,7 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
 	return base + offset;
 }
 
-static void dma_buf_poll_cb(struct fence *fence, struct fence_cb *cb)
+static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb;
 	unsigned long flags;
@@ -140,7 +140,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll)
 	struct dma_buf *dmabuf;
 	struct reservation_object *resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence_excl;
+	struct dma_fence *fence_excl;
 	unsigned long events;
 	unsigned shared_count, seq;
 
@@ -187,20 +187,20 @@ retry:
 		spin_unlock_irq(&dmabuf->poll.lock);
 
 		if (events & pevents) {
-			if (!fence_get_rcu(fence_excl)) {
+			if (!dma_fence_get_rcu(fence_excl)) {
 				/* force a recheck */
 				events &= ~pevents;
 				dma_buf_poll_cb(NULL, &dcb->cb);
-			} else if (!fence_add_callback(fence_excl, &dcb->cb,
-						       dma_buf_poll_cb)) {
+			} else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
+							   dma_buf_poll_cb)) {
 				events &= ~pevents;
-				fence_put(fence_excl);
+				dma_fence_put(fence_excl);
 			} else {
 				/*
 				 * No callback queued, wake up any additional
 				 * waiters.
 				 */
-				fence_put(fence_excl);
+				dma_fence_put(fence_excl);
 				dma_buf_poll_cb(NULL, &dcb->cb);
 			}
 		}
@@ -222,9 +222,9 @@ retry:
 			goto out;
 
 		for (i = 0; i < shared_count; ++i) {
-			struct fence *fence = rcu_dereference(fobj->shared[i]);
+			struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
 
-			if (!fence_get_rcu(fence)) {
+			if (!dma_fence_get_rcu(fence)) {
 				/*
 				 * fence refcount dropped to zero, this means
 				 * that fobj has been freed
@@ -235,13 +235,13 @@ retry:
 				dma_buf_poll_cb(NULL, &dcb->cb);
 				break;
 			}
-			if (!fence_add_callback(fence, &dcb->cb,
-						dma_buf_poll_cb)) {
-				fence_put(fence);
+			if (!dma_fence_add_callback(fence, &dcb->cb,
+						    dma_buf_poll_cb)) {
+				dma_fence_put(fence);
 				events &= ~POLLOUT;
 				break;
 			}
-			fence_put(fence);
+			dma_fence_put(fence);
 		}
 
 		/* No callback queued, wake up any additional waiters. */
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
new file mode 100644
index 000000000000..67eb7c8fb88c
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -0,0 +1,146 @@
+/*
+ * dma-fence-array: aggregate fences to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Gustavo Padovan <gustavo@padovan.org>
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-array.h>
+
+static const char *dma_fence_array_get_driver_name(struct dma_fence *fence)
+{
+	return "dma_fence_array";
+}
+
+static const char *dma_fence_array_get_timeline_name(struct dma_fence *fence)
+{
+	return "unbound";
+}
+
+static void dma_fence_array_cb_func(struct dma_fence *f,
+				    struct dma_fence_cb *cb)
+{
+	struct dma_fence_array_cb *array_cb =
+		container_of(cb, struct dma_fence_array_cb, cb);
+	struct dma_fence_array *array = array_cb->array;
+
+	if (atomic_dec_and_test(&array->num_pending))
+		dma_fence_signal(&array->base);
+	dma_fence_put(&array->base);
+}
+
+static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
+{
+	struct dma_fence_array *array = to_dma_fence_array(fence);
+	struct dma_fence_array_cb *cb = (void *)(&array[1]);
+	unsigned i;
+
+	for (i = 0; i < array->num_fences; ++i) {
+		cb[i].array = array;
+		/*
+		 * As we may report that the fence is signaled before all
+		 * callbacks are complete, we need to take an additional
+		 * reference count on the array so that we do not free it too
+		 * early. The core fence handling will only hold the reference
+		 * until we signal the array as complete (but that is now
+		 * insufficient).
+		 */
+		dma_fence_get(&array->base);
+		if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
+					   dma_fence_array_cb_func)) {
+			dma_fence_put(&array->base);
+			if (atomic_dec_and_test(&array->num_pending))
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool dma_fence_array_signaled(struct dma_fence *fence)
+{
+	struct dma_fence_array *array = to_dma_fence_array(fence);
+
+	return atomic_read(&array->num_pending) <= 0;
+}
+
+static void dma_fence_array_release(struct dma_fence *fence)
+{
+	struct dma_fence_array *array = to_dma_fence_array(fence);
+	unsigned i;
+
+	for (i = 0; i < array->num_fences; ++i)
+		dma_fence_put(array->fences[i]);
+
+	kfree(array->fences);
+	dma_fence_free(fence);
+}
+
+const struct dma_fence_ops dma_fence_array_ops = {
+	.get_driver_name = dma_fence_array_get_driver_name,
+	.get_timeline_name = dma_fence_array_get_timeline_name,
+	.enable_signaling = dma_fence_array_enable_signaling,
+	.signaled = dma_fence_array_signaled,
+	.wait = dma_fence_default_wait,
+	.release = dma_fence_array_release,
+};
+EXPORT_SYMBOL(dma_fence_array_ops);
+
+/**
+ * dma_fence_array_create - Create a custom fence array
+ * @num_fences:		[in]	number of fences to add in the array
+ * @fences:		[in]	array containing the fences
+ * @context:		[in]	fence context to use
+ * @seqno:		[in]	sequence number to use
+ * @signal_on_any:	[in]	signal on any fence in the array
+ *
+ * Allocate a dma_fence_array object and initialize the base fence with
+ * dma_fence_init().
+ * In case of error it returns NULL.
+ *
+ * The caller should allocate the fences array with num_fences size
+ * and fill it with the fences it wants to add to the object. Ownership of this
+ * array is taken and dma_fence_put() is used on each fence on release.
+ *
+ * If @signal_on_any is true the fence array signals if any fence in the array
+ * signals, otherwise it signals when all fences in the array signal.
+ */
+struct dma_fence_array *dma_fence_array_create(int num_fences,
+					       struct dma_fence **fences,
+					       u64 context, unsigned seqno,
+					       bool signal_on_any)
+{
+	struct dma_fence_array *array;
+	size_t size = sizeof(*array);
+
+	/* Allocate the callback structures behind the array. */
+	size += num_fences * sizeof(struct dma_fence_array_cb);
+	array = kzalloc(size, GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	spin_lock_init(&array->lock);
+	dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
+		       context, seqno);
+
+	array->num_fences = num_fences;
+	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
+	array->fences = fences;
+
+	return array;
+}
+EXPORT_SYMBOL(dma_fence_array_create);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
new file mode 100644
index 000000000000..3a7bf009c21c
--- /dev/null
+++ b/drivers/dma-buf/dma-fence.c
@@ -0,0 +1,537 @@
+/*
+ * Fence mechanism for dma-buf and to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/atomic.h>
+#include <linux/dma-fence.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/dma_fence.h>
+
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_annotate_wait_on);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
+
+/*
+ * fence context counter: each execution context should have its own
+ * fence context, this allows checking if fences belong to the same
+ * context or not. One device can have multiple separate contexts,
+ * and they're used if some engine can run independently of another.
+ */
+static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
+
+/**
+ * dma_fence_context_alloc - allocate an array of fence contexts
+ * @num:	[in]	amount of contexts to allocate
+ *
+ * This function will return the first index of the number of fences allocated.
+ * The fence context is used for setting fence->context to a unique number.
+ */
+u64 dma_fence_context_alloc(unsigned num)
+{
+	BUG_ON(!num);
+	return atomic64_add_return(num, &dma_fence_context_counter) - num;
+}
+EXPORT_SYMBOL(dma_fence_context_alloc);
+
+/**
+ * dma_fence_signal_locked - signal completion of a fence
+ * @fence: the fence to signal
+ *
+ * Signal completion for software callbacks on a fence, this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from unsignaled to signaled state, it will only be effective
+ * the first time.
+ *
+ * Unlike dma_fence_signal, this function must be called with fence->lock held.
+ */
+int dma_fence_signal_locked(struct dma_fence *fence)
+{
+	struct dma_fence_cb *cur, *tmp;
+	int ret = 0;
+
+	lockdep_assert_held(fence->lock);
+
+	if (WARN_ON(!fence))
+		return -EINVAL;
+
+	if (!ktime_to_ns(fence->timestamp)) {
+		fence->timestamp = ktime_get();
+		smp_mb__before_atomic();
+	}
+
+	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		ret = -EINVAL;
+
+		/*
+		 * we might have raced with the unlocked dma_fence_signal,
+		 * still run through all callbacks
+		 */
+	} else
+		trace_dma_fence_signaled(fence);
+
+	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+		list_del_init(&cur->node);
+		cur->func(fence, cur);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_signal_locked);
+
+/**
+ * dma_fence_signal - signal completion of a fence
+ * @fence: the fence to signal
+ *
+ * Signal completion for software callbacks on a fence, this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from unsignaled to signaled state, it will only be effective
+ * the first time.
+ */
+int dma_fence_signal(struct dma_fence *fence)
+{
+	unsigned long flags;
+
+	if (!fence)
+		return -EINVAL;
+
+	if (!ktime_to_ns(fence->timestamp)) {
+		fence->timestamp = ktime_get();
+		smp_mb__before_atomic();
+	}
+
+	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return -EINVAL;
+
+	trace_dma_fence_signaled(fence);
+
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
+		struct dma_fence_cb *cur, *tmp;
+
+		spin_lock_irqsave(fence->lock, flags);
+		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+			list_del_init(&cur->node);
+			cur->func(fence, cur);
+		}
+		spin_unlock_irqrestore(fence->lock, flags);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(dma_fence_signal);
+
+/**
+ * dma_fence_wait_timeout - sleep until the fence gets signaled
+ * or until timeout elapses
+ * @fence:	[in]	the fence to wait on
+ * @intr:	[in]	if true, do an interruptible wait
+ * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
+ * remaining timeout in jiffies on success. Other error values may be
+ * returned on custom implementations.
+ *
+ * Performs a synchronous wait on this fence. It is assumed the caller
+ * directly or indirectly (buf-mgr between reservation and committing)
+ * holds a reference to the fence, otherwise the fence might be
+ * freed before return, resulting in undefined behavior.
+ */
+signed long
+dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
+{
+	signed long ret;
+
+	if (WARN_ON(timeout < 0))
+		return -EINVAL;
+
+	if (timeout == 0)
+		return dma_fence_is_signaled(fence);
+
+	trace_dma_fence_wait_start(fence);
+	ret = fence->ops->wait(fence, intr, timeout);
+	trace_dma_fence_wait_end(fence);
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_wait_timeout);
+
+void dma_fence_release(struct kref *kref)
+{
+	struct dma_fence *fence =
+		container_of(kref, struct dma_fence, refcount);
+
+	trace_dma_fence_destroy(fence);
+
+	BUG_ON(!list_empty(&fence->cb_list));
+
+	if (fence->ops->release)
+		fence->ops->release(fence);
+	else
+		dma_fence_free(fence);
+}
+EXPORT_SYMBOL(dma_fence_release);
+
+void dma_fence_free(struct dma_fence *fence)
+{
+	kfree_rcu(fence, rcu);
+}
+EXPORT_SYMBOL(dma_fence_free);
+
+/**
+ * dma_fence_enable_sw_signaling - enable signaling on fence
+ * @fence:	[in]	the fence to enable
+ *
+ * this will request for sw signaling to be enabled, to make the fence
+ * complete as soon as possible
+ */
+void dma_fence_enable_sw_signaling(struct dma_fence *fence)
+{
+	unsigned long flags;
+
+	if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			      &fence->flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		trace_dma_fence_enable_signal(fence);
+
+		spin_lock_irqsave(fence->lock, flags);
+
+		if (!fence->ops->enable_signaling(fence))
+			dma_fence_signal_locked(fence);
+
+		spin_unlock_irqrestore(fence->lock, flags);
+	}
+}
+EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
+
+/**
+ * dma_fence_add_callback - add a callback to be called when the fence
+ * is signaled
+ * @fence:	[in]	the fence to wait on
+ * @cb:		[in]	the callback to register
+ * @func:	[in]	the function to call
+ *
+ * cb will be initialized by dma_fence_add_callback, no initialization
+ * by the caller is required. Any number of callbacks can be registered
+ * to a fence, but a callback can only be registered to one fence at a time.
+ *
+ * Note that the callback can be called from an atomic context.  If
+ * fence is already signaled, this function will return -ENOENT (and
+ * *not* call the callback)
+ *
+ * Add a software callback to the fence. Same restrictions apply to
+ * refcount as it does to dma_fence_wait, however the caller doesn't need to
+ * keep a refcount to fence afterwards: when software access is enabled,
+ * the creator of the fence is required to keep the fence alive until
+ * after it signals with dma_fence_signal. The callback itself can be called
+ * from irq context.
+ *
+ */
+int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
+			   dma_fence_func_t func)
+{
+	unsigned long flags;
+	int ret = 0;
+	bool was_set;
+
+	if (WARN_ON(!fence || !func))
+		return -EINVAL;
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		INIT_LIST_HEAD(&cb->node);
+		return -ENOENT;
+	}
+
+	spin_lock_irqsave(fence->lock, flags);
+
+	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+				   &fence->flags);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		ret = -ENOENT;
+	else if (!was_set) {
+		trace_dma_fence_enable_signal(fence);
+
+		if (!fence->ops->enable_signaling(fence)) {
+			dma_fence_signal_locked(fence);
+			ret = -ENOENT;
+		}
+	}
+
+	if (!ret) {
+		cb->func = func;
+		list_add_tail(&cb->node, &fence->cb_list);
+	} else
+		INIT_LIST_HEAD(&cb->node);
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_add_callback);
+
+/**
+ * dma_fence_remove_callback - remove a callback from the signaling list
+ * @fence:	[in]	the fence to wait on
+ * @cb:		[in]	the callback to remove
+ *
+ * Remove a previously queued callback from the fence. This function returns
+ * true if the callback is successfully removed, or false if the fence has
+ * already been signaled.
+ *
+ * *WARNING*:
+ * Cancelling a callback should only be done if you really know what you're
+ * doing, since deadlocks and race conditions could occur all too easily. For
+ * this reason, it should only ever be done on hardware lockup recovery,
+ * with a reference held to the fence.
+ */
+bool
+dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(fence->lock, flags);
+
+	ret = !list_empty(&cb->node);
+	if (ret)
+		list_del_init(&cb->node);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_remove_callback);
+
+struct default_wait_cb {
+	struct dma_fence_cb base;
+	struct task_struct *task;
+};
+
+static void
+dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct default_wait_cb *wait =
+		container_of(cb, struct default_wait_cb, base);
+
+	wake_up_state(wait->task, TASK_NORMAL);
+}
+
+/**
+ * dma_fence_default_wait - default sleep until the fence gets signaled
+ * or until timeout elapses
+ * @fence:	[in]	the fence to wait on
+ * @intr:	[in]	if true, do an interruptible wait
+ * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
+ * remaining timeout in jiffies on success.
+ */
+signed long
+dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
+{
+	struct default_wait_cb cb;
+	unsigned long flags;
+	signed long ret = timeout;
+	bool was_set;
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	spin_lock_irqsave(fence->lock, flags);
+
+	if (intr && signal_pending(current)) {
+		ret = -ERESTARTSYS;
+		goto out;
+	}
+
+	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+				   &fence->flags);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		goto out;
+
+	if (!was_set) {
+		trace_dma_fence_enable_signal(fence);
+
+		if (!fence->ops->enable_signaling(fence)) {
+			dma_fence_signal_locked(fence);
+			goto out;
+		}
+	}
+
+	cb.base.func = dma_fence_default_wait_cb;
+	cb.task = current;
+	list_add(&cb.base.node, &fence->cb_list);
+
+	while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
+		if (intr)
+			__set_current_state(TASK_INTERRUPTIBLE);
+		else
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_unlock_irqrestore(fence->lock, flags);
+
+		ret = schedule_timeout(ret);
+
+		spin_lock_irqsave(fence->lock, flags);
+		if (ret > 0 && intr && signal_pending(current))
+			ret = -ERESTARTSYS;
+	}
+
+	if (!list_empty(&cb.base.node))
+		list_del(&cb.base.node);
+	__set_current_state(TASK_RUNNING);
+
+out:
+	spin_unlock_irqrestore(fence->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_default_wait);
+
+static bool
+dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count)
+{
+	int i;
+
+	for (i = 0; i < count; ++i) {
+		struct dma_fence *fence = fences[i];
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * dma_fence_wait_any_timeout - sleep until any fence gets signaled
+ * or until timeout elapses
+ * @fences:	[in]	array of fences to wait on
+ * @count:	[in]	number of fences to wait on
+ * @intr:	[in]	if true, do an interruptible wait
+ * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
+ * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
+ * on success.
+ *
+ * Synchronous waits for the first fence in the array to be signaled. The
+ * caller needs to hold a reference to all fences in the array, otherwise a
+ * fence might be freed before return, resulting in undefined behavior.
+ */
+signed long
+dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
+			   bool intr, signed long timeout)
+{
+	struct default_wait_cb *cb;
+	signed long ret = timeout;
+	unsigned i;
+
+	if (WARN_ON(!fences || !count || timeout < 0))
+		return -EINVAL;
+
+	if (timeout == 0) {
+		for (i = 0; i < count; ++i)
+			if (dma_fence_is_signaled(fences[i]))
+				return 1;
+
+		return 0;
+	}
+
+	cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
+	if (cb == NULL) {
+		ret = -ENOMEM;
+		goto err_free_cb;
+	}
+
+	for (i = 0; i < count; ++i) {
+		struct dma_fence *fence = fences[i];
+
+		if (fence->ops->wait != dma_fence_default_wait) {
+			ret = -EINVAL;
+			goto fence_rm_cb;
+		}
+
+		cb[i].task = current;
+		if (dma_fence_add_callback(fence, &cb[i].base,
+					   dma_fence_default_wait_cb)) {
+			/* This fence is already signaled */
+			goto fence_rm_cb;
+		}
+	}
+
+	while (ret > 0) {
+		if (intr)
+			set_current_state(TASK_INTERRUPTIBLE);
+		else
+			set_current_state(TASK_UNINTERRUPTIBLE);
+
+		if (dma_fence_test_signaled_any(fences, count))
+			break;
+
+		ret = schedule_timeout(ret);
+
+		if (ret > 0 && intr && signal_pending(current))
+			ret = -ERESTARTSYS;
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+fence_rm_cb:
+	while (i-- > 0)
+		dma_fence_remove_callback(fences[i], &cb[i].base);
+
+err_free_cb:
+	kfree(cb);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_fence_wait_any_timeout);
+
+/**
+ * dma_fence_init - Initialize a custom fence.
+ * @fence:	[in]	the fence to initialize
+ * @ops:	[in]	the dma_fence_ops for operations on this fence
+ * @lock:	[in]	the irqsafe spinlock to use for locking this fence
+ * @context:	[in]	the execution context this fence is run on
+ * @seqno:	[in]	a linear increasing sequence number for this context
+ *
+ * Initializes an allocated fence, the caller doesn't have to keep its
+ * refcount after committing with this fence, but it will need to hold a
+ * refcount again if dma_fence_ops.enable_signaling gets called. This can
+ * be used for other implementing other types of fence.
+ *
+ * context and seqno are used for easy comparison between fences, allowing
+ * to check which fence is later by simply using dma_fence_later.
+ */
+void
+dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
+	       spinlock_t *lock, u64 context, unsigned seqno)
+{
+	BUG_ON(!lock);
+	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
+	       !ops->get_driver_name || !ops->get_timeline_name);
+
+	kref_init(&fence->refcount);
+	fence->ops = ops;
+	INIT_LIST_HEAD(&fence->cb_list);
+	fence->lock = lock;
+	fence->context = context;
+	fence->seqno = seqno;
+	fence->flags = 0UL;
+
+	trace_dma_fence_init(fence);
+}
+EXPORT_SYMBOL(dma_fence_init);
diff --git a/drivers/dma-buf/fence-array.c b/drivers/dma-buf/fence-array.c
deleted file mode 100644
index f1989fcaf354..000000000000
--- a/drivers/dma-buf/fence-array.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * fence-array: aggregate fences to be waited together
- *
- * Copyright (C) 2016 Collabora Ltd
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
- * Authors:
- *	Gustavo Padovan <gustavo@padovan.org>
- *	Christian König <christian.koenig@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/fence-array.h>
-
-static void fence_array_cb_func(struct fence *f, struct fence_cb *cb);
-
-static const char *fence_array_get_driver_name(struct fence *fence)
-{
-	return "fence_array";
-}
-
-static const char *fence_array_get_timeline_name(struct fence *fence)
-{
-	return "unbound";
-}
-
-static void fence_array_cb_func(struct fence *f, struct fence_cb *cb)
-{
-	struct fence_array_cb *array_cb =
-		container_of(cb, struct fence_array_cb, cb);
-	struct fence_array *array = array_cb->array;
-
-	if (atomic_dec_and_test(&array->num_pending))
-		fence_signal(&array->base);
-	fence_put(&array->base);
-}
-
-static bool fence_array_enable_signaling(struct fence *fence)
-{
-	struct fence_array *array = to_fence_array(fence);
-	struct fence_array_cb *cb = (void *)(&array[1]);
-	unsigned i;
-
-	for (i = 0; i < array->num_fences; ++i) {
-		cb[i].array = array;
-		/*
-		 * As we may report that the fence is signaled before all
-		 * callbacks are complete, we need to take an additional
-		 * reference count on the array so that we do not free it too
-		 * early. The core fence handling will only hold the reference
-		 * until we signal the array as complete (but that is now
-		 * insufficient).
-		 */
-		fence_get(&array->base);
-		if (fence_add_callback(array->fences[i], &cb[i].cb,
-				       fence_array_cb_func)) {
-			fence_put(&array->base);
-			if (atomic_dec_and_test(&array->num_pending))
-				return false;
-		}
-	}
-
-	return true;
-}
-
-static bool fence_array_signaled(struct fence *fence)
-{
-	struct fence_array *array = to_fence_array(fence);
-
-	return atomic_read(&array->num_pending) <= 0;
-}
-
-static void fence_array_release(struct fence *fence)
-{
-	struct fence_array *array = to_fence_array(fence);
-	unsigned i;
-
-	for (i = 0; i < array->num_fences; ++i)
-		fence_put(array->fences[i]);
-
-	kfree(array->fences);
-	fence_free(fence);
-}
-
-const struct fence_ops fence_array_ops = {
-	.get_driver_name = fence_array_get_driver_name,
-	.get_timeline_name = fence_array_get_timeline_name,
-	.enable_signaling = fence_array_enable_signaling,
-	.signaled = fence_array_signaled,
-	.wait = fence_default_wait,
-	.release = fence_array_release,
-};
-EXPORT_SYMBOL(fence_array_ops);
-
-/**
- * fence_array_create - Create a custom fence array
- * @num_fences:		[in]	number of fences to add in the array
- * @fences:		[in]	array containing the fences
- * @context:		[in]	fence context to use
- * @seqno:		[in]	sequence number to use
- * @signal_on_any:	[in]	signal on any fence in the array
- *
- * Allocate a fence_array object and initialize the base fence with fence_init().
- * In case of error it returns NULL.
- *
- * The caller should allocate the fences array with num_fences size
- * and fill it with the fences it wants to add to the object. Ownership of this
- * array is taken and fence_put() is used on each fence on release.
- *
- * If @signal_on_any is true the fence array signals if any fence in the array
- * signals, otherwise it signals when all fences in the array signal.
- */
-struct fence_array *fence_array_create(int num_fences, struct fence **fences,
-				       u64 context, unsigned seqno,
-				       bool signal_on_any)
-{
-	struct fence_array *array;
-	size_t size = sizeof(*array);
-
-	/* Allocate the callback structures behind the array. */
-	size += num_fences * sizeof(struct fence_array_cb);
-	array = kzalloc(size, GFP_KERNEL);
-	if (!array)
-		return NULL;
-
-	spin_lock_init(&array->lock);
-	fence_init(&array->base, &fence_array_ops, &array->lock,
-		   context, seqno);
-
-	array->num_fences = num_fences;
-	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
-	array->fences = fences;
-
-	return array;
-}
-EXPORT_SYMBOL(fence_array_create);
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
deleted file mode 100644
index cc05dddc77a6..000000000000
--- a/drivers/dma-buf/fence.c
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- * Fence mechanism for dma-buf and to allow for asynchronous dma access
- *
- * Copyright (C) 2012 Canonical Ltd
- * Copyright (C) 2012 Texas Instruments
- *
- * Authors:
- * Rob Clark <robdclark@gmail.com>
- * Maarten Lankhorst <maarten.lankhorst@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/atomic.h>
-#include <linux/fence.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/fence.h>
-
-EXPORT_TRACEPOINT_SYMBOL(fence_annotate_wait_on);
-EXPORT_TRACEPOINT_SYMBOL(fence_emit);
-
-/*
- * fence context counter: each execution context should have its own
- * fence context, this allows checking if fences belong to the same
- * context or not. One device can have multiple separate contexts,
- * and they're used if some engine can run independently of another.
- */
-static atomic64_t fence_context_counter = ATOMIC64_INIT(0);
-
-/**
- * fence_context_alloc - allocate an array of fence contexts
- * @num:	[in]	amount of contexts to allocate
- *
- * This function will return the first index of the number of fences allocated.
- * The fence context is used for setting fence->context to a unique number.
- */
-u64 fence_context_alloc(unsigned num)
-{
-	BUG_ON(!num);
-	return atomic64_add_return(num, &fence_context_counter) - num;
-}
-EXPORT_SYMBOL(fence_context_alloc);
-
-/**
- * fence_signal_locked - signal completion of a fence
- * @fence: the fence to signal
- *
- * Signal completion for software callbacks on a fence, this will unblock
- * fence_wait() calls and run all the callbacks added with
- * fence_add_callback(). Can be called multiple times, but since a fence
- * can only go from unsignaled to signaled state, it will only be effective
- * the first time.
- *
- * Unlike fence_signal, this function must be called with fence->lock held.
- */
-int fence_signal_locked(struct fence *fence)
-{
-	struct fence_cb *cur, *tmp;
-	int ret = 0;
-
-	lockdep_assert_held(fence->lock);
-
-	if (WARN_ON(!fence))
-		return -EINVAL;
-
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
-	if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		ret = -EINVAL;
-
-		/*
-		 * we might have raced with the unlocked fence_signal,
-		 * still run through all callbacks
-		 */
-	} else
-		trace_fence_signaled(fence);
-
-	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-		list_del_init(&cur->node);
-		cur->func(fence, cur);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(fence_signal_locked);
-
-/**
- * fence_signal - signal completion of a fence
- * @fence: the fence to signal
- *
- * Signal completion for software callbacks on a fence, this will unblock
- * fence_wait() calls and run all the callbacks added with
- * fence_add_callback(). Can be called multiple times, but since a fence
- * can only go from unsignaled to signaled state, it will only be effective
- * the first time.
- */
-int fence_signal(struct fence *fence)
-{
-	unsigned long flags;
-
-	if (!fence)
-		return -EINVAL;
-
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
-	if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return -EINVAL;
-
-	trace_fence_signaled(fence);
-
-	if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
-		struct fence_cb *cur, *tmp;
-
-		spin_lock_irqsave(fence->lock, flags);
-		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-			list_del_init(&cur->node);
-			cur->func(fence, cur);
-		}
-		spin_unlock_irqrestore(fence->lock, flags);
-	}
-	return 0;
-}
-EXPORT_SYMBOL(fence_signal);
-
-/**
- * fence_wait_timeout - sleep until the fence gets signaled
- * or until timeout elapses
- * @fence:	[in]	the fence to wait on
- * @intr:	[in]	if true, do an interruptible wait
- * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
- *
- * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
- * remaining timeout in jiffies on success. Other error values may be
- * returned on custom implementations.
- *
- * Performs a synchronous wait on this fence. It is assumed the caller
- * directly or indirectly (buf-mgr between reservation and committing)
- * holds a reference to the fence, otherwise the fence might be
- * freed before return, resulting in undefined behavior.
- */
-signed long
-fence_wait_timeout(struct fence *fence, bool intr, signed long timeout)
-{
-	signed long ret;
-
-	if (WARN_ON(timeout < 0))
-		return -EINVAL;
-
-	if (timeout == 0)
-		return fence_is_signaled(fence);
-
-	trace_fence_wait_start(fence);
-	ret = fence->ops->wait(fence, intr, timeout);
-	trace_fence_wait_end(fence);
-	return ret;
-}
-EXPORT_SYMBOL(fence_wait_timeout);
-
-void fence_release(struct kref *kref)
-{
-	struct fence *fence =
-			container_of(kref, struct fence, refcount);
-
-	trace_fence_destroy(fence);
-
-	BUG_ON(!list_empty(&fence->cb_list));
-
-	if (fence->ops->release)
-		fence->ops->release(fence);
-	else
-		fence_free(fence);
-}
-EXPORT_SYMBOL(fence_release);
-
-void fence_free(struct fence *fence)
-{
-	kfree_rcu(fence, rcu);
-}
-EXPORT_SYMBOL(fence_free);
-
-/**
- * fence_enable_sw_signaling - enable signaling on fence
- * @fence:	[in]	the fence to enable
- *
- * this will request for sw signaling to be enabled, to make the fence
- * complete as soon as possible
- */
-void fence_enable_sw_signaling(struct fence *fence)
-{
-	unsigned long flags;
-
-	if (!test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags) &&
-	    !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		trace_fence_enable_signal(fence);
-
-		spin_lock_irqsave(fence->lock, flags);
-
-		if (!fence->ops->enable_signaling(fence))
-			fence_signal_locked(fence);
-
-		spin_unlock_irqrestore(fence->lock, flags);
-	}
-}
-EXPORT_SYMBOL(fence_enable_sw_signaling);
-
-/**
- * fence_add_callback - add a callback to be called when the fence
- * is signaled
- * @fence:	[in]	the fence to wait on
- * @cb:		[in]	the callback to register
- * @func:	[in]	the function to call
- *
- * cb will be initialized by fence_add_callback, no initialization
- * by the caller is required. Any number of callbacks can be registered
- * to a fence, but a callback can only be registered to one fence at a time.
- *
- * Note that the callback can be called from an atomic context.  If
- * fence is already signaled, this function will return -ENOENT (and
- * *not* call the callback)
- *
- * Add a software callback to the fence. Same restrictions apply to
- * refcount as it does to fence_wait, however the caller doesn't need to
- * keep a refcount to fence afterwards: when software access is enabled,
- * the creator of the fence is required to keep the fence alive until
- * after it signals with fence_signal. The callback itself can be called
- * from irq context.
- *
- */
-int fence_add_callback(struct fence *fence, struct fence_cb *cb,
-		       fence_func_t func)
-{
-	unsigned long flags;
-	int ret = 0;
-	bool was_set;
-
-	if (WARN_ON(!fence || !func))
-		return -EINVAL;
-
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		INIT_LIST_HEAD(&cb->node);
-		return -ENOENT;
-	}
-
-	spin_lock_irqsave(fence->lock, flags);
-
-	was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags);
-
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		ret = -ENOENT;
-	else if (!was_set) {
-		trace_fence_enable_signal(fence);
-
-		if (!fence->ops->enable_signaling(fence)) {
-			fence_signal_locked(fence);
-			ret = -ENOENT;
-		}
-	}
-
-	if (!ret) {
-		cb->func = func;
-		list_add_tail(&cb->node, &fence->cb_list);
-	} else
-		INIT_LIST_HEAD(&cb->node);
-	spin_unlock_irqrestore(fence->lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(fence_add_callback);
-
-/**
- * fence_remove_callback - remove a callback from the signaling list
- * @fence:	[in]	the fence to wait on
- * @cb:		[in]	the callback to remove
- *
- * Remove a previously queued callback from the fence. This function returns
- * true if the callback is successfully removed, or false if the fence has
- * already been signaled.
- *
- * *WARNING*:
- * Cancelling a callback should only be done if you really know what you're
- * doing, since deadlocks and race conditions could occur all too easily. For
- * this reason, it should only ever be done on hardware lockup recovery,
- * with a reference held to the fence.
- */
-bool
-fence_remove_callback(struct fence *fence, struct fence_cb *cb)
-{
-	unsigned long flags;
-	bool ret;
-
-	spin_lock_irqsave(fence->lock, flags);
-
-	ret = !list_empty(&cb->node);
-	if (ret)
-		list_del_init(&cb->node);
-
-	spin_unlock_irqrestore(fence->lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(fence_remove_callback);
-
-struct default_wait_cb {
-	struct fence_cb base;
-	struct task_struct *task;
-};
-
-static void
-fence_default_wait_cb(struct fence *fence, struct fence_cb *cb)
-{
-	struct default_wait_cb *wait =
-		container_of(cb, struct default_wait_cb, base);
-
-	wake_up_state(wait->task, TASK_NORMAL);
-}
-
-/**
- * fence_default_wait - default sleep until the fence gets signaled
- * or until timeout elapses
- * @fence:	[in]	the fence to wait on
- * @intr:	[in]	if true, do an interruptible wait
- * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
- *
- * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
- * remaining timeout in jiffies on success.
- */
-signed long
-fence_default_wait(struct fence *fence, bool intr, signed long timeout)
-{
-	struct default_wait_cb cb;
-	unsigned long flags;
-	signed long ret = timeout;
-	bool was_set;
-
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return timeout;
-
-	spin_lock_irqsave(fence->lock, flags);
-
-	if (intr && signal_pending(current)) {
-		ret = -ERESTARTSYS;
-		goto out;
-	}
-
-	was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags);
-
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		goto out;
-
-	if (!was_set) {
-		trace_fence_enable_signal(fence);
-
-		if (!fence->ops->enable_signaling(fence)) {
-			fence_signal_locked(fence);
-			goto out;
-		}
-	}
-
-	cb.base.func = fence_default_wait_cb;
-	cb.task = current;
-	list_add(&cb.base.node, &fence->cb_list);
-
-	while (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
-		if (intr)
-			__set_current_state(TASK_INTERRUPTIBLE);
-		else
-			__set_current_state(TASK_UNINTERRUPTIBLE);
-		spin_unlock_irqrestore(fence->lock, flags);
-
-		ret = schedule_timeout(ret);
-
-		spin_lock_irqsave(fence->lock, flags);
-		if (ret > 0 && intr && signal_pending(current))
-			ret = -ERESTARTSYS;
-	}
-
-	if (!list_empty(&cb.base.node))
-		list_del(&cb.base.node);
-	__set_current_state(TASK_RUNNING);
-
-out:
-	spin_unlock_irqrestore(fence->lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(fence_default_wait);
-
-static bool
-fence_test_signaled_any(struct fence **fences, uint32_t count)
-{
-	int i;
-
-	for (i = 0; i < count; ++i) {
-		struct fence *fence = fences[i];
-		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-			return true;
-	}
-	return false;
-}
-
-/**
- * fence_wait_any_timeout - sleep until any fence gets signaled
- * or until timeout elapses
- * @fences:	[in]	array of fences to wait on
- * @count:	[in]	number of fences to wait on
- * @intr:	[in]	if true, do an interruptible wait
- * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
- *
- * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
- * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
- * on success.
- *
- * Synchronous waits for the first fence in the array to be signaled. The
- * caller needs to hold a reference to all fences in the array, otherwise a
- * fence might be freed before return, resulting in undefined behavior.
- */
-signed long
-fence_wait_any_timeout(struct fence **fences, uint32_t count,
-		       bool intr, signed long timeout)
-{
-	struct default_wait_cb *cb;
-	signed long ret = timeout;
-	unsigned i;
-
-	if (WARN_ON(!fences || !count || timeout < 0))
-		return -EINVAL;
-
-	if (timeout == 0) {
-		for (i = 0; i < count; ++i)
-			if (fence_is_signaled(fences[i]))
-				return 1;
-
-		return 0;
-	}
-
-	cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
-	if (cb == NULL) {
-		ret = -ENOMEM;
-		goto err_free_cb;
-	}
-
-	for (i = 0; i < count; ++i) {
-		struct fence *fence = fences[i];
-
-		if (fence->ops->wait != fence_default_wait) {
-			ret = -EINVAL;
-			goto fence_rm_cb;
-		}
-
-		cb[i].task = current;
-		if (fence_add_callback(fence, &cb[i].base,
-				       fence_default_wait_cb)) {
-			/* This fence is already signaled */
-			goto fence_rm_cb;
-		}
-	}
-
-	while (ret > 0) {
-		if (intr)
-			set_current_state(TASK_INTERRUPTIBLE);
-		else
-			set_current_state(TASK_UNINTERRUPTIBLE);
-
-		if (fence_test_signaled_any(fences, count))
-			break;
-
-		ret = schedule_timeout(ret);
-
-		if (ret > 0 && intr && signal_pending(current))
-			ret = -ERESTARTSYS;
-	}
-
-	__set_current_state(TASK_RUNNING);
-
-fence_rm_cb:
-	while (i-- > 0)
-		fence_remove_callback(fences[i], &cb[i].base);
-
-err_free_cb:
-	kfree(cb);
-
-	return ret;
-}
-EXPORT_SYMBOL(fence_wait_any_timeout);
-
-/**
- * fence_init - Initialize a custom fence.
- * @fence:	[in]	the fence to initialize
- * @ops:	[in]	the fence_ops for operations on this fence
- * @lock:	[in]	the irqsafe spinlock to use for locking this fence
- * @context:	[in]	the execution context this fence is run on
- * @seqno:	[in]	a linear increasing sequence number for this context
- *
- * Initializes an allocated fence, the caller doesn't have to keep its
- * refcount after committing with this fence, but it will need to hold a
- * refcount again if fence_ops.enable_signaling gets called. This can
- * be used for other implementing other types of fence.
- *
- * context and seqno are used for easy comparison between fences, allowing
- * to check which fence is later by simply using fence_later.
- */
-void
-fence_init(struct fence *fence, const struct fence_ops *ops,
-	     spinlock_t *lock, u64 context, unsigned seqno)
-{
-	BUG_ON(!lock);
-	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
-	       !ops->get_driver_name || !ops->get_timeline_name);
-
-	kref_init(&fence->refcount);
-	fence->ops = ops;
-	INIT_LIST_HEAD(&fence->cb_list);
-	fence->lock = lock;
-	fence->context = context;
-	fence->seqno = seqno;
-	fence->flags = 0UL;
-
-	trace_fence_init(fence);
-}
-EXPORT_SYMBOL(fence_init);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 82de59f7cbbd..7ed56f3edfb7 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -102,17 +102,17 @@ EXPORT_SYMBOL(reservation_object_reserve_shared);
 static void
 reservation_object_add_shared_inplace(struct reservation_object *obj,
 				      struct reservation_object_list *fobj,
-				      struct fence *fence)
+				      struct dma_fence *fence)
 {
 	u32 i;
 
-	fence_get(fence);
+	dma_fence_get(fence);
 
 	preempt_disable();
 	write_seqcount_begin(&obj->seq);
 
 	for (i = 0; i < fobj->shared_count; ++i) {
-		struct fence *old_fence;
+		struct dma_fence *old_fence;
 
 		old_fence = rcu_dereference_protected(fobj->shared[i],
 						reservation_object_held(obj));
@@ -123,7 +123,7 @@ reservation_object_add_shared_inplace(struct reservation_object *obj,
 			write_seqcount_end(&obj->seq);
 			preempt_enable();
 
-			fence_put(old_fence);
+			dma_fence_put(old_fence);
 			return;
 		}
 	}
@@ -143,12 +143,12 @@ static void
 reservation_object_add_shared_replace(struct reservation_object *obj,
 				      struct reservation_object_list *old,
 				      struct reservation_object_list *fobj,
-				      struct fence *fence)
+				      struct dma_fence *fence)
 {
 	unsigned i;
-	struct fence *old_fence = NULL;
+	struct dma_fence *old_fence = NULL;
 
-	fence_get(fence);
+	dma_fence_get(fence);
 
 	if (!old) {
 		RCU_INIT_POINTER(fobj->shared[0], fence);
@@ -165,7 +165,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj,
 	fobj->shared_count = old->shared_count;
 
 	for (i = 0; i < old->shared_count; ++i) {
-		struct fence *check;
+		struct dma_fence *check;
 
 		check = rcu_dereference_protected(old->shared[i],
 						reservation_object_held(obj));
@@ -196,7 +196,7 @@ done:
 		kfree_rcu(old, rcu);
 
 	if (old_fence)
-		fence_put(old_fence);
+		dma_fence_put(old_fence);
 }
 
 /**
@@ -208,7 +208,7 @@ done:
  * reservation_object_reserve_shared() has been called.
  */
 void reservation_object_add_shared_fence(struct reservation_object *obj,
-					 struct fence *fence)
+					 struct dma_fence *fence)
 {
 	struct reservation_object_list *old, *fobj = obj->staged;
 
@@ -231,9 +231,9 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence);
  * Add a fence to the exclusive slot.  The obj->lock must be held.
  */
 void reservation_object_add_excl_fence(struct reservation_object *obj,
-				       struct fence *fence)
+				       struct dma_fence *fence)
 {
-	struct fence *old_fence = reservation_object_get_excl(obj);
+	struct dma_fence *old_fence = reservation_object_get_excl(obj);
 	struct reservation_object_list *old;
 	u32 i = 0;
 
@@ -242,7 +242,7 @@ void reservation_object_add_excl_fence(struct reservation_object *obj,
 		i = old->shared_count;
 
 	if (fence)
-		fence_get(fence);
+		dma_fence_get(fence);
 
 	preempt_disable();
 	write_seqcount_begin(&obj->seq);
@@ -255,11 +255,11 @@ void reservation_object_add_excl_fence(struct reservation_object *obj,
 
 	/* inplace update, no shared fences */
 	while (i--)
-		fence_put(rcu_dereference_protected(old->shared[i],
+		dma_fence_put(rcu_dereference_protected(old->shared[i],
 						reservation_object_held(obj)));
 
 	if (old_fence)
-		fence_put(old_fence);
+		dma_fence_put(old_fence);
 }
 EXPORT_SYMBOL(reservation_object_add_excl_fence);
 
@@ -276,12 +276,12 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence);
  * Zero or -errno
  */
 int reservation_object_get_fences_rcu(struct reservation_object *obj,
-				      struct fence **pfence_excl,
+				      struct dma_fence **pfence_excl,
 				      unsigned *pshared_count,
-				      struct fence ***pshared)
+				      struct dma_fence ***pshared)
 {
-	struct fence **shared = NULL;
-	struct fence *fence_excl;
+	struct dma_fence **shared = NULL;
+	struct dma_fence *fence_excl;
 	unsigned int shared_count;
 	int ret = 1;
 
@@ -296,12 +296,12 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 		seq = read_seqcount_begin(&obj->seq);
 
 		fence_excl = rcu_dereference(obj->fence_excl);
-		if (fence_excl && !fence_get_rcu(fence_excl))
+		if (fence_excl && !dma_fence_get_rcu(fence_excl))
 			goto unlock;
 
 		fobj = rcu_dereference(obj->fence);
 		if (fobj) {
-			struct fence **nshared;
+			struct dma_fence **nshared;
 			size_t sz = sizeof(*shared) * fobj->shared_max;
 
 			nshared = krealloc(shared, sz,
@@ -322,15 +322,15 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 
 			for (i = 0; i < shared_count; ++i) {
 				shared[i] = rcu_dereference(fobj->shared[i]);
-				if (!fence_get_rcu(shared[i]))
+				if (!dma_fence_get_rcu(shared[i]))
 					break;
 			}
 		}
 
 		if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
 			while (i--)
-				fence_put(shared[i]);
-			fence_put(fence_excl);
+				dma_fence_put(shared[i]);
+			dma_fence_put(fence_excl);
 			goto unlock;
 		}
 
@@ -368,7 +368,7 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
 					 bool wait_all, bool intr,
 					 unsigned long timeout)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	unsigned seq, shared_count, i = 0;
 	long ret = timeout;
 
@@ -389,16 +389,17 @@ retry:
 			shared_count = fobj->shared_count;
 
 		for (i = 0; i < shared_count; ++i) {
-			struct fence *lfence = rcu_dereference(fobj->shared[i]);
+			struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
 
-			if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags))
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &lfence->flags))
 				continue;
 
-			if (!fence_get_rcu(lfence))
+			if (!dma_fence_get_rcu(lfence))
 				goto unlock_retry;
 
-			if (fence_is_signaled(lfence)) {
-				fence_put(lfence);
+			if (dma_fence_is_signaled(lfence)) {
+				dma_fence_put(lfence);
 				continue;
 			}
 
@@ -408,15 +409,16 @@ retry:
 	}
 
 	if (!shared_count) {
-		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
+		struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
 
 		if (fence_excl &&
-		    !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) {
-			if (!fence_get_rcu(fence_excl))
+		    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			      &fence_excl->flags)) {
+			if (!dma_fence_get_rcu(fence_excl))
 				goto unlock_retry;
 
-			if (fence_is_signaled(fence_excl))
-				fence_put(fence_excl);
+			if (dma_fence_is_signaled(fence_excl))
+				dma_fence_put(fence_excl);
 			else
 				fence = fence_excl;
 		}
@@ -425,12 +427,12 @@ retry:
 	rcu_read_unlock();
 	if (fence) {
 		if (read_seqcount_retry(&obj->seq, seq)) {
-			fence_put(fence);
+			dma_fence_put(fence);
 			goto retry;
 		}
 
-		ret = fence_wait_timeout(fence, intr, ret);
-		fence_put(fence);
+		ret = dma_fence_wait_timeout(fence, intr, ret);
+		dma_fence_put(fence);
 		if (ret > 0 && wait_all && (i + 1 < shared_count))
 			goto retry;
 	}
@@ -444,18 +446,18 @@ EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu);
 
 
 static inline int
-reservation_object_test_signaled_single(struct fence *passed_fence)
+reservation_object_test_signaled_single(struct dma_fence *passed_fence)
 {
-	struct fence *fence, *lfence = passed_fence;
+	struct dma_fence *fence, *lfence = passed_fence;
 	int ret = 1;
 
-	if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
-		fence = fence_get_rcu(lfence);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
+		fence = dma_fence_get_rcu(lfence);
 		if (!fence)
 			return -1;
 
-		ret = !!fence_is_signaled(fence);
-		fence_put(fence);
+		ret = !!dma_fence_is_signaled(fence);
+		dma_fence_put(fence);
 	}
 	return ret;
 }
@@ -492,7 +494,7 @@ retry:
 			shared_count = fobj->shared_count;
 
 		for (i = 0; i < shared_count; ++i) {
-			struct fence *fence = rcu_dereference(fobj->shared[i]);
+			struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
 
 			ret = reservation_object_test_signaled_single(fence);
 			if (ret < 0)
@@ -506,7 +508,7 @@ retry:
 	}
 
 	if (!shared_count) {
-		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
+		struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
 
 		if (fence_excl) {
 			ret = reservation_object_test_signaled_single(
diff --git a/drivers/dma-buf/seqno-fence.c b/drivers/dma-buf/seqno-fence.c
index 71127f8f1626..f47112a64763 100644
--- a/drivers/dma-buf/seqno-fence.c
+++ b/drivers/dma-buf/seqno-fence.c
@@ -21,35 +21,35 @@
 #include <linux/export.h>
 #include <linux/seqno-fence.h>
 
-static const char *seqno_fence_get_driver_name(struct fence *fence)
+static const char *seqno_fence_get_driver_name(struct dma_fence *fence)
 {
 	struct seqno_fence *seqno_fence = to_seqno_fence(fence);
 
 	return seqno_fence->ops->get_driver_name(fence);
 }
 
-static const char *seqno_fence_get_timeline_name(struct fence *fence)
+static const char *seqno_fence_get_timeline_name(struct dma_fence *fence)
 {
 	struct seqno_fence *seqno_fence = to_seqno_fence(fence);
 
 	return seqno_fence->ops->get_timeline_name(fence);
 }
 
-static bool seqno_enable_signaling(struct fence *fence)
+static bool seqno_enable_signaling(struct dma_fence *fence)
 {
 	struct seqno_fence *seqno_fence = to_seqno_fence(fence);
 
 	return seqno_fence->ops->enable_signaling(fence);
 }
 
-static bool seqno_signaled(struct fence *fence)
+static bool seqno_signaled(struct dma_fence *fence)
 {
 	struct seqno_fence *seqno_fence = to_seqno_fence(fence);
 
 	return seqno_fence->ops->signaled && seqno_fence->ops->signaled(fence);
 }
 
-static void seqno_release(struct fence *fence)
+static void seqno_release(struct dma_fence *fence)
 {
 	struct seqno_fence *f = to_seqno_fence(fence);
 
@@ -57,18 +57,18 @@ static void seqno_release(struct fence *fence)
 	if (f->ops->release)
 		f->ops->release(fence);
 	else
-		fence_free(&f->base);
+		dma_fence_free(&f->base);
 }
 
-static signed long seqno_wait(struct fence *fence, bool intr,
-				signed long timeout)
+static signed long seqno_wait(struct dma_fence *fence, bool intr,
+			      signed long timeout)
 {
 	struct seqno_fence *f = to_seqno_fence(fence);
 
 	return f->ops->wait(fence, intr, timeout);
 }
 
-const struct fence_ops seqno_fence_ops = {
+const struct dma_fence_ops seqno_fence_ops = {
 	.get_driver_name = seqno_fence_get_driver_name,
 	.get_timeline_name = seqno_fence_get_timeline_name,
 	.enable_signaling = seqno_enable_signaling,
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 62e8e6dc7953..82e0ca4dd0c1 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -68,9 +68,9 @@ struct sw_sync_create_fence_data {
 
 #define SW_SYNC_IOC_INC			_IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
 
-static const struct fence_ops timeline_fence_ops;
+static const struct dma_fence_ops timeline_fence_ops;
 
-static inline struct sync_pt *fence_to_sync_pt(struct fence *fence)
+static inline struct sync_pt *dma_fence_to_sync_pt(struct dma_fence *fence)
 {
 	if (fence->ops != &timeline_fence_ops)
 		return NULL;
@@ -93,7 +93,7 @@ struct sync_timeline *sync_timeline_create(const char *name)
 		return NULL;
 
 	kref_init(&obj->kref);
-	obj->context = fence_context_alloc(1);
+	obj->context = dma_fence_context_alloc(1);
 	strlcpy(obj->name, name, sizeof(obj->name));
 
 	INIT_LIST_HEAD(&obj->child_list_head);
@@ -146,7 +146,7 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 
 	list_for_each_entry_safe(pt, next, &obj->active_list_head,
 				 active_list) {
-		if (fence_is_signaled_locked(&pt->base))
+		if (dma_fence_is_signaled_locked(&pt->base))
 			list_del_init(&pt->active_list);
 	}
 
@@ -179,30 +179,30 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size,
 
 	spin_lock_irqsave(&obj->child_list_lock, flags);
 	sync_timeline_get(obj);
-	fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock,
-		   obj->context, value);
+	dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock,
+		       obj->context, value);
 	list_add_tail(&pt->child_list, &obj->child_list_head);
 	INIT_LIST_HEAD(&pt->active_list);
 	spin_unlock_irqrestore(&obj->child_list_lock, flags);
 	return pt;
 }
 
-static const char *timeline_fence_get_driver_name(struct fence *fence)
+static const char *timeline_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "sw_sync";
 }
 
-static const char *timeline_fence_get_timeline_name(struct fence *fence)
+static const char *timeline_fence_get_timeline_name(struct dma_fence *fence)
 {
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 
 	return parent->name;
 }
 
-static void timeline_fence_release(struct fence *fence)
+static void timeline_fence_release(struct dma_fence *fence)
 {
-	struct sync_pt *pt = fence_to_sync_pt(fence);
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 	unsigned long flags;
 
 	spin_lock_irqsave(fence->lock, flags);
@@ -212,20 +212,20 @@ static void timeline_fence_release(struct fence *fence)
 	spin_unlock_irqrestore(fence->lock, flags);
 
 	sync_timeline_put(parent);
-	fence_free(fence);
+	dma_fence_free(fence);
 }
 
-static bool timeline_fence_signaled(struct fence *fence)
+static bool timeline_fence_signaled(struct dma_fence *fence)
 {
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 
 	return (fence->seqno > parent->value) ? false : true;
 }
 
-static bool timeline_fence_enable_signaling(struct fence *fence)
+static bool timeline_fence_enable_signaling(struct dma_fence *fence)
 {
-	struct sync_pt *pt = fence_to_sync_pt(fence);
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 
 	if (timeline_fence_signaled(fence))
 		return false;
@@ -234,26 +234,26 @@ static bool timeline_fence_enable_signaling(struct fence *fence)
 	return true;
 }
 
-static void timeline_fence_value_str(struct fence *fence,
+static void timeline_fence_value_str(struct dma_fence *fence,
 				    char *str, int size)
 {
 	snprintf(str, size, "%d", fence->seqno);
 }
 
-static void timeline_fence_timeline_value_str(struct fence *fence,
+static void timeline_fence_timeline_value_str(struct dma_fence *fence,
 					     char *str, int size)
 {
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 
 	snprintf(str, size, "%d", parent->value);
 }
 
-static const struct fence_ops timeline_fence_ops = {
+static const struct dma_fence_ops timeline_fence_ops = {
 	.get_driver_name = timeline_fence_get_driver_name,
 	.get_timeline_name = timeline_fence_get_timeline_name,
 	.enable_signaling = timeline_fence_enable_signaling,
 	.signaled = timeline_fence_signaled,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = timeline_fence_release,
 	.fence_value_str = timeline_fence_value_str,
 	.timeline_value_str = timeline_fence_timeline_value_str,
@@ -317,7 +317,7 @@ static long sw_sync_ioctl_create_fence(struct sync_timeline *obj,
 
 	sync_file = sync_file_create(&pt->base);
 	if (!sync_file) {
-		fence_put(&pt->base);
+		dma_fence_put(&pt->base);
 		err = -ENOMEM;
 		goto err;
 	}
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index 2dd4c3db6caa..48b20e34fb6d 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -71,12 +71,13 @@ static const char *sync_status_str(int status)
 	return "error";
 }
 
-static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show)
+static void sync_print_fence(struct seq_file *s,
+			     struct dma_fence *fence, bool show)
 {
 	int status = 1;
-	struct sync_timeline *parent = fence_parent(fence);
+	struct sync_timeline *parent = dma_fence_parent(fence);
 
-	if (fence_is_signaled_locked(fence))
+	if (dma_fence_is_signaled_locked(fence))
 		status = fence->status;
 
 	seq_printf(s, "  %s%sfence %s",
@@ -135,10 +136,10 @@ static void sync_print_sync_file(struct seq_file *s,
 	int i;
 
 	seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
-		   sync_status_str(!fence_is_signaled(sync_file->fence)));
+		   sync_status_str(!dma_fence_is_signaled(sync_file->fence)));
 
-	if (fence_is_array(sync_file->fence)) {
-		struct fence_array *array = to_fence_array(sync_file->fence);
+	if (dma_fence_is_array(sync_file->fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
 
 		for (i = 0; i < array->num_fences; ++i)
 			sync_print_fence(s, array->fences[i], true);
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index d269aa6783aa..26fe8b9907b3 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -15,7 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include <linux/sync_file.h>
 #include <uapi/linux/sync_file.h>
@@ -45,10 +45,9 @@ struct sync_timeline {
 	struct list_head	sync_timeline_list;
 };
 
-static inline struct sync_timeline *fence_parent(struct fence *fence)
+static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
 {
-	return container_of(fence->lock, struct sync_timeline,
-			    child_list_lock);
+	return container_of(fence->lock, struct sync_timeline, child_list_lock);
 }
 
 /**
@@ -58,7 +57,7 @@ static inline struct sync_timeline *fence_parent(struct fence *fence)
  * @active_list: sync timeline active child's list
  */
 struct sync_pt {
-	struct fence base;
+	struct dma_fence base;
 	struct list_head child_list;
 	struct list_head active_list;
 };
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 235f8ac113cc..69d8ef98d34c 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -54,7 +54,7 @@ err:
 	return NULL;
 }
 
-static void fence_check_cb_func(struct fence *f, struct fence_cb *cb)
+static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct sync_file *sync_file;
 
@@ -71,7 +71,7 @@ static void fence_check_cb_func(struct fence *f, struct fence_cb *cb)
  * takes ownership of @fence. The sync_file can be released with
  * fput(sync_file->file). Returns the sync_file or NULL in case of error.
  */
-struct sync_file *sync_file_create(struct fence *fence)
+struct sync_file *sync_file_create(struct dma_fence *fence)
 {
 	struct sync_file *sync_file;
 
@@ -79,7 +79,7 @@ struct sync_file *sync_file_create(struct fence *fence)
 	if (!sync_file)
 		return NULL;
 
-	sync_file->fence = fence_get(fence);
+	sync_file->fence = dma_fence_get(fence);
 
 	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 		 fence->ops->get_driver_name(fence),
@@ -121,16 +121,16 @@ err:
  * Ensures @fd references a valid sync_file and returns a fence that
  * represents all fence in the sync_file. On error NULL is returned.
  */
-struct fence *sync_file_get_fence(int fd)
+struct dma_fence *sync_file_get_fence(int fd)
 {
 	struct sync_file *sync_file;
-	struct fence *fence;
+	struct dma_fence *fence;
 
 	sync_file = sync_file_fdget(fd);
 	if (!sync_file)
 		return NULL;
 
-	fence = fence_get(sync_file->fence);
+	fence = dma_fence_get(sync_file->fence);
 	fput(sync_file->file);
 
 	return fence;
@@ -138,22 +138,23 @@ struct fence *sync_file_get_fence(int fd)
 EXPORT_SYMBOL(sync_file_get_fence);
 
 static int sync_file_set_fence(struct sync_file *sync_file,
-			       struct fence **fences, int num_fences)
+			       struct dma_fence **fences, int num_fences)
 {
-	struct fence_array *array;
+	struct dma_fence_array *array;
 
 	/*
 	 * The reference for the fences in the new sync_file and held
 	 * in add_fence() during the merge procedure, so for num_fences == 1
 	 * we already own a new reference to the fence. For num_fence > 1
-	 * we own the reference of the fence_array creation.
+	 * we own the reference of the dma_fence_array creation.
 	 */
 	if (num_fences == 1) {
 		sync_file->fence = fences[0];
 		kfree(fences);
 	} else {
-		array = fence_array_create(num_fences, fences,
-					   fence_context_alloc(1), 1, false);
+		array = dma_fence_array_create(num_fences, fences,
+					       dma_fence_context_alloc(1),
+					       1, false);
 		if (!array)
 			return -ENOMEM;
 
@@ -163,10 +164,11 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 	return 0;
 }
 
-static struct fence **get_fences(struct sync_file *sync_file, int *num_fences)
+static struct dma_fence **get_fences(struct sync_file *sync_file,
+				     int *num_fences)
 {
-	if (fence_is_array(sync_file->fence)) {
-		struct fence_array *array = to_fence_array(sync_file->fence);
+	if (dma_fence_is_array(sync_file->fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
 
 		*num_fences = array->num_fences;
 		return array->fences;
@@ -176,12 +178,13 @@ static struct fence **get_fences(struct sync_file *sync_file, int *num_fences)
 	return &sync_file->fence;
 }
 
-static void add_fence(struct fence **fences, int *i, struct fence *fence)
+static void add_fence(struct dma_fence **fences,
+		      int *i, struct dma_fence *fence)
 {
 	fences[*i] = fence;
 
-	if (!fence_is_signaled(fence)) {
-		fence_get(fence);
+	if (!dma_fence_is_signaled(fence)) {
+		dma_fence_get(fence);
 		(*i)++;
 	}
 }
@@ -200,7 +203,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 					 struct sync_file *b)
 {
 	struct sync_file *sync_file;
-	struct fence **fences, **nfences, **a_fences, **b_fences;
+	struct dma_fence **fences, **nfences, **a_fences, **b_fences;
 	int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;
 
 	sync_file = sync_file_alloc();
@@ -226,8 +229,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 	 * and sync_file_create, this is a reasonable assumption.
 	 */
 	for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
-		struct fence *pt_a = a_fences[i_a];
-		struct fence *pt_b = b_fences[i_b];
+		struct dma_fence *pt_a = a_fences[i_a];
+		struct dma_fence *pt_b = b_fences[i_b];
 
 		if (pt_a->context < pt_b->context) {
 			add_fence(fences, &i, pt_a);
@@ -255,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		add_fence(fences, &i, b_fences[i_b]);
 
 	if (i == 0)
-		fences[i++] = fence_get(a_fences[0]);
+		fences[i++] = dma_fence_get(a_fences[0]);
 
 	if (num_fences > i) {
 		nfences = krealloc(fences, i * sizeof(*fences),
@@ -286,8 +289,8 @@ static void sync_file_free(struct kref *kref)
 						     kref);
 
 	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
-		fence_remove_callback(sync_file->fence, &sync_file->cb);
-	fence_put(sync_file->fence);
+		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
+	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
 }
 
@@ -307,12 +310,12 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 
 	if (!poll_does_not_wait(wait) &&
 	    !test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
-		if (fence_add_callback(sync_file->fence, &sync_file->cb,
-				       fence_check_cb_func) < 0)
+		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
+					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
 	}
 
-	return fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+	return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
 }
 
 static long sync_file_ioctl_merge(struct sync_file *sync_file,
@@ -370,14 +373,14 @@ err_put_fd:
 	return err;
 }
 
-static void sync_fill_fence_info(struct fence *fence,
+static void sync_fill_fence_info(struct dma_fence *fence,
 				 struct sync_fence_info *info)
 {
 	strlcpy(info->obj_name, fence->ops->get_timeline_name(fence),
 		sizeof(info->obj_name));
 	strlcpy(info->driver_name, fence->ops->get_driver_name(fence),
 		sizeof(info->driver_name));
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		info->status = fence->status >= 0 ? 1 : fence->status;
 	else
 		info->status = 0;
@@ -389,7 +392,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 {
 	struct sync_file_info info;
 	struct sync_fence_info *fence_info = NULL;
-	struct fence **fences;
+	struct dma_fence **fences;
 	__u32 size;
 	int num_fences, ret, i;
 
@@ -429,7 +432,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 
 no_fences:
 	strlcpy(info.name, sync_file->name, sizeof(info.name));
-	info.status = fence_is_signaled(sync_file->fence);
+	info.status = dma_fence_is_signaled(sync_file->fence);
 	info.num_fences = num_fences;
 
 	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 039b57e4644c..283d05927d15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -34,7 +34,7 @@
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
 #include <linux/hashtable.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -378,7 +378,7 @@ struct amdgpu_fence_driver {
 	struct timer_list		fallback_timer;
 	unsigned			num_fences_mask;
 	spinlock_t			lock;
-	struct fence			**fences;
+	struct dma_fence		**fences;
 };
 
 /* some special values for the owner field */
@@ -399,7 +399,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 				   unsigned irq_type);
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
@@ -427,7 +427,7 @@ struct amdgpu_bo_va_mapping {
 struct amdgpu_bo_va {
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
-	struct fence		        *last_pt_update;
+	struct dma_fence	        *last_pt_update;
 	unsigned			ref_count;
 
 	/* protected by vm mutex and spinlock */
@@ -543,7 +543,7 @@ struct amdgpu_sa_bo {
 	struct amdgpu_sa_manager	*manager;
 	unsigned			soffset;
 	unsigned			eoffset;
-	struct fence		        *fence;
+	struct dma_fence	        *fence;
 };
 
 /*
@@ -566,19 +566,19 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
  */
 struct amdgpu_sync {
 	DECLARE_HASHTABLE(fences, 4);
-	struct fence	        *last_vm_update;
+	struct dma_fence        *last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *f);
+		      struct dma_fence *f);
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
-struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
@@ -703,10 +703,10 @@ struct amdgpu_flip_work {
 	uint64_t			base;
 	struct drm_pending_vblank_event *event;
 	struct amdgpu_bo		*old_abo;
-	struct fence			*excl;
+	struct dma_fence		*excl;
 	unsigned			shared_count;
-	struct fence			**shared;
-	struct fence_cb			cb;
+	struct dma_fence		**shared;
+	struct dma_fence_cb		cb;
 	bool				async;
 };
 
@@ -742,7 +742,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
-		      struct fence **f);
+		      struct dma_fence **f);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -844,7 +844,7 @@ struct amdgpu_vm {
 	/* contains the page directory */
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
-	struct fence		*page_directory_fence;
+	struct dma_fence	*page_directory_fence;
 	uint64_t		last_eviction_counter;
 
 	/* array of page tables, one for each page directory entry */
@@ -865,14 +865,14 @@ struct amdgpu_vm {
 
 struct amdgpu_vm_id {
 	struct list_head	list;
-	struct fence		*first;
+	struct dma_fence	*first;
 	struct amdgpu_sync	active;
-	struct fence		*last_flush;
+	struct dma_fence	*last_flush;
 	atomic64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
-	struct fence		*flushed_updates;
+	struct dma_fence	*flushed_updates;
 
 	uint32_t                current_gpu_reset_count;
 
@@ -921,7 +921,7 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct fence *fence,
+		      struct amdgpu_sync *sync, struct dma_fence *fence,
 		      struct amdgpu_job *job);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
@@ -957,7 +957,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 struct amdgpu_ctx_ring {
 	uint64_t		sequence;
-	struct fence		**fences;
+	struct dma_fence	**fences;
 	struct amd_sched_entity	entity;
 };
 
@@ -966,7 +966,7 @@ struct amdgpu_ctx {
 	struct amdgpu_device    *adev;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
-	struct fence            **fences;
+	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool preamble_presented;
 };
@@ -982,8 +982,8 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct fence *fence);
-struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+			      struct dma_fence *fence);
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				   struct amdgpu_ring *ring, uint64_t seq);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
@@ -1181,10 +1181,10 @@ struct amdgpu_gfx {
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-		    struct fence *f);
+		    struct dma_fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-		       struct amdgpu_ib *ib, struct fence *last_vm_update,
-		       struct amdgpu_job *job, struct fence **f);
+		       struct amdgpu_ib *ib, struct dma_fence *last_vm_update,
+		       struct amdgpu_job *job, struct dma_fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
@@ -1225,7 +1225,7 @@ struct amdgpu_cs_parser {
 	struct amdgpu_bo_list		*bo_list;
 	struct amdgpu_bo_list_entry	vm_pd;
 	struct list_head		validated;
-	struct fence			*fence;
+	struct dma_fence		*fence;
 	uint64_t			bytes_moved_threshold;
 	uint64_t			bytes_moved;
 	struct amdgpu_bo_list_entry	*evictable;
@@ -1245,7 +1245,7 @@ struct amdgpu_job {
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
-	struct fence		*fence; /* the hw fence */
+	struct dma_fence	*fence; /* the hw fence */
 	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 345305235349..cc97eee93226 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 {
 	unsigned long start_jiffies;
 	unsigned long end_jiffies;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	int i, r;
 
 	start_jiffies = jiffies;
@@ -43,17 +43,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 				       false);
 		if (r)
 			goto exit_do_move;
-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r)
 			goto exit_do_move;
-		fence_put(fence);
+		dma_fence_put(fence);
 	}
 	end_jiffies = jiffies;
 	r = jiffies_to_msecs(end_jiffies - start_jiffies);
 
 exit_do_move:
 	if (fence)
-		fence_put(fence);
+		dma_fence_put(fence);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b0f6e6957536..5d582265e929 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -719,7 +719,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
-	fence_put(parser->fence);
+	dma_fence_put(parser->fence);
 
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
@@ -756,7 +756,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 
 	if (p->bo_list) {
 		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct fence *f;
+			struct dma_fence *f;
 
 			/* ignore duplicates */
 			bo = p->bo_list->array[i].robj;
@@ -956,7 +956,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 		for (j = 0; j < num_deps; ++j) {
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
-			struct fence *fence;
+			struct dma_fence *fence;
 
 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
 					       deps[j].ip_instance,
@@ -978,7 +978,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			} else if (fence) {
 				r = amdgpu_sync_fence(adev, &p->job->sync,
 						      fence);
-				fence_put(fence);
+				dma_fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
 					return r;
@@ -1008,7 +1008,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
-	p->fence = fence_get(&job->base.s_fence->finished);
+	p->fence = dma_fence_get(&job->base.s_fence->finished);
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
@@ -1091,7 +1091,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
-	struct fence *fence;
+	struct dma_fence *fence;
 	long r;
 
 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
@@ -1107,8 +1107,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
-		r = fence_wait_timeout(fence, true, timeout);
-		fence_put(fence);
+		r = dma_fence_wait_timeout(fence, true, timeout);
+		dma_fence_put(fence);
 	} else
 		r = 1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a5e2fcbef0f0..99bbc860322f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -35,7 +35,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
 	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
-			      sizeof(struct fence*), GFP_KERNEL);
+			      sizeof(struct dma_fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;
 
@@ -79,7 +79,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			fence_put(ctx->rings[i].fences[j]);
+			dma_fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 
@@ -241,39 +241,39 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 }
 
 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct fence *fence)
+			      struct dma_fence *fence)
 {
 	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
 	uint64_t seq = cring->sequence;
 	unsigned idx = 0;
-	struct fence *other = NULL;
+	struct dma_fence *other = NULL;
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
 	if (other) {
 		signed long r;
-		r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
 		if (r < 0)
 			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
 	}
 
-	fence_get(fence);
+	dma_fence_get(fence);
 
 	spin_lock(&ctx->ring_lock);
 	cring->fences[idx] = fence;
 	cring->sequence++;
 	spin_unlock(&ctx->ring_lock);
 
-	fence_put(other);
+	dma_fence_put(other);
 
 	return seq;
 }
 
-struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				   struct amdgpu_ring *ring, uint64_t seq)
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+				       struct amdgpu_ring *ring, uint64_t seq)
 {
 	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-	struct fence *fence;
+	struct dma_fence *fence;
 
 	spin_lock(&ctx->ring_lock);
 
@@ -288,7 +288,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 		return NULL;
 	}
 
-	fence = fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);
 
 	return fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b4f4a9239069..0262b43c8f0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1599,7 +1599,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
-	adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 
 	adev->smc_rreg = &amdgpu_invalid_rreg;
 	adev->smc_wreg = &amdgpu_invalid_wreg;
@@ -2193,7 +2193,7 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
 static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 					   struct amdgpu_ring *ring,
 					   struct amdgpu_bo *bo,
-					   struct fence **fence)
+					   struct dma_fence **fence)
 {
 	uint32_t domain;
 	int r;
@@ -2312,30 +2312,30 @@ retry:
 		if (need_full_reset && amdgpu_need_backup(adev)) {
 			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 			struct amdgpu_bo *bo, *tmp;
-			struct fence *fence = NULL, *next = NULL;
+			struct dma_fence *fence = NULL, *next = NULL;
 
 			DRM_INFO("recover vram bo from shadow\n");
 			mutex_lock(&adev->shadow_list_lock);
 			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
 				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 				if (fence) {
-					r = fence_wait(fence, false);
+					r = dma_fence_wait(fence, false);
 					if (r) {
 						WARN(r, "recovery from shadow isn't comleted\n");
 						break;
 					}
 				}
 
-				fence_put(fence);
+				dma_fence_put(fence);
 				fence = next;
 			}
 			mutex_unlock(&adev->shadow_list_lock);
 			if (fence) {
-				r = fence_wait(fence, false);
+				r = dma_fence_wait(fence, false);
 				if (r)
 					WARN(r, "recovery from shadow isn't comleted\n");
 			}
-			fence_put(fence);
+			dma_fence_put(fence);
 		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 083e2b429872..075c0d7db205 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,29 +35,29 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
-static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
+static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct amdgpu_flip_work *work =
 		container_of(cb, struct amdgpu_flip_work, cb);
 
-	fence_put(f);
+	dma_fence_put(f);
 	schedule_work(&work->flip_work.work);
 }
 
 static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
-				     struct fence **f)
+				     struct dma_fence **f)
 {
-	struct fence *fence= *f;
+	struct dma_fence *fence= *f;
 
 	if (fence == NULL)
 		return false;
 
 	*f = NULL;
 
-	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+	if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
 		return true;
 
-	fence_put(fence);
+	dma_fence_put(fence);
 	return false;
 }
 
@@ -244,9 +244,9 @@ unreserve:
 
 cleanup:
 	amdgpu_bo_unref(&work->old_abo);
-	fence_put(work->excl);
+	dma_fence_put(work->excl);
 	for (i = 0; i < work->shared_count; ++i)
-		fence_put(work->shared[i]);
+		dma_fence_put(work->shared[i]);
 	kfree(work->shared);
 	kfree(work);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3a2e42f4b897..57552c79ec58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -48,7 +48,7 @@
  */
 
 struct amdgpu_fence {
-	struct fence base;
+	struct dma_fence base;
 
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
@@ -73,8 +73,8 @@ void amdgpu_fence_slab_fini(void)
 /*
  * Cast helper
  */
-static const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+static const struct dma_fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
 	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
 
@@ -130,11 +130,11 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence *old, **ptr;
+	struct dma_fence *old, **ptr;
 	uint32_t seq;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -143,10 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 
 	seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
-	fence_init(&fence->base, &amdgpu_fence_ops,
-		   &ring->fence_drv.lock,
-		   adev->fence_context + ring->idx,
-		   seq);
+	dma_fence_init(&fence->base, &amdgpu_fence_ops,
+		       &ring->fence_drv.lock,
+		       adev->fence_context + ring->idx,
+		       seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, AMDGPU_FENCE_FLAG_INT);
 
@@ -155,12 +155,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
 	old = rcu_dereference_protected(*ptr, 1);
-	if (old && !fence_is_signaled(old)) {
+	if (old && !dma_fence_is_signaled(old)) {
 		DRM_INFO("rcu slot is busy\n");
-		fence_wait(old, false);
+		dma_fence_wait(old, false);
 	}
 
-	rcu_assign_pointer(*ptr, fence_get(&fence->base));
+	rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));
 
 	*f = &fence->base;
 
@@ -211,7 +211,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 	seq &= drv->num_fences_mask;
 
 	do {
-		struct fence *fence, **ptr;
+		struct dma_fence *fence, **ptr;
 
 		++last_seq;
 		last_seq &= drv->num_fences_mask;
@@ -224,13 +224,13 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 		if (!fence)
 			continue;
 
-		r = fence_signal(fence);
+		r = dma_fence_signal(fence);
 		if (!r)
-			FENCE_TRACE(fence, "signaled from irq context\n");
+			DMA_FENCE_TRACE(fence, "signaled from irq context\n");
 		else
 			BUG();
 
-		fence_put(fence);
+		dma_fence_put(fence);
 	} while (last_seq != seq);
 }
 
@@ -260,7 +260,7 @@ static void amdgpu_fence_fallback(unsigned long arg)
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
 	uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
-	struct fence *fence, **ptr;
+	struct dma_fence *fence, **ptr;
 	int r;
 
 	if (!seq)
@@ -269,14 +269,14 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	rcu_read_lock();
 	fence = rcu_dereference(*ptr);
-	if (!fence || !fence_get_rcu(fence)) {
+	if (!fence || !dma_fence_get_rcu(fence)) {
 		rcu_read_unlock();
 		return 0;
 	}
 	rcu_read_unlock();
 
-	r = fence_wait(fence, false);
-	fence_put(fence);
+	r = dma_fence_wait(fence, false);
+	dma_fence_put(fence);
 	return r;
 }
 
@@ -452,7 +452,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 		amd_sched_fini(&ring->sched);
 		del_timer_sync(&ring->fence_drv.fallback_timer);
 		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
-			fence_put(ring->fence_drv.fences[j]);
+			dma_fence_put(ring->fence_drv.fences[j]);
 		kfree(ring->fence_drv.fences);
 		ring->fence_drv.fences = NULL;
 		ring->fence_drv.initialized = false;
@@ -541,12 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
  * Common fence implementation
  */
 
-static const char *amdgpu_fence_get_driver_name(struct fence *fence)
+static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "amdgpu";
 }
 
-static const char *amdgpu_fence_get_timeline_name(struct fence *f)
+static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	return (const char *)fence->ring->name;
@@ -560,7 +560,7 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
  * to fence_queue that checks if this fence is signaled, and if so it
  * signals the fence and removes itself.
  */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
@@ -568,7 +568,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	if (!timer_pending(&ring->fence_drv.fallback_timer))
 		amdgpu_fence_schedule_fallback(ring);
 
-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+	DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 
 	return true;
 }
@@ -582,7 +582,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
  */
 static void amdgpu_fence_free(struct rcu_head *rcu)
 {
-	struct fence *f = container_of(rcu, struct fence, rcu);
+	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	kmem_cache_free(amdgpu_fence_slab, fence);
 }
@@ -595,16 +595,16 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amdgpu_fence_release(struct fence *f)
+static void amdgpu_fence_release(struct dma_fence *f)
 {
 	call_rcu(&f->rcu, amdgpu_fence_free);
 }
 
-static const struct fence_ops amdgpu_fence_ops = {
+static const struct dma_fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = amdgpu_fence_release,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 6a6c86c9c169..c3672dfcfd6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -89,7 +89,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Free an IB (all asics).
  */
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-		    struct fence *f)
+		    struct dma_fence *f)
 {
 	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
 }
@@ -116,8 +116,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
  * to SI there was just a DE IB.
  */
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
-		       struct amdgpu_job *job, struct fence **f)
+		       struct amdgpu_ib *ibs, struct dma_fence *last_vm_update,
+		       struct amdgpu_job *job, struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 8c5807994073..a0de6286c453 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -81,7 +81,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 
 void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	struct fence *f;
+	struct dma_fence *f;
 	unsigned i;
 
 	/* use sched fence if available */
@@ -95,7 +95,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-	fence_put(job->fence);
+	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
@@ -104,14 +104,14 @@ void amdgpu_job_free(struct amdgpu_job *job)
 {
 	amdgpu_job_free_resources(job);
 
-	fence_put(job->fence);
+	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
 
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
-		      struct fence **f)
+		      struct dma_fence **f)
 {
 	int r;
 	job->ring = ring;
@@ -125,19 +125,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 
 	job->owner = owner;
 	job->fence_ctx = entity->fence_context;
-	*f = fence_get(&job->base.s_fence->finished);
+	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
 }
 
-static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
+static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 {
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_vm *vm = job->vm;
 
-	struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
 
 	if (fence == NULL && vm && !job->vm_id) {
 		struct amdgpu_ring *ring = job->ring;
@@ -155,9 +155,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 	return fence;
 }
 
-static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 {
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	struct amdgpu_job *job;
 	int r;
 
@@ -176,8 +176,8 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
 
 	/* if gpu reset, hw fence will be replaced here */
-	fence_put(job->fence);
-	job->fence = fence_get(fence);
+	dma_fence_put(job->fence);
+	job->fence = dma_fence_get(fence);
 	amdgpu_job_free_resources(job);
 	return fence;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index aa074fac0c7f..55e142a5ff5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -383,7 +383,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
-		struct fence *fence;
+		struct dma_fence *fence;
 
 		if (adev->mman.buffer_funcs_ring == NULL ||
 		   !adev->mman.buffer_funcs_ring->ready) {
@@ -403,9 +403,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 		amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 		amdgpu_bo_fence(bo, fence, false);
 		amdgpu_bo_unreserve(bo);
-		fence_put(bo->tbo.moving);
-		bo->tbo.moving = fence_get(fence);
-		fence_put(fence);
+		dma_fence_put(bo->tbo.moving);
+		bo->tbo.moving = dma_fence_get(fence);
+		dma_fence_put(fence);
 	}
 	*bo_ptr = bo;
 
@@ -491,7 +491,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
-			       struct fence **fence,
+			       struct dma_fence **fence,
 			       bool direct)
 
 {
@@ -523,7 +523,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
 				  struct reservation_object *resv,
-				  struct fence **fence,
+				  struct dma_fence **fence,
 				  bool direct)
 
 {
@@ -926,7 +926,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
  * @shared: true if fence should be added shared
  *
  */
-void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared)
 {
 	struct reservation_object *resv = bo->tbo.resv;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 8255034d73eb..3e785ed3cb4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -156,19 +156,19 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *new_mem);
 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
-void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
 int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
-			       struct fence **fence, bool direct);
+			       struct dma_fence **fence, bool direct);
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
 				  struct reservation_object *resv,
-				  struct fence **fence,
+				  struct dma_fence **fence,
 				  bool direct);
 
 
@@ -200,7 +200,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     unsigned size, unsigned align);
 void amdgpu_sa_bo_free(struct amdgpu_device *adev,
 			      struct amdgpu_sa_bo **sa_bo,
-			      struct fence *fence);
+			      struct dma_fence *fence);
 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 					 struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index d8af37a845f4..fd26c4b8d793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -147,7 +147,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
 	}
 	list_del_init(&sa_bo->olist);
 	list_del_init(&sa_bo->flist);
-	fence_put(sa_bo->fence);
+	dma_fence_put(sa_bo->fence);
 	kfree(sa_bo);
 }
 
@@ -161,7 +161,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
 	sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
 	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
 		if (sa_bo->fence == NULL ||
-		    !fence_is_signaled(sa_bo->fence)) {
+		    !dma_fence_is_signaled(sa_bo->fence)) {
 			return;
 		}
 		amdgpu_sa_bo_remove_locked(sa_bo);
@@ -244,7 +244,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
 }
 
 static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
-				   struct fence **fences,
+				   struct dma_fence **fences,
 				   unsigned *tries)
 {
 	struct amdgpu_sa_bo *best_bo = NULL;
@@ -272,7 +272,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 		sa_bo = list_first_entry(&sa_manager->flist[i],
 					 struct amdgpu_sa_bo, flist);
 
-		if (!fence_is_signaled(sa_bo->fence)) {
+		if (!dma_fence_is_signaled(sa_bo->fence)) {
 			fences[i] = sa_bo->fence;
 			continue;
 		}
@@ -314,7 +314,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
-	struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+	struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned count;
 	int i, r;
@@ -356,14 +356,14 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 
 		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 			if (fences[i])
-				fences[count++] = fence_get(fences[i]);
+				fences[count++] = dma_fence_get(fences[i]);
 
 		if (count) {
 			spin_unlock(&sa_manager->wq.lock);
-			t = fence_wait_any_timeout(fences, count, false,
-						   MAX_SCHEDULE_TIMEOUT);
+			t = dma_fence_wait_any_timeout(fences, count, false,
+						       MAX_SCHEDULE_TIMEOUT);
 			for (i = 0; i < count; ++i)
-				fence_put(fences[i]);
+				dma_fence_put(fences[i]);
 
 			r = (t > 0) ? 0 : t;
 			spin_lock(&sa_manager->wq.lock);
@@ -384,7 +384,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 }
 
 void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
-		       struct fence *fence)
+		       struct dma_fence *fence)
 {
 	struct amdgpu_sa_manager *sa_manager;
 
@@ -394,10 +394,10 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->wq.lock);
-	if (fence && !fence_is_signaled(fence)) {
+	if (fence && !dma_fence_is_signaled(fence)) {
 		uint32_t idx;
 
-		(*sa_bo)->fence = fence_get(fence);
+		(*sa_bo)->fence = dma_fence_get(fence);
 		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
 		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
 	} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 5c8d3022fb87..ed814e6d0207 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -34,7 +34,7 @@
 
 struct amdgpu_sync_entry {
 	struct hlist_node	node;
-	struct fence		*fence;
+	struct dma_fence	*fence;
 };
 
 static struct kmem_cache *amdgpu_sync_slab;
@@ -60,7 +60,8 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
  *
  * Test if the fence was issued by us.
  */
-static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
+				 struct dma_fence *f)
 {
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
@@ -81,7 +82,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
  *
  * Extract who originally created the fence.
  */
-static void *amdgpu_sync_get_owner(struct fence *f)
+static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
@@ -99,13 +100,14 @@ static void *amdgpu_sync_get_owner(struct fence *f)
  *
  * Either keep the existing fence or the new one, depending which one is later.
  */
-static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
+static void amdgpu_sync_keep_later(struct dma_fence **keep,
+				   struct dma_fence *fence)
 {
-	if (*keep && fence_is_later(*keep, fence))
+	if (*keep && dma_fence_is_later(*keep, fence))
 		return;
 
-	fence_put(*keep);
-	*keep = fence_get(fence);
+	dma_fence_put(*keep);
+	*keep = dma_fence_get(fence);
 }
 
 /**
@@ -117,7 +119,7 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
  */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -139,7 +141,7 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
  *
  */
 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *f)
+		      struct dma_fence *f)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -158,7 +160,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return -ENOMEM;
 
 	hash_add(sync->fences, &e->node, f->context);
-	e->fence = fence_get(f);
+	e->fence = dma_fence_get(f);
 	return 0;
 }
 
@@ -177,7 +179,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     void *owner)
 {
 	struct reservation_object_list *flist;
-	struct fence *f;
+	struct dma_fence *f;
 	void *fence_owner;
 	unsigned i;
 	int r = 0;
@@ -231,15 +233,15 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
  * Returns the next fence not signaled yet without removing it from the sync
  * object.
  */
-struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
-				     struct amdgpu_ring *ring)
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+					 struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
 	int i;
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
+		struct dma_fence *f = e->fence;
 		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
 		if (ring && s_fence) {
@@ -247,16 +249,16 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 			 * when they are scheduled.
 			 */
 			if (s_fence->sched == &ring->sched) {
-				if (fence_is_signaled(&s_fence->scheduled))
+				if (dma_fence_is_signaled(&s_fence->scheduled))
 					continue;
 
 				return &s_fence->scheduled;
 			}
 		}
 
-		if (fence_is_signaled(f)) {
+		if (dma_fence_is_signaled(f)) {
 			hash_del(&e->node);
-			fence_put(f);
+			dma_fence_put(f);
 			kmem_cache_free(amdgpu_sync_slab, e);
 			continue;
 		}
@@ -274,11 +276,11 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
  *
  * Get and removes the next fence from the sync object not signaled yet.
  */
-struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
-	struct fence *f;
+	struct dma_fence *f;
 	int i;
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
@@ -288,10 +290,10 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 		hash_del(&e->node);
 		kmem_cache_free(amdgpu_sync_slab, e);
 
-		if (!fence_is_signaled(f))
+		if (!dma_fence_is_signaled(f))
 			return f;
 
-		fence_put(f);
+		dma_fence_put(f);
 	}
 	return NULL;
 }
@@ -311,11 +313,11 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		hash_del(&e->node);
-		fence_put(e->fence);
+		dma_fence_put(e->fence);
 		kmem_cache_free(amdgpu_sync_slab, e);
 	}
 
-	fence_put(sync->last_vm_update);
+	dma_fence_put(sync->last_vm_update);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b827c75e95de..e05a24325eeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -78,7 +78,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		void *gtt_map, *vram_map;
 		void **gtt_start, **gtt_end;
 		void **vram_start, **vram_end;
-		struct fence *fence = NULL;
+		struct dma_fence *fence = NULL;
 
 		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
@@ -118,13 +118,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 			goto out_lclean_unpin;
 		}
 
-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r) {
 			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
 			goto out_lclean_unpin;
 		}
 
-		fence_put(fence);
+		dma_fence_put(fence);
 
 		r = amdgpu_bo_kmap(vram_obj, &vram_map);
 		if (r) {
@@ -163,13 +163,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 			goto out_lclean_unpin;
 		}
 
-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r) {
 			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
 			goto out_lclean_unpin;
 		}
 
-		fence_put(fence);
+		dma_fence_put(fence);
 
 		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
 		if (r) {
@@ -216,7 +216,7 @@ out_lclean:
 			amdgpu_bo_unref(&gtt_obj[i]);
 		}
 		if (fence)
-			fence_put(fence);
+			dma_fence_put(fence);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 067e5e683bb3..bb964a8ff938 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -104,7 +104,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			     __field(struct amdgpu_device *, adev)
 			     __field(struct amd_sched_job *, sched_job)
 			     __field(struct amdgpu_ib *, ib)
-			     __field(struct fence *, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(char *, ring_name)
 			     __field(u32, num_ibs)
 			     ),
@@ -129,7 +129,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			     __field(struct amdgpu_device *, adev)
 			     __field(struct amd_sched_job *, sched_job)
 			     __field(struct amdgpu_ib *, ib)
-			     __field(struct fence *, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(char *, ring_name)
 			     __field(u32, num_ibs)
 			     ),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dcaf691f56b5..a743aeabc767 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -268,7 +268,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	struct amdgpu_device *adev;
 	struct amdgpu_ring *ring;
 	uint64_t old_start, new_start;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int r;
 
 	adev = amdgpu_get_adev(bo->bdev);
@@ -316,7 +316,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 		return r;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
-	fence_put(fence);
+	dma_fence_put(fence);
 	return r;
 }
 
@@ -1247,7 +1247,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       uint64_t dst_offset,
 		       uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct fence **fence, bool direct_submit)
+		       struct dma_fence **fence, bool direct_submit)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
@@ -1294,7 +1294,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	if (direct_submit) {
 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
 				       NULL, NULL, fence);
-		job->fence = fence_get(*fence);
+		job->fence = dma_fence_get(*fence);
 		if (r)
 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		amdgpu_job_free(job);
@@ -1315,7 +1315,7 @@ error_free:
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		uint32_t src_data,
 		struct reservation_object *resv,
-		struct fence **fence)
+		struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = bo->adev;
 	struct amdgpu_job *job;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 9812c805326c..3f293e189378 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,11 +77,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       uint64_t dst_offset,
 		       uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct fence **fence, bool direct_submit);
+		       struct dma_fence **fence, bool direct_submit);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct reservation_object *resv,
-			struct fence **fence);
+			struct dma_fence **fence);
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e3281cacc586..0f6575e7ef8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -333,7 +333,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	for (i = 0; i < adev->uvd.max_handles; ++i) {
 		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
 		if (handle != 0 && adev->uvd.filp[i] == filp) {
-			struct fence *fence;
+			struct dma_fence *fence;
 
 			r = amdgpu_uvd_get_destroy_msg(ring, handle,
 						       false, &fence);
@@ -342,8 +342,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 				continue;
 			}
 
-			fence_wait(fence, false);
-			fence_put(fence);
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
 
 			adev->uvd.filp[i] = NULL;
 			atomic_set(&adev->uvd.handles[i], 0);
@@ -909,14 +909,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 }
 
 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct list_head head;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t addr;
 	int i, r;
@@ -960,7 +960,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = fence_get(f);
+		job->fence = dma_fence_get(f);
 		if (r)
 			goto err_free;
 
@@ -975,9 +975,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ttm_eu_fence_buffer_objects(&ticket, &head, f);
 
 	if (fence)
-		*fence = fence_get(f);
+		*fence = dma_fence_get(f);
 	amdgpu_bo_unref(&bo);
-	fence_put(f);
+	dma_fence_put(f);
 
 	return 0;
 
@@ -993,7 +993,7 @@ err:
    crash the vcpu so just try to emmit a dummy create/destroy msg to
    avoid this */
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence)
+			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_bo *bo;
@@ -1042,7 +1042,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_bo *bo;
@@ -1128,7 +1128,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
  */
 int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	long r;
 
 	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
@@ -1143,7 +1143,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto error;
 	}
 
-	r = fence_wait_timeout(fence, false, timeout);
+	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -1154,7 +1154,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 	}
 
-	fence_put(fence);
+	dma_fence_put(fence);
 
 error:
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index c850009602d1..6249ba1bde2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -29,9 +29,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
 int amdgpu_uvd_suspend(struct amdgpu_device *adev);
 int amdgpu_uvd_resume(struct amdgpu_device *adev);
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence);
+			      struct dma_fence **fence);
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence);
+			       bool direct, struct dma_fence **fence);
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 7fe8fd884f06..f0f8afb85585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -395,12 +395,12 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
  * Open up a stream for HW test
  */
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence)
+			      struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint64_t dummy;
 	int i, r;
 
@@ -450,14 +450,14 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;
 
 	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-	job->fence = fence_get(f);
+	job->fence = dma_fence_get(f);
 	if (r)
 		goto err;
 
 	amdgpu_job_free(job);
 	if (fence)
-		*fence = fence_get(f);
-	fence_put(f);
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
 	return 0;
 
 err:
@@ -476,12 +476,12 @@ err:
  * Close up a stream for HW test or if userspace failed to do so
  */
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -513,7 +513,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = fence_get(f);
+		job->fence = dma_fence_get(f);
 		if (r)
 			goto err;
 
@@ -526,8 +526,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	}
 
 	if (fence)
-		*fence = fence_get(f);
-	fence_put(f);
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
 	return 0;
 
 err:
@@ -883,7 +883,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
  */
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	long r;
 
 	/* skip vce ring1/2 ib test for now, since it's not reliable */
@@ -902,7 +902,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto error;
 	}
 
-	r = fence_wait_timeout(fence, false, timeout);
+	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -913,6 +913,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 	}
 error:
-	fence_put(fence);
+	dma_fence_put(fence);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 12729d2852df..566c29ddeeb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -29,9 +29,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence);
+			      struct dma_fence **fence);
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence);
+			       bool direct, struct dma_fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 06f24322e7c3..22cabb5456e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,7 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <linux/fence-array.h>
+#include <linux/dma-fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -194,14 +194,14 @@ static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
  * Allocate an id for the vm, adding fences to the sync obj as necessary.
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct fence *fence,
+		      struct amdgpu_sync *sync, struct dma_fence *fence,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct fence *updates = sync->last_vm_update;
+	struct dma_fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
-	struct fence **fences;
+	struct dma_fence **fences;
 	unsigned i;
 	int r = 0;
 
@@ -225,17 +225,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (&idle->list == &adev->vm_manager.ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
-		struct fence_array *array;
+		struct dma_fence_array *array;
 		unsigned j;
 
 		for (j = 0; j < i; ++j)
-			fence_get(fences[j]);
+			dma_fence_get(fences[j]);
 
-		array = fence_array_create(i, fences, fence_context,
+		array = dma_fence_array_create(i, fences, fence_context,
 					   seqno, true);
 		if (!array) {
 			for (j = 0; j < i; ++j)
-				fence_put(fences[j]);
+				dma_fence_put(fences[j]);
 			kfree(fences);
 			r = -ENOMEM;
 			goto error;
@@ -243,7 +243,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 
 		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
-		fence_put(&array->base);
+		dma_fence_put(&array->base);
 		if (r)
 			goto error;
 
@@ -257,7 +257,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	/* Check if we can use a VMID already assigned to this VM */
 	i = ring->idx;
 	do {
-		struct fence *flushed;
+		struct dma_fence *flushed;
 
 		id = vm->ids[i++];
 		if (i == AMDGPU_MAX_RINGS)
@@ -279,12 +279,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			continue;
 
 		if (id->last_flush->context != fence_context &&
-		    !fence_is_signaled(id->last_flush))
+		    !dma_fence_is_signaled(id->last_flush))
 			continue;
 
 		flushed  = id->flushed_updates;
 		if (updates &&
-		    (!flushed || fence_is_later(updates, flushed)))
+		    (!flushed || dma_fence_is_later(updates, flushed)))
 			continue;
 
 		/* Good we can use this VMID. Remember this submission as
@@ -315,14 +315,14 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
-	fence_put(id->first);
-	id->first = fence_get(fence);
+	dma_fence_put(id->first);
+	id->first = dma_fence_get(fence);
 
-	fence_put(id->last_flush);
+	dma_fence_put(id->last_flush);
 	id->last_flush = NULL;
 
-	fence_put(id->flushed_updates);
-	id->flushed_updates = fence_get(updates);
+	dma_fence_put(id->flushed_updates);
+	id->flushed_updates = dma_fence_get(updates);
 
 	id->pd_gpu_addr = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
@@ -393,7 +393,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 
 	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
 	    amdgpu_vm_is_gpu_reset(adev, id))) {
-		struct fence *fence;
+		struct dma_fence *fence;
 
 		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
 		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
@@ -403,7 +403,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		fence_put(id->last_flush);
+		dma_fence_put(id->last_flush);
 		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
@@ -537,7 +537,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
 {
 	struct amdgpu_ring *ring;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	unsigned entries;
@@ -578,7 +578,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		goto error_free;
 
 	amdgpu_bo_fence(bo, fence, true);
-	fence_put(fence);
+	dma_fence_put(fence);
 	return 0;
 
 error_free:
@@ -625,7 +625,7 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
 	unsigned count = 0, pt_idx, ndw;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 
 	int r;
 
@@ -714,9 +714,9 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
 			goto error_free;
 
 		amdgpu_bo_fence(pd, fence, true);
-		fence_put(vm->page_directory_fence);
-		vm->page_directory_fence = fence_get(fence);
-		fence_put(fence);
+		dma_fence_put(vm->page_directory_fence);
+		vm->page_directory_fence = dma_fence_get(fence);
+		dma_fence_put(fence);
 
 	} else {
 		amdgpu_job_free(job);
@@ -929,20 +929,20 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct fence *exclusive,
+				       struct dma_fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint32_t flags, uint64_t addr,
-				       struct fence **fence)
+				       struct dma_fence **fence)
 {
 	struct amdgpu_ring *ring;
 	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int r;
 
 	memset(&params, 0, sizeof(params));
@@ -1045,10 +1045,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	amdgpu_bo_fence(vm->page_directory, f, true);
 	if (fence) {
-		fence_put(*fence);
-		*fence = fence_get(f);
+		dma_fence_put(*fence);
+		*fence = dma_fence_get(f);
 	}
-	fence_put(f);
+	dma_fence_put(f);
 	return 0;
 
 error_free:
@@ -1074,13 +1074,13 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
-				      struct fence *exclusive,
+				      struct dma_fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
 				      uint32_t flags, uint64_t addr,
-				      struct fence **fence)
+				      struct dma_fence **fence)
 {
 	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
 
@@ -1147,7 +1147,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
 	struct ttm_mem_reg *mem;
-	struct fence *exclusive;
+	struct dma_fence *exclusive;
 	uint64_t addr;
 	int r;
 
@@ -1547,7 +1547,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		kfree(mapping);
 	}
 
-	fence_put(bo_va->last_pt_update);
+	dma_fence_put(bo_va->last_pt_update);
 	kfree(bo_va);
 }
 
@@ -1709,7 +1709,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_bo_unref(&vm->page_directory->shadow);
 	amdgpu_bo_unref(&vm->page_directory);
-	fence_put(vm->page_directory_fence);
+	dma_fence_put(vm->page_directory_fence);
 }
 
 /**
@@ -1733,7 +1733,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}
 
-	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	adev->vm_manager.fence_context =
+		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
@@ -1755,8 +1756,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
 		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
 
-		fence_put(adev->vm_manager.ids[i].first);
+		dma_fence_put(adev->vm_manager.ids[i].first);
 		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
-		fence_put(id->flushed_updates);
+		dma_fence_put(id->flushed_updates);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index cb952acc7133..321b9d5a4e6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -622,7 +622,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -655,7 +655,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -675,7 +675,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 40abb6b81c09..7dc11a19e49d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1522,7 +1522,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
 	long r;
@@ -1548,7 +1548,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err2;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -1569,7 +1569,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 71116da9e782..3865ffe7de55 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2286,7 +2286,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
 	long r;
@@ -2312,7 +2312,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err2;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -2333,7 +2333,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ee6a48a09214..a9dd18847c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -798,7 +798,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
 	long r;
@@ -824,7 +824,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err2;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -844,7 +844,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	}
 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -1575,7 +1575,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int r, i;
 	u32 tmp;
 	unsigned total_size, vgpr_offset, sgpr_offset;
@@ -1708,7 +1708,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	}
 
 	/* wait for the GPU to finish processing the IB */
-	r = fence_wait(f, false);
+	r = dma_fence_wait(f, false);
 	if (r) {
 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
 		goto fail;
@@ -1729,7 +1729,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 fail:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 565dab3c7218..7edf6e8c63dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -668,7 +668,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -705,7 +705,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -725,7 +725,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index a9d10941fb53..1932a67c62ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -871,7 +871,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -908,7 +908,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -927,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	}
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index de358193a8f9..b4cf4e25bf91 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -274,7 +274,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -305,7 +305,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -325,7 +325,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index b961a1c6caf3..dbd4fd3a810b 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -17,7 +17,7 @@ TRACE_EVENT(amd_sched_job,
 	    TP_STRUCT__entry(
 			     __field(struct amd_sched_entity *, entity)
 			     __field(struct amd_sched_job *, sched_job)
-			     __field(struct fence *, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(const char *, name)
 			     __field(u32, job_count)
 			     __field(int, hw_job_count)
@@ -42,7 +42,7 @@ TRACE_EVENT(amd_sched_process_job,
 	    TP_PROTO(struct amd_sched_fence *fence),
 	    TP_ARGS(fence),
 	    TP_STRUCT__entry(
-		    __field(struct fence *, fence)
+		    __field(struct dma_fence *, fence)
 		    ),
 
 	    TP_fast_assign(
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 963a24d46a93..5364e6a7ec8f 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -32,7 +32,7 @@
 
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
-static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
+static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
 
 struct kmem_cache *sched_fence_slab;
 atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
@@ -141,7 +141,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 		return r;
 
 	atomic_set(&entity->fence_seq, 0);
-	entity->fence_context = fence_context_alloc(2);
+	entity->fence_context = dma_fence_context_alloc(2);
 
 	return 0;
 }
@@ -221,32 +221,32 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 	kfifo_free(&entity->job_queue);
 }
 
-static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
+static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct amd_sched_entity *entity =
 		container_of(cb, struct amd_sched_entity, cb);
 	entity->dependency = NULL;
-	fence_put(f);
+	dma_fence_put(f);
 	amd_sched_wakeup(entity->sched);
 }
 
-static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
+static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct amd_sched_entity *entity =
 		container_of(cb, struct amd_sched_entity, cb);
 	entity->dependency = NULL;
-	fence_put(f);
+	dma_fence_put(f);
 }
 
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
-	struct fence * fence = entity->dependency;
+	struct dma_fence * fence = entity->dependency;
 	struct amd_sched_fence *s_fence;
 
 	if (fence->context == entity->fence_context) {
 		/* We can ignore fences from ourself */
-		fence_put(entity->dependency);
+		dma_fence_put(entity->dependency);
 		return false;
 	}
 
@@ -257,23 +257,23 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 		 * Fence is from the same scheduler, only need to wait for
 		 * it to be scheduled
 		 */
-		fence = fence_get(&s_fence->scheduled);
-		fence_put(entity->dependency);
+		fence = dma_fence_get(&s_fence->scheduled);
+		dma_fence_put(entity->dependency);
 		entity->dependency = fence;
-		if (!fence_add_callback(fence, &entity->cb,
-					amd_sched_entity_clear_dep))
+		if (!dma_fence_add_callback(fence, &entity->cb,
+					    amd_sched_entity_clear_dep))
 			return true;
 
 		/* Ignore it when it is already scheduled */
-		fence_put(fence);
+		dma_fence_put(fence);
 		return false;
 	}
 
-	if (!fence_add_callback(entity->dependency, &entity->cb,
-				amd_sched_entity_wakeup))
+	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
+				    amd_sched_entity_wakeup))
 		return true;
 
-	fence_put(entity->dependency);
+	dma_fence_put(entity->dependency);
 	return false;
 }
 
@@ -354,7 +354,8 @@ static void amd_sched_job_finish(struct work_struct *work)
 	sched->ops->free_job(s_job);
 }
 
-static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
+static void amd_sched_job_finish_cb(struct dma_fence *f,
+				    struct dma_fence_cb *cb)
 {
 	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
 						 finish_cb);
@@ -388,8 +389,8 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 
 	spin_lock(&sched->job_list_lock);
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
-		if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
-			fence_put(s_job->s_fence->parent);
+		if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
+			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
 		}
 	}
@@ -410,21 +411,21 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct amd_sched_fence *s_fence = s_job->s_fence;
-		struct fence *fence;
+		struct dma_fence *fence;
 
 		spin_unlock(&sched->job_list_lock);
 		fence = sched->ops->run_job(s_job);
 		atomic_inc(&sched->hw_rq_count);
 		if (fence) {
-			s_fence->parent = fence_get(fence);
-			r = fence_add_callback(fence, &s_fence->cb,
-					       amd_sched_process_job);
+			s_fence->parent = dma_fence_get(fence);
+			r = dma_fence_add_callback(fence, &s_fence->cb,
+						   amd_sched_process_job);
 			if (r == -ENOENT)
 				amd_sched_process_job(fence, &s_fence->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
-			fence_put(fence);
+			dma_fence_put(fence);
 		} else {
 			DRM_ERROR("Failed to run job!\n");
 			amd_sched_process_job(NULL, &s_fence->cb);
@@ -446,8 +447,8 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 	struct amd_sched_entity *entity = sched_job->s_entity;
 
 	trace_amd_sched_job(sched_job);
-	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
-			   amd_sched_job_finish_cb);
+	dma_fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
+			       amd_sched_job_finish_cb);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
 }
@@ -511,7 +512,7 @@ amd_sched_select_entity(struct amd_gpu_scheduler *sched)
 	return entity;
 }
 
-static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
+static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
@@ -521,7 +522,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	amd_sched_fence_finished(s_fence);
 
 	trace_amd_sched_process_job(s_fence);
-	fence_put(&s_fence->finished);
+	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
@@ -547,7 +548,7 @@ static int amd_sched_main(void *param)
 		struct amd_sched_entity *entity = NULL;
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
-		struct fence *fence;
+		struct dma_fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
 					 (!amd_sched_blocked(sched) &&
@@ -569,15 +570,15 @@ static int amd_sched_main(void *param)
 		fence = sched->ops->run_job(sched_job);
 		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
-			s_fence->parent = fence_get(fence);
-			r = fence_add_callback(fence, &s_fence->cb,
-					       amd_sched_process_job);
+			s_fence->parent = dma_fence_get(fence);
+			r = dma_fence_add_callback(fence, &s_fence->cb,
+						   amd_sched_process_job);
 			if (r == -ENOENT)
 				amd_sched_process_job(fence, &s_fence->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
-			fence_put(fence);
+			dma_fence_put(fence);
 		} else {
 			DRM_ERROR("Failed to run job!\n");
 			amd_sched_process_job(NULL, &s_fence->cb);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 7cbbbfb502ef..876aa43b57df 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -25,7 +25,7 @@
 #define _GPU_SCHEDULER_H_
 
 #include <linux/kfifo.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
@@ -50,8 +50,8 @@ struct amd_sched_entity {
 	atomic_t			fence_seq;
 	uint64_t                        fence_context;
 
-	struct fence			*dependency;
-	struct fence_cb			cb;
+	struct dma_fence		*dependency;
+	struct dma_fence_cb		cb;
 };
 
 /**
@@ -66,10 +66,10 @@ struct amd_sched_rq {
 };
 
 struct amd_sched_fence {
-	struct fence                    scheduled;
-	struct fence                    finished;
-	struct fence_cb                 cb;
-	struct fence                    *parent;
+	struct dma_fence                scheduled;
+	struct dma_fence                finished;
+	struct dma_fence_cb             cb;
+	struct dma_fence                *parent;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
@@ -79,15 +79,15 @@ struct amd_sched_job {
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
 	struct amd_sched_fence          *s_fence;
-	struct fence_cb			finish_cb;
+	struct dma_fence_cb		finish_cb;
 	struct work_struct		finish_work;
 	struct list_head		node;
 	struct delayed_work		work_tdr;
 };
 
-extern const struct fence_ops amd_sched_fence_ops_scheduled;
-extern const struct fence_ops amd_sched_fence_ops_finished;
-static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
+extern const struct dma_fence_ops amd_sched_fence_ops_scheduled;
+extern const struct dma_fence_ops amd_sched_fence_ops_finished;
+static inline struct amd_sched_fence *to_amd_sched_fence(struct dma_fence *f)
 {
 	if (f->ops == &amd_sched_fence_ops_scheduled)
 		return container_of(f, struct amd_sched_fence, scheduled);
@@ -103,8 +103,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
  * these functions should be implemented in driver side
 */
 struct amd_sched_backend_ops {
-	struct fence *(*dependency)(struct amd_sched_job *sched_job);
-	struct fence *(*run_job)(struct amd_sched_job *sched_job);
+	struct dma_fence *(*dependency)(struct amd_sched_job *sched_job);
+	struct dma_fence *(*run_job)(struct amd_sched_job *sched_job);
 	void (*timedout_job)(struct amd_sched_job *sched_job);
 	void (*free_job)(struct amd_sched_job *sched_job);
 };
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 6b63beaf7574..c26fa298fe9e 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -42,46 +42,50 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
 	spin_lock_init(&fence->lock);
 
 	seq = atomic_inc_return(&entity->fence_seq);
-	fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
-		   &fence->lock, entity->fence_context, seq);
-	fence_init(&fence->finished, &amd_sched_fence_ops_finished,
-		   &fence->lock, entity->fence_context + 1, seq);
+	dma_fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
+		       &fence->lock, entity->fence_context, seq);
+	dma_fence_init(&fence->finished, &amd_sched_fence_ops_finished,
+		       &fence->lock, entity->fence_context + 1, seq);
 
 	return fence;
 }
 
 void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
 {
-	int ret = fence_signal(&fence->scheduled);
+	int ret = dma_fence_signal(&fence->scheduled);
 
 	if (!ret)
-		FENCE_TRACE(&fence->scheduled, "signaled from irq context\n");
+		DMA_FENCE_TRACE(&fence->scheduled,
+				"signaled from irq context\n");
 	else
-		FENCE_TRACE(&fence->scheduled, "was already signaled\n");
+		DMA_FENCE_TRACE(&fence->scheduled,
+				"was already signaled\n");
 }
 
 void amd_sched_fence_finished(struct amd_sched_fence *fence)
 {
-	int ret = fence_signal(&fence->finished);
+	int ret = dma_fence_signal(&fence->finished);
 
 	if (!ret)
-		FENCE_TRACE(&fence->finished, "signaled from irq context\n");
+		DMA_FENCE_TRACE(&fence->finished,
+				"signaled from irq context\n");
 	else
-		FENCE_TRACE(&fence->finished, "was already signaled\n");
+		DMA_FENCE_TRACE(&fence->finished,
+				"was already signaled\n");
 }
 
-static const char *amd_sched_fence_get_driver_name(struct fence *fence)
+static const char *amd_sched_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "amd_sched";
 }
 
-static const char *amd_sched_fence_get_timeline_name(struct fence *f)
+static const char *amd_sched_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 	return (const char *)fence->sched->name;
 }
 
-static bool amd_sched_fence_enable_signaling(struct fence *f)
+static bool amd_sched_fence_enable_signaling(struct dma_fence *f)
 {
 	return true;
 }
@@ -95,10 +99,10 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
  */
 static void amd_sched_fence_free(struct rcu_head *rcu)
 {
-	struct fence *f = container_of(rcu, struct fence, rcu);
+	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 
-	fence_put(fence->parent);
+	dma_fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
@@ -110,7 +114,7 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amd_sched_fence_release_scheduled(struct fence *f)
+static void amd_sched_fence_release_scheduled(struct dma_fence *f)
 {
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 
@@ -124,27 +128,27 @@ static void amd_sched_fence_release_scheduled(struct fence *f)
  *
  * Drop the extra reference from the scheduled fence to the base fence.
  */
-static void amd_sched_fence_release_finished(struct fence *f)
+static void amd_sched_fence_release_finished(struct dma_fence *f)
 {
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 
-	fence_put(&fence->scheduled);
+	dma_fence_put(&fence->scheduled);
 }
 
-const struct fence_ops amd_sched_fence_ops_scheduled = {
+const struct dma_fence_ops amd_sched_fence_ops_scheduled = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = amd_sched_fence_release_scheduled,
 };
 
-const struct fence_ops amd_sched_fence_ops_finished = {
+const struct dma_fence_ops amd_sched_fence_ops_finished = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = amd_sched_fence_release_finished,
 };
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 1b5a32df9a9a..c32fb3c1d6f0 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1463,7 +1463,7 @@ EXPORT_SYMBOL(drm_atomic_nonblocking_commit);
 
 static struct drm_pending_vblank_event *create_vblank_event(
 		struct drm_device *dev, struct drm_file *file_priv,
-		struct fence *fence, uint64_t user_data)
+		struct dma_fence *fence, uint64_t user_data)
 {
 	struct drm_pending_vblank_event *e = NULL;
 	int ret;
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index f9362760bfb2..75ad01d595fd 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -30,7 +30,7 @@
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_atomic_helper.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include "drm_crtc_internal.h"
 
@@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_enables);
  * drm_atomic_helper_swap_state() so it uses the current plane state (and
  * just uses the atomic state to find the changed planes)
  *
- * Returns zero if success or < 0 if fence_wait() fails.
+ * Returns zero if success or < 0 if dma_fence_wait() fails.
  */
 int drm_atomic_helper_wait_for_fences(struct drm_device *dev,
 				      struct drm_atomic_state *state,
@@ -1041,11 +1041,11 @@ int drm_atomic_helper_wait_for_fences(struct drm_device *dev,
 		 * still interrupt the operation. Instead of blocking until the
 		 * timer expires, make the wait interruptible.
 		 */
-		ret = fence_wait(plane_state->fence, pre_swap);
+		ret = dma_fence_wait(plane_state->fence, pre_swap);
 		if (ret)
 			return ret;
 
-		fence_put(plane_state->fence);
+		dma_fence_put(plane_state->fence);
 		plane_state->fence = NULL;
 	}
 
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 8bed5f459182..cf993dbf602e 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -665,7 +665,7 @@ void drm_event_cancel_free(struct drm_device *dev,
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	if (p->fence)
-		fence_put(p->fence);
+		dma_fence_put(p->fence);
 
 	kfree(p);
 }
@@ -696,8 +696,8 @@ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
 	}
 
 	if (e->fence) {
-		fence_signal(e->fence);
-		fence_put(e->fence);
+		dma_fence_signal(e->fence);
+		dma_fence_put(e->fence);
 	}
 
 	if (!e->file_priv) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 3755ef935af4..7d066a91d778 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -466,10 +466,10 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
 }
 
 #ifdef CONFIG_DEBUG_FS
-static void etnaviv_gem_describe_fence(struct fence *fence,
+static void etnaviv_gem_describe_fence(struct dma_fence *fence,
 	const char *type, struct seq_file *m)
 {
-	if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		seq_printf(m, "\t%9s: %s %s seq %u\n",
 			   type,
 			   fence->ops->get_driver_name(fence),
@@ -482,7 +482,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct reservation_object *robj = etnaviv_obj->resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	unsigned long off = drm_vma_node_start(&obj->vma_node);
 
 	seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n",
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index b1254f885fed..d2211825e5c8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -15,7 +15,7 @@
  */
 
 #include <linux/component.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/moduleparam.h>
 #include <linux/of_device.h>
 #include "etnaviv_dump.h"
@@ -882,7 +882,7 @@ static void recover_worker(struct work_struct *work)
 	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
 		if (!gpu->event[i].used)
 			continue;
-		fence_signal(gpu->event[i].fence);
+		dma_fence_signal(gpu->event[i].fence);
 		gpu->event[i].fence = NULL;
 		gpu->event[i].used = false;
 		complete(&gpu->event_free);
@@ -952,55 +952,55 @@ static void hangcheck_disable(struct etnaviv_gpu *gpu)
 /* fence object management */
 struct etnaviv_fence {
 	struct etnaviv_gpu *gpu;
-	struct fence base;
+	struct dma_fence base;
 };
 
-static inline struct etnaviv_fence *to_etnaviv_fence(struct fence *fence)
+static inline struct etnaviv_fence *to_etnaviv_fence(struct dma_fence *fence)
 {
 	return container_of(fence, struct etnaviv_fence, base);
 }
 
-static const char *etnaviv_fence_get_driver_name(struct fence *fence)
+static const char *etnaviv_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "etnaviv";
 }
 
-static const char *etnaviv_fence_get_timeline_name(struct fence *fence)
+static const char *etnaviv_fence_get_timeline_name(struct dma_fence *fence)
 {
 	struct etnaviv_fence *f = to_etnaviv_fence(fence);
 
 	return dev_name(f->gpu->dev);
 }
 
-static bool etnaviv_fence_enable_signaling(struct fence *fence)
+static bool etnaviv_fence_enable_signaling(struct dma_fence *fence)
 {
 	return true;
 }
 
-static bool etnaviv_fence_signaled(struct fence *fence)
+static bool etnaviv_fence_signaled(struct dma_fence *fence)
 {
 	struct etnaviv_fence *f = to_etnaviv_fence(fence);
 
 	return fence_completed(f->gpu, f->base.seqno);
 }
 
-static void etnaviv_fence_release(struct fence *fence)
+static void etnaviv_fence_release(struct dma_fence *fence)
 {
 	struct etnaviv_fence *f = to_etnaviv_fence(fence);
 
 	kfree_rcu(f, base.rcu);
 }
 
-static const struct fence_ops etnaviv_fence_ops = {
+static const struct dma_fence_ops etnaviv_fence_ops = {
 	.get_driver_name = etnaviv_fence_get_driver_name,
 	.get_timeline_name = etnaviv_fence_get_timeline_name,
 	.enable_signaling = etnaviv_fence_enable_signaling,
 	.signaled = etnaviv_fence_signaled,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = etnaviv_fence_release,
 };
 
-static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
+static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_fence *f;
 
@@ -1010,8 +1010,8 @@ static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 
 	f->gpu = gpu;
 
-	fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock,
-		   gpu->fence_context, ++gpu->next_fence);
+	dma_fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock,
+		       gpu->fence_context, ++gpu->next_fence);
 
 	return &f->base;
 }
@@ -1021,7 +1021,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
 {
 	struct reservation_object *robj = etnaviv_obj->resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int i, ret;
 
 	if (!exclusive) {
@@ -1039,7 +1039,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
 		/* Wait on any existing exclusive fence which isn't our own */
 		fence = reservation_object_get_excl(robj);
 		if (fence && fence->context != context) {
-			ret = fence_wait(fence, true);
+			ret = dma_fence_wait(fence, true);
 			if (ret)
 				return ret;
 		}
@@ -1052,7 +1052,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
 		fence = rcu_dereference_protected(fobj->shared[i],
 						reservation_object_held(robj));
 		if (fence->context != context) {
-			ret = fence_wait(fence, true);
+			ret = dma_fence_wait(fence, true);
 			if (ret)
 				return ret;
 		}
@@ -1158,11 +1158,11 @@ static void retire_worker(struct work_struct *work)
 
 	mutex_lock(&gpu->lock);
 	list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) {
-		if (!fence_is_signaled(cmdbuf->fence))
+		if (!dma_fence_is_signaled(cmdbuf->fence))
 			break;
 
 		list_del(&cmdbuf->node);
-		fence_put(cmdbuf->fence);
+		dma_fence_put(cmdbuf->fence);
 
 		for (i = 0; i < cmdbuf->nr_bos; i++) {
 			struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
@@ -1275,7 +1275,7 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	unsigned int event, i;
 	int ret;
 
@@ -1391,7 +1391,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 		}
 
 		while ((event = ffs(intr)) != 0) {
-			struct fence *fence;
+			struct dma_fence *fence;
 
 			event -= 1;
 
@@ -1401,7 +1401,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 
 			fence = gpu->event[event].fence;
 			gpu->event[event].fence = NULL;
-			fence_signal(fence);
+			dma_fence_signal(fence);
 
 			/*
 			 * Events can be processed out of order.  Eg,
@@ -1553,7 +1553,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 		return ret;
 
 	gpu->drm = drm;
-	gpu->fence_context = fence_context_alloc(1);
+	gpu->fence_context = dma_fence_context_alloc(1);
 	spin_lock_init(&gpu->fence_spinlock);
 
 	INIT_LIST_HEAD(&gpu->active_cmd_list);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 73c278dc3706..8c6b824e9d0a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -89,7 +89,7 @@ struct etnaviv_chip_identity {
 
 struct etnaviv_event {
 	bool used;
-	struct fence *fence;
+	struct dma_fence *fence;
 };
 
 struct etnaviv_cmdbuf;
@@ -163,7 +163,7 @@ struct etnaviv_cmdbuf {
 	/* vram node used if the cmdbuf is mapped through the MMUv2 */
 	struct drm_mm_node vram_node;
 	/* fence after which this buffer is to be disposed */
-	struct fence *fence;
+	struct dma_fence *fence;
 	/* target exec state */
 	u32 exec_state;
 	/* per GPU in-flight list */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 74ede1f53372..f9af2a00625e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -26,12 +26,12 @@
 
 #include "i915_drv.h"
 
-static const char *i915_fence_get_driver_name(struct fence *fence)
+static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "i915";
 }
 
-static const char *i915_fence_get_timeline_name(struct fence *fence)
+static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
 	/* Timelines are bound by eviction to a VM. However, since
 	 * we only have a global seqno at the moment, we only have
@@ -42,12 +42,12 @@ static const char *i915_fence_get_timeline_name(struct fence *fence)
 	return "global";
 }
 
-static bool i915_fence_signaled(struct fence *fence)
+static bool i915_fence_signaled(struct dma_fence *fence)
 {
 	return i915_gem_request_completed(to_request(fence));
 }
 
-static bool i915_fence_enable_signaling(struct fence *fence)
+static bool i915_fence_enable_signaling(struct dma_fence *fence)
 {
 	if (i915_fence_signaled(fence))
 		return false;
@@ -56,7 +56,7 @@ static bool i915_fence_enable_signaling(struct fence *fence)
 	return true;
 }
 
-static signed long i915_fence_wait(struct fence *fence,
+static signed long i915_fence_wait(struct dma_fence *fence,
 				   bool interruptible,
 				   signed long timeout_jiffies)
 {
@@ -85,26 +85,26 @@ static signed long i915_fence_wait(struct fence *fence,
 	return timeout_jiffies;
 }
 
-static void i915_fence_value_str(struct fence *fence, char *str, int size)
+static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
 {
 	snprintf(str, size, "%u", fence->seqno);
 }
 
-static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str,
 					  int size)
 {
 	snprintf(str, size, "%u",
 		 intel_engine_get_seqno(to_request(fence)->engine));
 }
 
-static void i915_fence_release(struct fence *fence)
+static void i915_fence_release(struct dma_fence *fence)
 {
 	struct drm_i915_gem_request *req = to_request(fence);
 
 	kmem_cache_free(req->i915->requests, req);
 }
 
-const struct fence_ops i915_fence_ops = {
+const struct dma_fence_ops i915_fence_ops = {
 	.get_driver_name = i915_fence_get_driver_name,
 	.get_timeline_name = i915_fence_get_timeline_name,
 	.enable_signaling = i915_fence_enable_signaling,
@@ -388,8 +388,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * The reference count is incremented atomically. If it is zero,
 	 * the lookup knows the request is unallocated and complete. Otherwise,
 	 * it is either still in use, or has been reallocated and reset
-	 * with fence_init(). This increment is safe for release as we check
-	 * that the request we have a reference to and matches the active
+	 * with dma_fence_init(). This increment is safe for release as we
+	 * check that the request we have a reference to and matches the active
 	 * request.
 	 *
 	 * Before we increment the refcount, we chase the request->engine
@@ -412,11 +412,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		goto err;
 
 	spin_lock_init(&req->lock);
-	fence_init(&req->fence,
-		   &i915_fence_ops,
-		   &req->lock,
-		   engine->fence_context,
-		   seqno);
+	dma_fence_init(&req->fence,
+		       &i915_fence_ops,
+		       &req->lock,
+		       engine->fence_context,
+		       seqno);
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 974bd7bcc801..bceeaa3a5193 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,7 +25,7 @@
 #ifndef I915_GEM_REQUEST_H
 #define I915_GEM_REQUEST_H
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include "i915_gem.h"
 #include "i915_sw_fence.h"
@@ -62,7 +62,7 @@ struct intel_signal_node {
  * The requests are reference counted.
  */
 struct drm_i915_gem_request {
-	struct fence fence;
+	struct dma_fence fence;
 	spinlock_t lock;
 
 	/** On Which ring this request was generated */
@@ -145,9 +145,9 @@ struct drm_i915_gem_request {
 	struct list_head execlist_link;
 };
 
-extern const struct fence_ops i915_fence_ops;
+extern const struct dma_fence_ops i915_fence_ops;
 
-static inline bool fence_is_i915(struct fence *fence)
+static inline bool fence_is_i915(struct dma_fence *fence)
 {
 	return fence->ops == &i915_fence_ops;
 }
@@ -172,7 +172,7 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
 }
 
 static inline struct drm_i915_gem_request *
-to_request(struct fence *fence)
+to_request(struct dma_fence *fence)
 {
 	/* We assume that NULL fence/request are interoperable */
 	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
@@ -183,19 +183,19 @@ to_request(struct fence *fence)
 static inline struct drm_i915_gem_request *
 i915_gem_request_get(struct drm_i915_gem_request *req)
 {
-	return to_request(fence_get(&req->fence));
+	return to_request(dma_fence_get(&req->fence));
 }
 
 static inline struct drm_i915_gem_request *
 i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
 {
-	return to_request(fence_get_rcu(&req->fence));
+	return to_request(dma_fence_get_rcu(&req->fence));
 }
 
 static inline void
 i915_gem_request_put(struct drm_i915_gem_request *req)
 {
-	fence_put(&req->fence);
+	dma_fence_put(&req->fence);
 }
 
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -497,7 +497,7 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 		 * compiler.
 		 *
 		 * The atomic operation at the heart of
-		 * i915_gem_request_get_rcu(), see fence_get_rcu(), is
+		 * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is
 		 * atomic_inc_not_zero() which is only a full memory barrier
 		 * when successful. That is, if i915_gem_request_get_rcu()
 		 * returns the request (and so with the reference counted
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 1e5cbc585ca2..8185002d7ec8 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/slab.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/reservation.h>
 
 #include "i915_sw_fence.h"
@@ -226,49 +226,50 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	return pending;
 }
 
-struct dma_fence_cb {
-	struct fence_cb base;
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
 	struct i915_sw_fence *fence;
-	struct fence *dma;
+	struct dma_fence *dma;
 	struct timer_list timer;
 };
 
 static void timer_i915_sw_fence_wake(unsigned long data)
 {
-	struct dma_fence_cb *cb = (struct dma_fence_cb *)data;
+	struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data;
 
 	printk(KERN_WARNING "asynchronous wait on fence %s:%s:%x timed out\n",
 	       cb->dma->ops->get_driver_name(cb->dma),
 	       cb->dma->ops->get_timeline_name(cb->dma),
 	       cb->dma->seqno);
-	fence_put(cb->dma);
+	dma_fence_put(cb->dma);
 	cb->dma = NULL;
 
 	i915_sw_fence_commit(cb->fence);
 	cb->timer.function = NULL;
 }
 
-static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+static void dma_i915_sw_fence_wake(struct dma_fence *dma,
+				   struct dma_fence_cb *data)
 {
-	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
 
 	del_timer_sync(&cb->timer);
 	if (cb->timer.function)
 		i915_sw_fence_commit(cb->fence);
-	fence_put(cb->dma);
+	dma_fence_put(cb->dma);
 
 	kfree(cb);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
-				  struct fence *dma,
+				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp)
 {
-	struct dma_fence_cb *cb;
+	struct i915_sw_dma_fence_cb *cb;
 	int ret;
 
-	if (fence_is_signaled(dma))
+	if (dma_fence_is_signaled(dma))
 		return 0;
 
 	cb = kmalloc(sizeof(*cb), gfp);
@@ -276,7 +277,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		if (!gfpflags_allow_blocking(gfp))
 			return -ENOMEM;
 
-		return fence_wait(dma, false);
+		return dma_fence_wait(dma, false);
 	}
 
 	cb->fence = i915_sw_fence_get(fence);
@@ -287,11 +288,11 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		      timer_i915_sw_fence_wake, (unsigned long)cb,
 		      TIMER_IRQSAFE);
 	if (timeout) {
-		cb->dma = fence_get(dma);
+		cb->dma = dma_fence_get(dma);
 		mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout));
 	}
 
-	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	ret = dma_fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
 	if (ret == 0) {
 		ret = 1;
 	} else {
@@ -305,16 +306,16 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
-				    const struct fence_ops *exclude,
+				    const struct dma_fence_ops *exclude,
 				    bool write,
 				    unsigned long timeout,
 				    gfp_t gfp)
 {
-	struct fence *excl;
+	struct dma_fence *excl;
 	int ret = 0, pending;
 
 	if (write) {
-		struct fence **shared;
+		struct dma_fence **shared;
 		unsigned int count, i;
 
 		ret = reservation_object_get_fences_rcu(resv,
@@ -339,7 +340,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 		}
 
 		for (i = 0; i < count; i++)
-			fence_put(shared[i]);
+			dma_fence_put(shared[i]);
 		kfree(shared);
 	} else {
 		excl = reservation_object_get_excl_rcu(resv);
@@ -356,7 +357,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 			ret |= pending;
 	}
 
-	fence_put(excl);
+	dma_fence_put(excl);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 373141602ca4..cd239e92f67f 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -16,8 +16,8 @@
 #include <linux/wait.h>
 
 struct completion;
-struct fence;
-struct fence_ops;
+struct dma_fence;
+struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -47,12 +47,12 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *after,
 				 wait_queue_t *wq);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
-				  struct fence *dma,
+				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
-				    const struct fence_ops *exclude,
+				    const struct dma_fence_ops *exclude,
 				    bool write,
 				    unsigned long timeout,
 				    gfp_t gfp);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 178798002a73..5c912c25f7d3 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -491,7 +491,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 			   __entry->ring = req->engine->id;
 			   __entry->seqno = req->fence.seqno;
 			   __entry->flags = flags;
-			   fence_enable_sw_signaling(&req->fence);
+			   dma_fence_enable_sw_signaling(&req->fence);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 23fc1042fed4..56efcc507ea2 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -464,7 +464,7 @@ static int intel_breadcrumbs_signaler(void *arg)
 						 &request->signaling.wait);
 
 			local_bh_disable();
-			fence_signal(&request->fence);
+			dma_fence_signal(&request->fence);
 			local_bh_enable(); /* kick start the tasklets */
 
 			/* Find the next oldest signal. Note that as we have
@@ -502,7 +502,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	struct rb_node *parent, **p;
 	bool first, wakeup;
 
-	/* locked by fence_enable_sw_signaling() */
+	/* locked by dma_fence_enable_sw_signaling() */
 	assert_spin_locked(&request->lock);
 
 	request->signaling.wait.tsk = b->signaler;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2dc94812bea5..8cceb345aa0f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -245,7 +245,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
-	engine->fence_context = fence_context_alloc(1);
+	engine->fence_context = dma_fence_context_alloc(1);
 
 	intel_engine_init_requests(engine);
 	intel_engine_init_hangcheck(engine);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index d0da52f2a806..940bf4992fe2 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -217,7 +217,7 @@ void msm_gem_vunmap(struct drm_gem_object *obj);
 int msm_gem_sync_object(struct drm_gem_object *obj,
 		struct msm_fence_context *fctx, bool exclusive);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
-		struct msm_gpu *gpu, bool exclusive, struct fence *fence);
+		struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence);
 void msm_gem_move_to_inactive(struct drm_gem_object *obj);
 int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout);
 int msm_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index a9b9b1c95a2e..3f299c537b77 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -15,7 +15,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include "msm_drv.h"
 #include "msm_fence.h"
@@ -32,7 +32,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name)
 
 	fctx->dev = dev;
 	fctx->name = name;
-	fctx->context = fence_context_alloc(1);
+	fctx->context = dma_fence_context_alloc(1);
 	init_waitqueue_head(&fctx->event);
 	spin_lock_init(&fctx->spinlock);
 
@@ -100,52 +100,52 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
 
 struct msm_fence {
 	struct msm_fence_context *fctx;
-	struct fence base;
+	struct dma_fence base;
 };
 
-static inline struct msm_fence *to_msm_fence(struct fence *fence)
+static inline struct msm_fence *to_msm_fence(struct dma_fence *fence)
 {
 	return container_of(fence, struct msm_fence, base);
 }
 
-static const char *msm_fence_get_driver_name(struct fence *fence)
+static const char *msm_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "msm";
 }
 
-static const char *msm_fence_get_timeline_name(struct fence *fence)
+static const char *msm_fence_get_timeline_name(struct dma_fence *fence)
 {
 	struct msm_fence *f = to_msm_fence(fence);
 	return f->fctx->name;
 }
 
-static bool msm_fence_enable_signaling(struct fence *fence)
+static bool msm_fence_enable_signaling(struct dma_fence *fence)
 {
 	return true;
 }
 
-static bool msm_fence_signaled(struct fence *fence)
+static bool msm_fence_signaled(struct dma_fence *fence)
 {
 	struct msm_fence *f = to_msm_fence(fence);
 	return fence_completed(f->fctx, f->base.seqno);
 }
 
-static void msm_fence_release(struct fence *fence)
+static void msm_fence_release(struct dma_fence *fence)
 {
 	struct msm_fence *f = to_msm_fence(fence);
 	kfree_rcu(f, base.rcu);
 }
 
-static const struct fence_ops msm_fence_ops = {
+static const struct dma_fence_ops msm_fence_ops = {
 	.get_driver_name = msm_fence_get_driver_name,
 	.get_timeline_name = msm_fence_get_timeline_name,
 	.enable_signaling = msm_fence_enable_signaling,
 	.signaled = msm_fence_signaled,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = msm_fence_release,
 };
 
-struct fence *
+struct dma_fence *
 msm_fence_alloc(struct msm_fence_context *fctx)
 {
 	struct msm_fence *f;
@@ -156,8 +156,8 @@ msm_fence_alloc(struct msm_fence_context *fctx)
 
 	f->fctx = fctx;
 
-	fence_init(&f->base, &msm_fence_ops, &fctx->spinlock,
-			fctx->context, ++fctx->last_fence);
+	dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock,
+		       fctx->context, ++fctx->last_fence);
 
 	return &f->base;
 }
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index ceb5b3d314b4..56061aa1959d 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -41,6 +41,6 @@ int msm_queue_fence_cb(struct msm_fence_context *fctx,
 		struct msm_fence_cb *cb, uint32_t fence);
 void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence);
 
-struct fence * msm_fence_alloc(struct msm_fence_context *fctx);
+struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx);
 
 #endif
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b6ac27e31929..57db7dbbb618 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -521,7 +521,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	struct reservation_object_list *fobj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int i, ret;
 
 	if (!exclusive) {
@@ -540,7 +540,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
 		fence = reservation_object_get_excl(msm_obj->resv);
 		/* don't need to wait on our own fences, since ring is fifo */
 		if (fence && (fence->context != fctx->context)) {
-			ret = fence_wait(fence, true);
+			ret = dma_fence_wait(fence, true);
 			if (ret)
 				return ret;
 		}
@@ -553,7 +553,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
 		fence = rcu_dereference_protected(fobj->shared[i],
 						reservation_object_held(msm_obj->resv));
 		if (fence->context != fctx->context) {
-			ret = fence_wait(fence, true);
+			ret = dma_fence_wait(fence, true);
 			if (ret)
 				return ret;
 		}
@@ -563,7 +563,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
 }
 
 void msm_gem_move_to_active(struct drm_gem_object *obj,
-		struct msm_gpu *gpu, bool exclusive, struct fence *fence)
+		struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
@@ -616,10 +616,10 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj)
 }
 
 #ifdef CONFIG_DEBUG_FS
-static void describe_fence(struct fence *fence, const char *type,
+static void describe_fence(struct dma_fence *fence, const char *type,
 		struct seq_file *m)
 {
-	if (!fence_is_signaled(fence))
+	if (!dma_fence_is_signaled(fence))
 		seq_printf(m, "\t%9s: %s %s seq %u\n", type,
 				fence->ops->get_driver_name(fence),
 				fence->ops->get_timeline_name(fence),
@@ -631,7 +631,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	struct reservation_object *robj = msm_obj->resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	uint64_t off = drm_vma_node_start(&obj->vma_node);
 	const char *madv;
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index b2f13cfe945e..2cb8551fda70 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -104,7 +104,7 @@ struct msm_gem_submit {
 	struct list_head node;   /* node in gpu submit_list */
 	struct list_head bo_list;
 	struct ww_acquire_ctx ticket;
-	struct fence *fence;
+	struct dma_fence *fence;
 	struct pid *pid;    /* submitting process */
 	bool valid;         /* true if no cmdstream patching needed */
 	unsigned int nr_cmds;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b6a0f37a65f3..25e8786fa4ca 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -60,7 +60,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 void msm_gem_submit_free(struct msm_gem_submit *submit)
 {
-	fence_put(submit->fence);
+	dma_fence_put(submit->fence);
 	list_del(&submit->node);
 	put_pid(submit->pid);
 	kfree(submit);
@@ -380,7 +380,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
-	struct fence *in_fence = NULL;
+	struct dma_fence *in_fence = NULL;
 	struct sync_file *sync_file = NULL;
 	int out_fence_fd = -1;
 	unsigned i;
@@ -439,7 +439,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		 */
 
 		if (in_fence->context != gpu->fctx->context) {
-			ret = fence_wait(in_fence, true);
+			ret = dma_fence_wait(in_fence, true);
 			if (ret)
 				goto out;
 		}
@@ -542,7 +542,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 out:
 	if (in_fence)
-		fence_put(in_fence);
+		dma_fence_put(in_fence);
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 5bb09838b5ae..3249707e6834 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -476,7 +476,7 @@ static void retire_submits(struct msm_gpu *gpu)
 		submit = list_first_entry(&gpu->submit_list,
 				struct msm_gem_submit, node);
 
-		if (fence_is_signaled(submit->fence)) {
+		if (dma_fence_is_signaled(submit->fence)) {
 			retire_submit(gpu, submit);
 		} else {
 			break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 343b8659472c..ec8ac756aab4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -83,13 +83,13 @@ nv10_bo_get_tile_region(struct drm_device *dev, int i)
 
 static void
 nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
-			struct fence *fence)
+			struct dma_fence *fence)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	if (tile) {
 		spin_lock(&drm->tile.lock);
-		tile->fence = (struct nouveau_fence *)fence_get(fence);
+		tile->fence = (struct nouveau_fence *)dma_fence_get(fence);
 		tile->used = false;
 		spin_unlock(&drm->tile.lock);
 	}
@@ -1243,7 +1243,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
-	struct fence *fence = reservation_object_get_excl(bo->resv);
+	struct dma_fence *fence = reservation_object_get_excl(bo->resv);
 
 	nv10_bo_put_tile_region(dev, *old_tile, fence);
 	*old_tile = new_tile;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 4bb9ab892ae1..e9529ee6bc23 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -28,7 +28,7 @@
 
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
-#include <trace/events/fence.h>
+#include <trace/events/dma_fence.h>
 
 #include <nvif/cl826e.h>
 #include <nvif/notify.h>
@@ -38,11 +38,11 @@
 #include "nouveau_dma.h"
 #include "nouveau_fence.h"
 
-static const struct fence_ops nouveau_fence_ops_uevent;
-static const struct fence_ops nouveau_fence_ops_legacy;
+static const struct dma_fence_ops nouveau_fence_ops_uevent;
+static const struct dma_fence_ops nouveau_fence_ops_legacy;
 
 static inline struct nouveau_fence *
-from_fence(struct fence *fence)
+from_fence(struct dma_fence *fence)
 {
 	return container_of(fence, struct nouveau_fence, base);
 }
@@ -58,23 +58,23 @@ nouveau_fence_signal(struct nouveau_fence *fence)
 {
 	int drop = 0;
 
-	fence_signal_locked(&fence->base);
+	dma_fence_signal_locked(&fence->base);
 	list_del(&fence->head);
 	rcu_assign_pointer(fence->channel, NULL);
 
-	if (test_bit(FENCE_FLAG_USER_BITS, &fence->base.flags)) {
+	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
 		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 		if (!--fctx->notify_ref)
 			drop = 1;
 	}
 
-	fence_put(&fence->base);
+	dma_fence_put(&fence->base);
 	return drop;
 }
 
 static struct nouveau_fence *
-nouveau_local_fence(struct fence *fence, struct nouveau_drm *drm) {
+nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) {
 	struct nouveau_fence_priv *priv = (void*)drm->fence;
 
 	if (fence->ops != &nouveau_fence_ops_legacy &&
@@ -201,7 +201,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 
 struct nouveau_fence_work {
 	struct work_struct work;
-	struct fence_cb cb;
+	struct dma_fence_cb cb;
 	void (*func)(void *);
 	void *data;
 };
@@ -214,7 +214,7 @@ nouveau_fence_work_handler(struct work_struct *kwork)
 	kfree(work);
 }
 
-static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
+static void nouveau_fence_work_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb);
 
@@ -222,12 +222,12 @@ static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
 }
 
 void
-nouveau_fence_work(struct fence *fence,
+nouveau_fence_work(struct dma_fence *fence,
 		   void (*func)(void *), void *data)
 {
 	struct nouveau_fence_work *work;
 
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		goto err;
 
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
@@ -245,7 +245,7 @@ nouveau_fence_work(struct fence *fence,
 	work->func = func;
 	work->data = data;
 
-	if (fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
+	if (dma_fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
 		goto err_free;
 	return;
 
@@ -266,17 +266,17 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 	fence->timeout  = jiffies + (15 * HZ);
 
 	if (priv->uevent)
-		fence_init(&fence->base, &nouveau_fence_ops_uevent,
-			   &fctx->lock, fctx->context, ++fctx->sequence);
+		dma_fence_init(&fence->base, &nouveau_fence_ops_uevent,
+			       &fctx->lock, fctx->context, ++fctx->sequence);
 	else
-		fence_init(&fence->base, &nouveau_fence_ops_legacy,
-			   &fctx->lock, fctx->context, ++fctx->sequence);
+		dma_fence_init(&fence->base, &nouveau_fence_ops_legacy,
+			       &fctx->lock, fctx->context, ++fctx->sequence);
 	kref_get(&fctx->fence_ref);
 
-	trace_fence_emit(&fence->base);
+	trace_dma_fence_emit(&fence->base);
 	ret = fctx->emit(fence);
 	if (!ret) {
-		fence_get(&fence->base);
+		dma_fence_get(&fence->base);
 		spin_lock_irq(&fctx->lock);
 
 		if (nouveau_fence_update(chan, fctx))
@@ -298,7 +298,7 @@ nouveau_fence_done(struct nouveau_fence *fence)
 		struct nouveau_channel *chan;
 		unsigned long flags;
 
-		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
 			return true;
 
 		spin_lock_irqsave(&fctx->lock, flags);
@@ -307,11 +307,11 @@ nouveau_fence_done(struct nouveau_fence *fence)
 			nvif_notify_put(&fctx->notify);
 		spin_unlock_irqrestore(&fctx->lock, flags);
 	}
-	return fence_is_signaled(&fence->base);
+	return dma_fence_is_signaled(&fence->base);
 }
 
 static long
-nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
+nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
@@ -378,7 +378,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 	if (!lazy)
 		return nouveau_fence_wait_busy(fence, intr);
 
-	ret = fence_wait_timeout(&fence->base, intr, 15 * HZ);
+	ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ);
 	if (ret < 0)
 		return ret;
 	else if (!ret)
@@ -391,7 +391,7 @@ int
 nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
-	struct fence *fence;
+	struct dma_fence *fence;
 	struct reservation_object *resv = nvbo->bo.resv;
 	struct reservation_object_list *fobj;
 	struct nouveau_fence *f;
@@ -421,7 +421,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		}
 
 		if (must_wait)
-			ret = fence_wait(fence, intr);
+			ret = dma_fence_wait(fence, intr);
 
 		return ret;
 	}
@@ -446,7 +446,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		}
 
 		if (must_wait)
-			ret = fence_wait(fence, intr);
+			ret = dma_fence_wait(fence, intr);
 	}
 
 	return ret;
@@ -456,7 +456,7 @@ void
 nouveau_fence_unref(struct nouveau_fence **pfence)
 {
 	if (*pfence)
-		fence_put(&(*pfence)->base);
+		dma_fence_put(&(*pfence)->base);
 	*pfence = NULL;
 }
 
@@ -484,12 +484,12 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
 	return ret;
 }
 
-static const char *nouveau_fence_get_get_driver_name(struct fence *fence)
+static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
 {
 	return "nouveau";
 }
 
-static const char *nouveau_fence_get_timeline_name(struct fence *f)
+static const char *nouveau_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -503,7 +503,7 @@ static const char *nouveau_fence_get_timeline_name(struct fence *f)
  * result. The drm node should still be there, so we can derive the index from
  * the fence context.
  */
-static bool nouveau_fence_is_signaled(struct fence *f)
+static bool nouveau_fence_is_signaled(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -519,7 +519,7 @@ static bool nouveau_fence_is_signaled(struct fence *f)
 	return ret;
 }
 
-static bool nouveau_fence_no_signaling(struct fence *f)
+static bool nouveau_fence_no_signaling(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 
@@ -530,30 +530,30 @@ static bool nouveau_fence_no_signaling(struct fence *f)
 	WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
 
 	/*
-	 * This needs uevents to work correctly, but fence_add_callback relies on
+	 * This needs uevents to work correctly, but dma_fence_add_callback relies on
 	 * being able to enable signaling. It will still get signaled eventually,
 	 * just not right away.
 	 */
 	if (nouveau_fence_is_signaled(f)) {
 		list_del(&fence->head);
 
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 		return false;
 	}
 
 	return true;
 }
 
-static void nouveau_fence_release(struct fence *f)
+static void nouveau_fence_release(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
-	fence_free(&fence->base);
+	dma_fence_free(&fence->base);
 }
 
-static const struct fence_ops nouveau_fence_ops_legacy = {
+static const struct dma_fence_ops nouveau_fence_ops_legacy = {
 	.get_driver_name = nouveau_fence_get_get_driver_name,
 	.get_timeline_name = nouveau_fence_get_timeline_name,
 	.enable_signaling = nouveau_fence_no_signaling,
@@ -562,7 +562,7 @@ static const struct fence_ops nouveau_fence_ops_legacy = {
 	.release = nouveau_fence_release
 };
 
-static bool nouveau_fence_enable_signaling(struct fence *f)
+static bool nouveau_fence_enable_signaling(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -573,18 +573,18 @@ static bool nouveau_fence_enable_signaling(struct fence *f)
 
 	ret = nouveau_fence_no_signaling(f);
 	if (ret)
-		set_bit(FENCE_FLAG_USER_BITS, &fence->base.flags);
+		set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
 	else if (!--fctx->notify_ref)
 		nvif_notify_put(&fctx->notify);
 
 	return ret;
 }
 
-static const struct fence_ops nouveau_fence_ops_uevent = {
+static const struct dma_fence_ops nouveau_fence_ops_uevent = {
 	.get_driver_name = nouveau_fence_get_get_driver_name,
 	.get_timeline_name = nouveau_fence_get_timeline_name,
 	.enable_signaling = nouveau_fence_enable_signaling,
 	.signaled = nouveau_fence_is_signaled,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = NULL
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64c4ce7115ad..41f3c019e534 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -1,14 +1,14 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <nvif/notify.h>
 
 struct nouveau_drm;
 struct nouveau_bo;
 
 struct nouveau_fence {
-	struct fence base;
+	struct dma_fence base;
 
 	struct list_head head;
 
@@ -24,7 +24,7 @@ void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
 bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_work(struct fence *, void (*)(void *), void *);
+void nouveau_fence_work(struct dma_fence *, void (*)(void *), void *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 0bd7164bc817..7f083c95f422 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -119,7 +119,7 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma)
 	const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
 	struct reservation_object *resv = nvbo->bo.resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 
 	fobj = reservation_object_get_list(resv);
 
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 1915b7b82a59..fa8f2375c398 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -110,6 +110,6 @@ nv04_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv04_fence_context_new;
 	priv->base.context_del = nv04_fence_context_del;
 	priv->base.contexts = 15;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index 4e3de34ff6f4..f99fcf56928a 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -107,7 +107,7 @@ nv10_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv10_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 31;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 7d5e562a55c5..79bc01111351 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -126,7 +126,7 @@ nv17_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv17_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 31;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index 4d6f202b7770..8c5295414578 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -97,7 +97,7 @@ nv50_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv50_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 127;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 18bde9d8e6d6..23ef04b4e0b2 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -229,7 +229,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_del = nv84_fence_context_del;
 
 	priv->base.contexts = fifo->nr;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 5f3e5ad99de7..84995ebc6ffc 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -31,7 +31,7 @@
  * Definitions taken from spice-protocol, plus kernel driver specific bits.
  */
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/workqueue.h>
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
@@ -190,7 +190,7 @@ enum {
  * spice-protocol/qxl_dev.h */
 #define QXL_MAX_RES 96
 struct qxl_release {
-	struct fence base;
+	struct dma_fence base;
 
 	int id;
 	int type;
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index cd83f050cf3e..50b4e522f05f 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -21,7 +21,7 @@
  */
 #include "qxl_drv.h"
 #include "qxl_object.h"
-#include <trace/events/fence.h>
+#include <trace/events/dma_fence.h>
 
 /*
  * drawable cmd cache - allocate a bunch of VRAM pages, suballocate
@@ -40,23 +40,24 @@
 static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
 static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
 
-static const char *qxl_get_driver_name(struct fence *fence)
+static const char *qxl_get_driver_name(struct dma_fence *fence)
 {
 	return "qxl";
 }
 
-static const char *qxl_get_timeline_name(struct fence *fence)
+static const char *qxl_get_timeline_name(struct dma_fence *fence)
 {
 	return "release";
 }
 
-static bool qxl_nop_signaling(struct fence *fence)
+static bool qxl_nop_signaling(struct dma_fence *fence)
 {
 	/* fences are always automatically signaled, so just pretend we did this.. */
 	return true;
 }
 
-static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout)
+static long qxl_fence_wait(struct dma_fence *fence, bool intr,
+			   signed long timeout)
 {
 	struct qxl_device *qdev;
 	struct qxl_release *release;
@@ -71,7 +72,7 @@ static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout)
 retry:
 	sc++;
 
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		goto signaled;
 
 	qxl_io_notify_oom(qdev);
@@ -80,11 +81,11 @@ retry:
 		if (!qxl_queue_garbage_collect(qdev, true))
 			break;
 
-		if (fence_is_signaled(fence))
+		if (dma_fence_is_signaled(fence))
 			goto signaled;
 	}
 
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		goto signaled;
 
 	if (have_drawable_releases || sc < 4) {
@@ -96,9 +97,9 @@ retry:
 			return 0;
 
 		if (have_drawable_releases && sc > 300) {
-			FENCE_WARN(fence, "failed to wait on release %llu "
-					  "after spincount %d\n",
-					  fence->context & ~0xf0000000, sc);
+			DMA_FENCE_WARN(fence, "failed to wait on release %llu "
+				       "after spincount %d\n",
+				       fence->context & ~0xf0000000, sc);
 			goto signaled;
 		}
 		goto retry;
@@ -115,7 +116,7 @@ signaled:
 	return end - cur;
 }
 
-static const struct fence_ops qxl_fence_ops = {
+static const struct dma_fence_ops qxl_fence_ops = {
 	.get_driver_name = qxl_get_driver_name,
 	.get_timeline_name = qxl_get_timeline_name,
 	.enable_signaling = qxl_nop_signaling,
@@ -192,8 +193,8 @@ qxl_release_free(struct qxl_device *qdev,
 		WARN_ON(list_empty(&release->bos));
 		qxl_release_free_list(release);
 
-		fence_signal(&release->base);
-		fence_put(&release->base);
+		dma_fence_signal(&release->base);
+		dma_fence_put(&release->base);
 	} else {
 		qxl_release_free_list(release);
 		kfree(release);
@@ -453,9 +454,9 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 	 * Since we never really allocated a context and we don't want to conflict,
 	 * set the highest bits. This will break if we really allow exporting of dma-bufs.
 	 */
-	fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock,
-		   release->id | 0xf0000000, release->base.seqno);
-	trace_fence_emit(&release->base);
+	dma_fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock,
+		       release->id | 0xf0000000, release->base.seqno);
+	trace_dma_fence_emit(&release->base);
 
 	driver = bdev->driver;
 	glob = bo->glob;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1b0dcad916b0..44e0c5ed6418 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -66,7 +66,7 @@
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
 #include <linux/hashtable.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -367,7 +367,7 @@ struct radeon_fence_driver {
 };
 
 struct radeon_fence {
-	struct fence		base;
+	struct dma_fence		base;
 
 	struct radeon_device	*rdev;
 	uint64_t		seq;
@@ -746,7 +746,7 @@ struct radeon_flip_work {
 	uint64_t			base;
 	struct drm_pending_vblank_event *event;
 	struct radeon_bo		*old_rbo;
-	struct fence			*fence;
+	struct dma_fence		*fence;
 	bool				async;
 };
 
@@ -2514,9 +2514,9 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
 /*
  * Cast helper
  */
-extern const struct fence_ops radeon_fence_ops;
+extern const struct dma_fence_ops radeon_fence_ops;
 
-static inline struct radeon_fence *to_radeon_fence(struct fence *f)
+static inline struct radeon_fence *to_radeon_fence(struct dma_fence *f)
 {
 	struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index eb92aef46e3c..36b7ac7e57e5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1320,7 +1320,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		rdev->ring[i].idx = i;
 	}
-	rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
+	rdev->fence_context = dma_fence_context_alloc(RADEON_NUM_RINGS);
 
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 radeon_family_name[rdev->family], pdev->vendor, pdev->device,
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index cdb8cb568c15..e7409e8a9f87 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -437,7 +437,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
 				down_read(&rdev->exclusive_lock);
 			}
 		} else
-			r = fence_wait(work->fence, false);
+			r = dma_fence_wait(work->fence, false);
 
 		if (r)
 			DRM_ERROR("failed to wait on page flip fence (%d)!\n", r);
@@ -447,7 +447,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
 		 * confused about which BO the CRTC is scanning out
 		 */
 
-		fence_put(work->fence);
+		dma_fence_put(work->fence);
 		work->fence = NULL;
 	}
 
@@ -542,7 +542,7 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
 		goto cleanup;
 	}
-	work->fence = fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
+	work->fence = dma_fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
 	radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL);
 	radeon_bo_unreserve(new_rbo);
 
@@ -617,7 +617,7 @@ pflip_cleanup:
 
 cleanup:
 	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
-	fence_put(work->fence);
+	dma_fence_put(work->fence);
 	kfree(work);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7ef075acde9c..ef09f0a63754 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -141,8 +141,10 @@ int radeon_fence_emit(struct radeon_device *rdev,
 	(*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
 	(*fence)->ring = ring;
 	(*fence)->is_vm_update = false;
-	fence_init(&(*fence)->base, &radeon_fence_ops,
-		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
+	dma_fence_init(&(*fence)->base, &radeon_fence_ops,
+		       &rdev->fence_queue.lock,
+		       rdev->fence_context + ring,
+		       seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
 	radeon_fence_schedule_check(rdev, ring);
@@ -169,18 +171,18 @@ static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int fl
 	 */
 	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
 	if (seq >= fence->seq) {
-		int ret = fence_signal_locked(&fence->base);
+		int ret = dma_fence_signal_locked(&fence->base);
 
 		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
+			DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n");
 		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
+			DMA_FENCE_TRACE(&fence->base, "was already signaled\n");
 
 		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
 		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 	} else
-		FENCE_TRACE(&fence->base, "pending\n");
+		DMA_FENCE_TRACE(&fence->base, "pending\n");
 	return 0;
 }
 
@@ -351,7 +353,7 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 	return false;
 }
 
-static bool radeon_fence_is_signaled(struct fence *f)
+static bool radeon_fence_is_signaled(struct dma_fence *f)
 {
 	struct radeon_fence *fence = to_radeon_fence(f);
 	struct radeon_device *rdev = fence->rdev;
@@ -381,7 +383,7 @@ static bool radeon_fence_is_signaled(struct fence *f)
  * to fence_queue that checks if this fence is signaled, and if so it
  * signals the fence and removes itself.
  */
-static bool radeon_fence_enable_signaling(struct fence *f)
+static bool radeon_fence_enable_signaling(struct dma_fence *f)
 {
 	struct radeon_fence *fence = to_radeon_fence(f);
 	struct radeon_device *rdev = fence->rdev;
@@ -414,9 +416,9 @@ static bool radeon_fence_enable_signaling(struct fence *f)
 	fence->fence_wake.private = NULL;
 	fence->fence_wake.func = radeon_fence_check_signaled;
 	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
-	fence_get(f);
+	dma_fence_get(f);
 
-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
+	DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
 	return true;
 }
 
@@ -436,9 +438,9 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
 		int ret;
 
-		ret = fence_signal(&fence->base);
+		ret = dma_fence_signal(&fence->base);
 		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
+			DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
 		return true;
 	}
 	return false;
@@ -552,7 +554,7 @@ long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeo
 	 * exclusive_lock is not held in that case.
 	 */
 	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
-		return fence_wait(&fence->base, intr);
+		return dma_fence_wait(&fence->base, intr);
 
 	seq[fence->ring] = fence->seq;
 	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
@@ -560,9 +562,9 @@ long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeo
 		return r;
 	}
 
-	r_sig = fence_signal(&fence->base);
+	r_sig = dma_fence_signal(&fence->base);
 	if (!r_sig)
-		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
+		DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 	return r;
 }
 
@@ -697,7 +699,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  */
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
-	fence_get(&fence->base);
+	dma_fence_get(&fence->base);
 	return fence;
 }
 
@@ -714,7 +716,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
 	*fence = NULL;
 	if (tmp) {
-		fence_put(&tmp->base);
+		dma_fence_put(&tmp->base);
 	}
 }
 
@@ -1028,12 +1030,12 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
 #endif
 }
 
-static const char *radeon_fence_get_driver_name(struct fence *fence)
+static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "radeon";
 }
 
-static const char *radeon_fence_get_timeline_name(struct fence *f)
+static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct radeon_fence *fence = to_radeon_fence(f);
 	switch (fence->ring) {
@@ -1051,16 +1053,16 @@ static const char *radeon_fence_get_timeline_name(struct fence *f)
 
 static inline bool radeon_test_signaled(struct radeon_fence *fence)
 {
-	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
+	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
 }
 
 struct radeon_wait_cb {
-	struct fence_cb base;
+	struct dma_fence_cb base;
 	struct task_struct *task;
 };
 
 static void
-radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
+radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct radeon_wait_cb *wait =
 		container_of(cb, struct radeon_wait_cb, base);
@@ -1068,7 +1070,7 @@ radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
 	wake_up_process(wait->task);
 }
 
-static signed long radeon_fence_default_wait(struct fence *f, bool intr,
+static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
 					     signed long t)
 {
 	struct radeon_fence *fence = to_radeon_fence(f);
@@ -1077,7 +1079,7 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr,
 
 	cb.task = current;
 
-	if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
+	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
 		return t;
 
 	while (t > 0) {
@@ -1105,12 +1107,12 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr,
 	}
 
 	__set_current_state(TASK_RUNNING);
-	fence_remove_callback(f, &cb.base);
+	dma_fence_remove_callback(f, &cb.base);
 
 	return t;
 }
 
-const struct fence_ops radeon_fence_ops = {
+const struct dma_fence_ops radeon_fence_ops = {
 	.get_driver_name = radeon_fence_get_driver_name,
 	.get_timeline_name = radeon_fence_get_timeline_name,
 	.enable_signaling = radeon_fence_enable_signaling,
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index 02ac8a1de4ff..be5d7a38d3aa 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -92,7 +92,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
 		     bool shared)
 {
 	struct reservation_object_list *flist;
-	struct fence *f;
+	struct dma_fence *f;
 	struct radeon_fence *fence;
 	unsigned i;
 	int r = 0;
@@ -103,7 +103,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
 	if (fence && fence->rdev == rdev)
 		radeon_sync_fence(sync, fence);
 	else if (f)
-		r = fence_wait(f, true);
+		r = dma_fence_wait(f, true);
 
 	flist = reservation_object_get_list(resv);
 	if (shared || !flist || r)
@@ -116,7 +116,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
 		if (fence && fence->rdev == rdev)
 			radeon_sync_fence(sync, fence);
 		else
-			r = fence_wait(f, true);
+			r = dma_fence_wait(f, true);
 
 		if (r)
 			break;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 0cd0e7bdee55..d34d1cf33895 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -467,7 +467,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 {
 	int32_t *msg, msg_type, handle;
 	unsigned img_size = 0;
-	struct fence *f;
+	struct dma_fence *f;
 	void *ptr;
 
 	int i, r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index fc6217dfe401..915e0d1c316a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -148,7 +148,7 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(!list_empty(&bo->ddestroy));
 	ttm_tt_destroy(bo->ttm);
 	atomic_dec(&bo->glob->bo_count);
-	fence_put(bo->moving);
+	dma_fence_put(bo->moving);
 	if (bo->resv == &bo->ttm_resv)
 		reservation_object_fini(&bo->ttm_resv);
 	mutex_destroy(&bo->wu_mutex);
@@ -426,20 +426,20 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
 	struct reservation_object_list *fobj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int i;
 
 	fobj = reservation_object_get_list(bo->resv);
 	fence = reservation_object_get_excl(bo->resv);
 	if (fence && !fence->ops->signaled)
-		fence_enable_sw_signaling(fence);
+		dma_fence_enable_sw_signaling(fence);
 
 	for (i = 0; fobj && i < fobj->shared_count; ++i) {
 		fence = rcu_dereference_protected(fobj->shared[i],
 					reservation_object_held(bo->resv));
 
 		if (!fence->ops->signaled)
-			fence_enable_sw_signaling(fence);
+			dma_fence_enable_sw_signaling(fence);
 	}
 }
 
@@ -792,11 +792,11 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 				 struct ttm_mem_type_manager *man,
 				 struct ttm_mem_reg *mem)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	int ret;
 
 	spin_lock(&man->move_lock);
-	fence = fence_get(man->move);
+	fence = dma_fence_get(man->move);
 	spin_unlock(&man->move_lock);
 
 	if (fence) {
@@ -806,7 +806,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 		if (unlikely(ret))
 			return ret;
 
-		fence_put(bo->moving);
+		dma_fence_put(bo->moving);
 		bo->moving = fence;
 	}
 
@@ -1286,7 +1286,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_bo_global *glob = bdev->glob;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int ret;
 
 	/*
@@ -1309,12 +1309,12 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	spin_unlock(&glob->lru_lock);
 
 	spin_lock(&man->move_lock);
-	fence = fence_get(man->move);
+	fence = dma_fence_get(man->move);
 	spin_unlock(&man->move_lock);
 
 	if (fence) {
-		ret = fence_wait(fence, false);
-		fence_put(fence);
+		ret = dma_fence_wait(fence, false);
+		dma_fence_put(fence);
 		if (ret) {
 			if (allow_errors) {
 				return ret;
@@ -1343,7 +1343,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		       mem_type);
 		return ret;
 	}
-	fence_put(man->move);
+	dma_fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index bf6e21655c57..d0459b392e5e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -644,7 +644,7 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 EXPORT_SYMBOL(ttm_bo_kunmap);
 
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
-			      struct fence *fence,
+			      struct dma_fence *fence,
 			      bool evict,
 			      struct ttm_mem_reg *new_mem)
 {
@@ -674,8 +674,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 * operation has completed.
 		 */
 
-		fence_put(bo->moving);
-		bo->moving = fence_get(fence);
+		dma_fence_put(bo->moving);
+		bo->moving = dma_fence_get(fence);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
@@ -706,7 +706,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
 
 int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
-			 struct fence *fence, bool evict,
+			 struct dma_fence *fence, bool evict,
 			 struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -730,8 +730,8 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		 * operation has completed.
 		 */
 
-		fence_put(bo->moving);
-		bo->moving = fence_get(fence);
+		dma_fence_put(bo->moving);
+		bo->moving = dma_fence_get(fence);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
@@ -761,16 +761,16 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		 */
 
 		spin_lock(&from->move_lock);
-		if (!from->move || fence_is_later(fence, from->move)) {
-			fence_put(from->move);
-			from->move = fence_get(fence);
+		if (!from->move || dma_fence_is_later(fence, from->move)) {
+			dma_fence_put(from->move);
+			from->move = dma_fence_get(fence);
 		}
 		spin_unlock(&from->move_lock);
 
 		ttm_bo_free_old_node(bo);
 
-		fence_put(bo->moving);
-		bo->moving = fence_get(fence);
+		dma_fence_put(bo->moving);
+		bo->moving = dma_fence_get(fence);
 
 	} else {
 		/**
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a6ed9d5e5167..4748aedc933a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -54,7 +54,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 	/*
 	 * Quick non-stalling check for idle.
 	 */
-	if (fence_is_signaled(bo->moving))
+	if (dma_fence_is_signaled(bo->moving))
 		goto out_clear;
 
 	/*
@@ -67,14 +67,14 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 			goto out_unlock;
 
 		up_read(&vma->vm_mm->mmap_sem);
-		(void) fence_wait(bo->moving, true);
+		(void) dma_fence_wait(bo->moving, true);
 		goto out_unlock;
 	}
 
 	/*
 	 * Ordinary wait.
 	 */
-	ret = fence_wait(bo->moving, true);
+	ret = dma_fence_wait(bo->moving, true);
 	if (unlikely(ret != 0)) {
 		ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
 			VM_FAULT_NOPAGE;
@@ -82,7 +82,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 	}
 
 out_clear:
-	fence_put(bo->moving);
+	dma_fence_put(bo->moving);
 	bo->moving = NULL;
 
 out_unlock:
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index a80717b35dc6..d35bc491e8de 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -179,7 +179,8 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
 
 void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
-				 struct list_head *list, struct fence *fence)
+				 struct list_head *list,
+				 struct dma_fence *fence)
 {
 	struct ttm_validate_buffer *entry;
 	struct ttm_buffer_object *bo;
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
index 5c57c1ffa1f9..488909a21ed8 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -28,56 +28,57 @@
 #define VGEM_FENCE_TIMEOUT (10*HZ)
 
 struct vgem_fence {
-	struct fence base;
+	struct dma_fence base;
 	struct spinlock lock;
 	struct timer_list timer;
 };
 
-static const char *vgem_fence_get_driver_name(struct fence *fence)
+static const char *vgem_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "vgem";
 }
 
-static const char *vgem_fence_get_timeline_name(struct fence *fence)
+static const char *vgem_fence_get_timeline_name(struct dma_fence *fence)
 {
 	return "unbound";
 }
 
-static bool vgem_fence_signaled(struct fence *fence)
+static bool vgem_fence_signaled(struct dma_fence *fence)
 {
 	return false;
 }
 
-static bool vgem_fence_enable_signaling(struct fence *fence)
+static bool vgem_fence_enable_signaling(struct dma_fence *fence)
 {
 	return true;
 }
 
-static void vgem_fence_release(struct fence *base)
+static void vgem_fence_release(struct dma_fence *base)
 {
 	struct vgem_fence *fence = container_of(base, typeof(*fence), base);
 
 	del_timer_sync(&fence->timer);
-	fence_free(&fence->base);
+	dma_fence_free(&fence->base);
 }
 
-static void vgem_fence_value_str(struct fence *fence, char *str, int size)
+static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size)
 {
 	snprintf(str, size, "%u", fence->seqno);
 }
 
-static void vgem_fence_timeline_value_str(struct fence *fence, char *str,
+static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str,
 					  int size)
 {
-	snprintf(str, size, "%u", fence_is_signaled(fence) ? fence->seqno : 0);
+	snprintf(str, size, "%u",
+		 dma_fence_is_signaled(fence) ? fence->seqno : 0);
 }
 
-static const struct fence_ops vgem_fence_ops = {
+static const struct dma_fence_ops vgem_fence_ops = {
 	.get_driver_name = vgem_fence_get_driver_name,
 	.get_timeline_name = vgem_fence_get_timeline_name,
 	.enable_signaling = vgem_fence_enable_signaling,
 	.signaled = vgem_fence_signaled,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = vgem_fence_release,
 
 	.fence_value_str = vgem_fence_value_str,
@@ -88,11 +89,11 @@ static void vgem_fence_timeout(unsigned long data)
 {
 	struct vgem_fence *fence = (struct vgem_fence *)data;
 
-	fence_signal(&fence->base);
+	dma_fence_signal(&fence->base);
 }
 
-static struct fence *vgem_fence_create(struct vgem_file *vfile,
-				       unsigned int flags)
+static struct dma_fence *vgem_fence_create(struct vgem_file *vfile,
+					   unsigned int flags)
 {
 	struct vgem_fence *fence;
 
@@ -101,8 +102,8 @@ static struct fence *vgem_fence_create(struct vgem_file *vfile,
 		return NULL;
 
 	spin_lock_init(&fence->lock);
-	fence_init(&fence->base, &vgem_fence_ops, &fence->lock,
-		   fence_context_alloc(1), 1);
+	dma_fence_init(&fence->base, &vgem_fence_ops, &fence->lock,
+		       dma_fence_context_alloc(1), 1);
 
 	setup_timer(&fence->timer, vgem_fence_timeout, (unsigned long)fence);
 
@@ -157,7 +158,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 	struct vgem_file *vfile = file->driver_priv;
 	struct reservation_object *resv;
 	struct drm_gem_object *obj;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int ret;
 
 	if (arg->flags & ~VGEM_FENCE_WRITE)
@@ -209,8 +210,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 	}
 err_fence:
 	if (ret) {
-		fence_signal(fence);
-		fence_put(fence);
+		dma_fence_signal(fence);
+		dma_fence_put(fence);
 	}
 err:
 	drm_gem_object_unreference_unlocked(obj);
@@ -239,7 +240,7 @@ int vgem_fence_signal_ioctl(struct drm_device *dev,
 {
 	struct vgem_file *vfile = file->driver_priv;
 	struct drm_vgem_fence_signal *arg = data;
-	struct fence *fence;
+	struct dma_fence *fence;
 	int ret = 0;
 
 	if (arg->flags)
@@ -253,11 +254,11 @@ int vgem_fence_signal_ioctl(struct drm_device *dev,
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		ret = -ETIMEDOUT;
 
-	fence_signal(fence);
-	fence_put(fence);
+	dma_fence_signal(fence);
+	dma_fence_put(fence);
 	return ret;
 }
 
@@ -271,8 +272,8 @@ int vgem_fence_open(struct vgem_file *vfile)
 
 static int __vgem_fence_idr_fini(int id, void *p, void *data)
 {
-	fence_signal(p);
-	fence_put(p);
+	dma_fence_signal(p);
+	dma_fence_put(p);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index ae59080d63d1..ec1ebdcfe80b 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -82,7 +82,7 @@ struct virtio_gpu_fence_driver {
 };
 
 struct virtio_gpu_fence {
-	struct fence f;
+	struct dma_fence f;
 	struct virtio_gpu_fence_driver *drv;
 	struct list_head node;
 	uint64_t seq;
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index f3f70fa8a4c7..23353521f903 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -26,22 +26,22 @@
 #include <drm/drmP.h>
 #include "virtgpu_drv.h"
 
-static const char *virtio_get_driver_name(struct fence *f)
+static const char *virtio_get_driver_name(struct dma_fence *f)
 {
 	return "virtio_gpu";
 }
 
-static const char *virtio_get_timeline_name(struct fence *f)
+static const char *virtio_get_timeline_name(struct dma_fence *f)
 {
 	return "controlq";
 }
 
-static bool virtio_enable_signaling(struct fence *f)
+static bool virtio_enable_signaling(struct dma_fence *f)
 {
 	return true;
 }
 
-static bool virtio_signaled(struct fence *f)
+static bool virtio_signaled(struct dma_fence *f)
 {
 	struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
@@ -50,26 +50,26 @@ static bool virtio_signaled(struct fence *f)
 	return false;
 }
 
-static void virtio_fence_value_str(struct fence *f, char *str, int size)
+static void virtio_fence_value_str(struct dma_fence *f, char *str, int size)
 {
 	struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
 	snprintf(str, size, "%llu", fence->seq);
 }
 
-static void virtio_timeline_value_str(struct fence *f, char *str, int size)
+static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size)
 {
 	struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
 	snprintf(str, size, "%llu", (u64)atomic64_read(&fence->drv->last_seq));
 }
 
-static const struct fence_ops virtio_fence_ops = {
+static const struct dma_fence_ops virtio_fence_ops = {
 	.get_driver_name     = virtio_get_driver_name,
 	.get_timeline_name   = virtio_get_timeline_name,
 	.enable_signaling    = virtio_enable_signaling,
 	.signaled            = virtio_signaled,
-	.wait                = fence_default_wait,
+	.wait                = dma_fence_default_wait,
 	.fence_value_str     = virtio_fence_value_str,
 	.timeline_value_str  = virtio_timeline_value_str,
 };
@@ -88,9 +88,9 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 	spin_lock_irqsave(&drv->lock, irq_flags);
 	(*fence)->drv = drv;
 	(*fence)->seq = ++drv->sync_seq;
-	fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
-		   drv->context, (*fence)->seq);
-	fence_get(&(*fence)->f);
+	dma_fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
+		       drv->context, (*fence)->seq);
+	dma_fence_get(&(*fence)->f);
 	list_add_tail(&(*fence)->node, &drv->fences);
 	spin_unlock_irqrestore(&drv->lock, irq_flags);
 
@@ -111,9 +111,9 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
 	list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
 		if (last_seq < fence->seq)
 			continue;
-		fence_signal_locked(&fence->f);
+		dma_fence_signal_locked(&fence->f);
 		list_del(&fence->node);
-		fence_put(&fence->f);
+		dma_fence_put(&fence->f);
 	}
 	spin_unlock_irqrestore(&drv->lock, irq_flags);
 }
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 818478b4c4f0..61f3a963af95 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -172,7 +172,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* fence the command bo */
 	virtio_gpu_unref_list(&validate_list);
 	drm_free_large(buflist);
-	fence_put(&fence->f);
+	dma_fence_put(&fence->f);
 	return 0;
 
 out_unresv:
@@ -298,7 +298,7 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data,
 		drm_gem_object_release(obj);
 		if (vgdev->has_virgl_3d) {
 			virtio_gpu_unref_list(&validate_list);
-			fence_put(&fence->f);
+			dma_fence_put(&fence->f);
 		}
 		return ret;
 	}
@@ -309,13 +309,13 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data,
 
 	if (vgdev->has_virgl_3d) {
 		virtio_gpu_unref_list(&validate_list);
-		fence_put(&fence->f);
+		dma_fence_put(&fence->f);
 	}
 	return 0;
 fail_unref:
 	if (vgdev->has_virgl_3d) {
 		virtio_gpu_unref_list(&validate_list);
-		fence_put(&fence->f);
+		dma_fence_put(&fence->f);
 	}
 //fail_obj:
 //	drm_gem_object_handle_unreference_unlocked(obj);
@@ -383,7 +383,7 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev,
 	reservation_object_add_excl_fence(qobj->tbo.resv,
 					  &fence->f);
 
-	fence_put(&fence->f);
+	dma_fence_put(&fence->f);
 out_unres:
 	virtio_gpu_object_unreserve(qobj);
 out:
@@ -431,7 +431,7 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data,
 			 args->level, &box, &fence);
 		reservation_object_add_excl_fence(qobj->tbo.resv,
 						  &fence->f);
-		fence_put(&fence->f);
+		dma_fence_put(&fence->f);
 	}
 
 out_unres:
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 036b0fbae0fb..1235519853f4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -159,7 +159,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
 	virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func);
 	virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func);
 
-	vgdev->fence_drv.context = fence_context_alloc(1);
+	vgdev->fence_drv.context = dma_fence_context_alloc(1);
 	spin_lock_init(&vgdev->fence_drv.lock);
 	INIT_LIST_HEAD(&vgdev->fence_drv.fences);
 	INIT_LIST_HEAD(&vgdev->cap_cache);
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index ba28c0f6f28a..cb75f0663ba0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -152,7 +152,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 		if (!ret) {
 			reservation_object_add_excl_fence(bo->tbo.resv,
 							  &fence->f);
-			fence_put(&fence->f);
+			dma_fence_put(&fence->f);
 			fence = NULL;
 			virtio_gpu_object_unreserve(bo);
 			virtio_gpu_object_wait(bo, false);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 26ac8e80a478..6541dd8b82dc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -108,7 +108,7 @@ fman_from_fence(struct vmw_fence_obj *fence)
  * objects with actions attached to them.
  */
 
-static void vmw_fence_obj_destroy(struct fence *f)
+static void vmw_fence_obj_destroy(struct dma_fence *f)
 {
 	struct vmw_fence_obj *fence =
 		container_of(f, struct vmw_fence_obj, base);
@@ -123,17 +123,17 @@ static void vmw_fence_obj_destroy(struct fence *f)
 	fence->destroy(fence);
 }
 
-static const char *vmw_fence_get_driver_name(struct fence *f)
+static const char *vmw_fence_get_driver_name(struct dma_fence *f)
 {
 	return "vmwgfx";
 }
 
-static const char *vmw_fence_get_timeline_name(struct fence *f)
+static const char *vmw_fence_get_timeline_name(struct dma_fence *f)
 {
 	return "svga";
 }
 
-static bool vmw_fence_enable_signaling(struct fence *f)
+static bool vmw_fence_enable_signaling(struct dma_fence *f)
 {
 	struct vmw_fence_obj *fence =
 		container_of(f, struct vmw_fence_obj, base);
@@ -152,12 +152,12 @@ static bool vmw_fence_enable_signaling(struct fence *f)
 }
 
 struct vmwgfx_wait_cb {
-	struct fence_cb base;
+	struct dma_fence_cb base;
 	struct task_struct *task;
 };
 
 static void
-vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb)
+vmwgfx_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct vmwgfx_wait_cb *wait =
 		container_of(cb, struct vmwgfx_wait_cb, base);
@@ -167,7 +167,7 @@ vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb)
 
 static void __vmw_fences_update(struct vmw_fence_manager *fman);
 
-static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
+static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
 {
 	struct vmw_fence_obj *fence =
 		container_of(f, struct vmw_fence_obj, base);
@@ -197,7 +197,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
 
 	while (ret > 0) {
 		__vmw_fences_update(fman);
-		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &f->flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
 			break;
 
 		if (intr)
@@ -225,7 +225,7 @@ out:
 	return ret;
 }
 
-static struct fence_ops vmw_fence_ops = {
+static struct dma_fence_ops vmw_fence_ops = {
 	.get_driver_name = vmw_fence_get_driver_name,
 	.get_timeline_name = vmw_fence_get_timeline_name,
 	.enable_signaling = vmw_fence_enable_signaling,
@@ -298,7 +298,7 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 	fman->event_fence_action_size =
 		ttm_round_pot(sizeof(struct vmw_event_fence_action));
 	mutex_init(&fman->goal_irq_mutex);
-	fman->ctx = fence_context_alloc(1);
+	fman->ctx = dma_fence_context_alloc(1);
 
 	return fman;
 }
@@ -326,8 +326,8 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
 	unsigned long irq_flags;
 	int ret = 0;
 
-	fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
-		   fman->ctx, seqno);
+	dma_fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
+		       fman->ctx, seqno);
 	INIT_LIST_HEAD(&fence->seq_passed_actions);
 	fence->destroy = destroy;
 
@@ -431,7 +431,7 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
 	u32 goal_seqno;
 	u32 *fifo_mem;
 
-	if (fence_is_signaled_locked(&fence->base))
+	if (dma_fence_is_signaled_locked(&fence->base))
 		return false;
 
 	fifo_mem = fman->dev_priv->mmio_virt;
@@ -459,7 +459,7 @@ rerun:
 	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
 		if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
 			list_del_init(&fence->head);
-			fence_signal_locked(&fence->base);
+			dma_fence_signal_locked(&fence->base);
 			INIT_LIST_HEAD(&action_list);
 			list_splice_init(&fence->seq_passed_actions,
 					 &action_list);
@@ -500,18 +500,18 @@ bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
 {
 	struct vmw_fence_manager *fman = fman_from_fence(fence);
 
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
 		return 1;
 
 	vmw_fences_update(fman);
 
-	return fence_is_signaled(&fence->base);
+	return dma_fence_is_signaled(&fence->base);
 }
 
 int vmw_fence_obj_wait(struct vmw_fence_obj *fence, bool lazy,
 		       bool interruptible, unsigned long timeout)
 {
-	long ret = fence_wait_timeout(&fence->base, interruptible, timeout);
+	long ret = dma_fence_wait_timeout(&fence->base, interruptible, timeout);
 
 	if (likely(ret > 0))
 		return 0;
@@ -530,7 +530,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
 
 static void vmw_fence_destroy(struct vmw_fence_obj *fence)
 {
-	fence_free(&fence->base);
+	dma_fence_free(&fence->base);
 }
 
 int vmw_fence_create(struct vmw_fence_manager *fman,
@@ -669,7 +669,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
 		struct vmw_fence_obj *fence =
 			list_entry(fman->fence_list.prev, struct vmw_fence_obj,
 				   head);
-		fence_get(&fence->base);
+		dma_fence_get(&fence->base);
 		spin_unlock_irq(&fman->lock);
 
 		ret = vmw_fence_obj_wait(fence, false, false,
@@ -677,7 +677,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
 
 		if (unlikely(ret != 0)) {
 			list_del_init(&fence->head);
-			fence_signal(&fence->base);
+			dma_fence_signal(&fence->base);
 			INIT_LIST_HEAD(&action_list);
 			list_splice_init(&fence->seq_passed_actions,
 					 &action_list);
@@ -685,7 +685,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
 		}
 
 		BUG_ON(!list_empty(&fence->head));
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 		spin_lock_irq(&fman->lock);
 	}
 	spin_unlock_irq(&fman->lock);
@@ -884,7 +884,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
 	spin_lock_irqsave(&fman->lock, irq_flags);
 
 	fman->pending_actions[action->type]++;
-	if (fence_is_signaled_locked(&fence->base)) {
+	if (dma_fence_is_signaled_locked(&fence->base)) {
 		struct list_head action_list;
 
 		INIT_LIST_HEAD(&action_list);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index 83ae301ee141..d9d85aa6ed20 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -27,7 +27,7 @@
 
 #ifndef _VMWGFX_FENCE_H_
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
 
@@ -52,7 +52,7 @@ struct vmw_fence_action {
 };
 
 struct vmw_fence_obj {
-	struct fence base;
+	struct dma_fence base;
 
 	struct list_head head;
 	struct list_head seq_passed_actions;
@@ -71,14 +71,14 @@ vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
 
 	*fence_p = NULL;
 	if (fence)
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 }
 
 static inline struct vmw_fence_obj *
 vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 {
 	if (fence)
-		fence_get(&fence->base);
+		dma_fence_get(&fence->base);
 	return fence;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 1a85fb2d4dc6..8e86d6d4141b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1454,7 +1454,7 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo,
 	if (fence == NULL) {
 		vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
 		reservation_object_add_excl_fence(bo->resv, &fence->base);
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 	} else
 		reservation_object_add_excl_fence(bo->resv, &fence->base);
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 672644031bd5..e336e3901876 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -57,7 +57,7 @@
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
@@ -362,7 +362,7 @@ struct drm_ioctl_desc {
 struct drm_pending_event {
 	struct completion *completion;
 	struct drm_event *event;
-	struct fence *fence;
+	struct dma_fence *fence;
 	struct list_head link;
 	struct list_head pending_link;
 	struct drm_file *file_priv;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index bc860cfc67ca..fa1aa214c8ea 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -54,7 +54,7 @@ struct drm_mode_set;
 struct drm_file;
 struct drm_clip_rect;
 struct device_node;
-struct fence;
+struct dma_fence;
 struct edid;
 
 static inline int64_t U642I64(uint64_t val)
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 98b39d66eb32..c5e8a0df1623 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -59,7 +59,7 @@ struct drm_plane_state {
 
 	struct drm_crtc *crtc;   /* do not write directly, use drm_atomic_set_crtc_for_plane() */
 	struct drm_framebuffer *fb;  /* do not write directly, use drm_atomic_set_fb_for_plane() */
-	struct fence *fence;
+	struct dma_fence *fence;
 
 	/* Signed dest location allows it to be partially off screen */
 	int32_t crtc_x, crtc_y;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 9eb940d6755f..5beae7969bf7 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -209,7 +209,7 @@ struct ttm_buffer_object {
 	 * Members protected by a bo reservation.
 	 */
 
-	struct fence *moving;
+	struct dma_fence *moving;
 
 	struct drm_vma_offset_node vma_node;
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4f0a92185995..27e9c26c9150 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -303,7 +303,7 @@ struct ttm_mem_type_manager {
 	/*
 	 * Protected by @move_lock.
 	 */
-	struct fence *move;
+	struct dma_fence *move;
 };
 
 /**
@@ -1025,7 +1025,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
  */
 
 extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
-				     struct fence *fence, bool evict,
+				     struct dma_fence *fence, bool evict,
 				     struct ttm_mem_reg *new_mem);
 
 /**
@@ -1040,7 +1040,7 @@ extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
  * immediately or hang it on a temporary buffer object.
  */
 int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
-			 struct fence *fence, bool evict,
+			 struct dma_fence *fence, bool evict,
 			 struct ttm_mem_reg *new_mem);
 
 /**
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index b620c317c772..47f35b8e6d09 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -114,6 +114,6 @@ extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 
 extern void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 					struct list_head *list,
-					struct fence *fence);
+					struct dma_fence *fence);
 
 #endif
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e0b0741ae671..8daeb3ce0016 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -30,7 +30,7 @@
 #include <linux/list.h>
 #include <linux/dma-mapping.h>
 #include <linux/fs.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/wait.h>
 
 struct device;
@@ -143,7 +143,7 @@ struct dma_buf {
 	wait_queue_head_t poll;
 
 	struct dma_buf_poll_cb_t {
-		struct fence_cb cb;
+		struct dma_fence_cb cb;
 		wait_queue_head_t *poll;
 
 		unsigned long active;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
new file mode 100644
index 000000000000..5900945f962d
--- /dev/null
+++ b/include/linux/dma-fence-array.h
@@ -0,0 +1,86 @@
+/*
+ * fence-array: aggregates fence to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Gustavo Padovan <gustavo@padovan.org>
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_DMA_FENCE_ARRAY_H
+#define __LINUX_DMA_FENCE_ARRAY_H
+
+#include <linux/dma-fence.h>
+
+/**
+ * struct dma_fence_array_cb - callback helper for fence array
+ * @cb: fence callback structure for signaling
+ * @array: reference to the parent fence array object
+ */
+struct dma_fence_array_cb {
+	struct dma_fence_cb cb;
+	struct dma_fence_array *array;
+};
+
+/**
+ * struct dma_fence_array - fence to represent an array of fences
+ * @base: fence base class
+ * @lock: spinlock for fence handling
+ * @num_fences: number of fences in the array
+ * @num_pending: fences in the array still pending
+ * @fences: array of the fences
+ */
+struct dma_fence_array {
+	struct dma_fence base;
+
+	spinlock_t lock;
+	unsigned num_fences;
+	atomic_t num_pending;
+	struct dma_fence **fences;
+};
+
+extern const struct dma_fence_ops dma_fence_array_ops;
+
+/**
+ * dma_fence_is_array - check if a fence is from the array subsclass
+ * @fence: fence to test
+ *
+ * Return true if it is a dma_fence_array and false otherwise.
+ */
+static inline bool dma_fence_is_array(struct dma_fence *fence)
+{
+	return fence->ops == &dma_fence_array_ops;
+}
+
+/**
+ * to_dma_fence_array - cast a fence to a dma_fence_array
+ * @fence: fence to cast to a dma_fence_array
+ *
+ * Returns NULL if the fence is not a dma_fence_array,
+ * or the dma_fence_array otherwise.
+ */
+static inline struct dma_fence_array *
+to_dma_fence_array(struct dma_fence *fence)
+{
+	if (fence->ops != &dma_fence_array_ops)
+		return NULL;
+
+	return container_of(fence, struct dma_fence_array, base);
+}
+
+struct dma_fence_array *dma_fence_array_create(int num_fences,
+					       struct dma_fence **fences,
+					       u64 context, unsigned seqno,
+					       bool signal_on_any);
+
+#endif /* __LINUX_DMA_FENCE_ARRAY_H */
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
new file mode 100644
index 000000000000..ba60c043a5d3
--- /dev/null
+++ b/include/linux/dma-fence.h
@@ -0,0 +1,437 @@
+/*
+ * Fence mechanism for dma-buf to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_DMA_FENCE_H
+#define __LINUX_DMA_FENCE_H
+
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/bitops.h>
+#include <linux/kref.h>
+#include <linux/sched.h>
+#include <linux/printk.h>
+#include <linux/rcupdate.h>
+
+struct dma_fence;
+struct dma_fence_ops;
+struct dma_fence_cb;
+
+/**
+ * struct dma_fence - software synchronization primitive
+ * @refcount: refcount for this fence
+ * @ops: dma_fence_ops associated with this fence
+ * @rcu: used for releasing fence with kfree_rcu
+ * @cb_list: list of all callbacks to call
+ * @lock: spin_lock_irqsave used for locking
+ * @context: execution context this fence belongs to, returned by
+ *           dma_fence_context_alloc()
+ * @seqno: the sequence number of this fence inside the execution context,
+ * can be compared to decide which fence would be signaled later.
+ * @flags: A mask of DMA_FENCE_FLAG_* defined below
+ * @timestamp: Timestamp when the fence was signaled.
+ * @status: Optional, only valid if < 0, must be set before calling
+ * dma_fence_signal, indicates that the fence has completed with an error.
+ *
+ * the flags member must be manipulated and read using the appropriate
+ * atomic ops (bit_*), so taking the spinlock will not be needed most
+ * of the time.
+ *
+ * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
+ * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
+ * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
+ * implementer of the fence for its own purposes. Can be used in different
+ * ways by different fence implementers, so do not rely on this.
+ *
+ * Since atomic bitops are used, this is not guaranteed to be the case.
+ * Particularly, if the bit was set, but dma_fence_signal was called right
+ * before this bit was set, it would have been able to set the
+ * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
+ * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting
+ * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
+ * after dma_fence_signal was called, any enable_signaling call will have either
+ * been completed, or never called at all.
+ */
+struct dma_fence {
+	struct kref refcount;
+	const struct dma_fence_ops *ops;
+	struct rcu_head rcu;
+	struct list_head cb_list;
+	spinlock_t *lock;
+	u64 context;
+	unsigned seqno;
+	unsigned long flags;
+	ktime_t timestamp;
+	int status;
+};
+
+enum dma_fence_flag_bits {
+	DMA_FENCE_FLAG_SIGNALED_BIT,
+	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
+};
+
+typedef void (*dma_fence_func_t)(struct dma_fence *fence,
+				 struct dma_fence_cb *cb);
+
+/**
+ * struct dma_fence_cb - callback for dma_fence_add_callback
+ * @node: used by dma_fence_add_callback to append this struct to fence::cb_list
+ * @func: dma_fence_func_t to call
+ *
+ * This struct will be initialized by dma_fence_add_callback, additional
+ * data can be passed along by embedding dma_fence_cb in another struct.
+ */
+struct dma_fence_cb {
+	struct list_head node;
+	dma_fence_func_t func;
+};
+
+/**
+ * struct dma_fence_ops - operations implemented for fence
+ * @get_driver_name: returns the driver name.
+ * @get_timeline_name: return the name of the context this fence belongs to.
+ * @enable_signaling: enable software signaling of fence.
+ * @signaled: [optional] peek whether the fence is signaled, can be null.
+ * @wait: custom wait implementation, or dma_fence_default_wait.
+ * @release: [optional] called on destruction of fence, can be null
+ * @fill_driver_data: [optional] callback to fill in free-form debug info
+ * Returns amount of bytes filled, or -errno.
+ * @fence_value_str: [optional] fills in the value of the fence as a string
+ * @timeline_value_str: [optional] fills in the current value of the timeline
+ * as a string
+ *
+ * Notes on enable_signaling:
+ * For fence implementations that have the capability for hw->hw
+ * signaling, they can implement this op to enable the necessary
+ * irqs, or insert commands into cmdstream, etc.  This is called
+ * in the first wait() or add_callback() path to let the fence
+ * implementation know that there is another driver waiting on
+ * the signal (ie. hw->sw case).
+ *
+ * This function can be called called from atomic context, but not
+ * from irq context, so normal spinlocks can be used.
+ *
+ * A return value of false indicates the fence already passed,
+ * or some failure occurred that made it impossible to enable
+ * signaling. True indicates successful enabling.
+ *
+ * fence->status may be set in enable_signaling, but only when false is
+ * returned.
+ *
+ * Calling dma_fence_signal before enable_signaling is called allows
+ * for a tiny race window in which enable_signaling is called during,
+ * before, or after dma_fence_signal. To fight this, it is recommended
+ * that before enable_signaling returns true an extra reference is
+ * taken on the fence, to be released when the fence is signaled.
+ * This will mean dma_fence_signal will still be called twice, but
+ * the second time will be a noop since it was already signaled.
+ *
+ * Notes on signaled:
+ * May set fence->status if returning true.
+ *
+ * Notes on wait:
+ * Must not be NULL, set to dma_fence_default_wait for default implementation.
+ * the dma_fence_default_wait implementation should work for any fence, as long
+ * as enable_signaling works correctly.
+ *
+ * Must return -ERESTARTSYS if the wait is intr = true and the wait was
+ * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
+ * timed out. Can also return other error values on custom implementations,
+ * which should be treated as if the fence is signaled. For example a hardware
+ * lockup could be reported like that.
+ *
+ * Notes on release:
+ * Can be NULL, this function allows additional commands to run on
+ * destruction of the fence. Can be called from irq context.
+ * If pointer is set to NULL, kfree will get called instead.
+ */
+
+struct dma_fence_ops {
+	const char * (*get_driver_name)(struct dma_fence *fence);
+	const char * (*get_timeline_name)(struct dma_fence *fence);
+	bool (*enable_signaling)(struct dma_fence *fence);
+	bool (*signaled)(struct dma_fence *fence);
+	signed long (*wait)(struct dma_fence *fence,
+			    bool intr, signed long timeout);
+	void (*release)(struct dma_fence *fence);
+
+	int (*fill_driver_data)(struct dma_fence *fence, void *data, int size);
+	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
+	void (*timeline_value_str)(struct dma_fence *fence,
+				   char *str, int size);
+};
+
+void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
+		    spinlock_t *lock, u64 context, unsigned seqno);
+
+void dma_fence_release(struct kref *kref);
+void dma_fence_free(struct dma_fence *fence);
+
+/**
+ * dma_fence_put - decreases refcount of the fence
+ * @fence:	[in]	fence to reduce refcount of
+ */
+static inline void dma_fence_put(struct dma_fence *fence)
+{
+	if (fence)
+		kref_put(&fence->refcount, dma_fence_release);
+}
+
+/**
+ * dma_fence_get - increases refcount of the fence
+ * @fence:	[in]	fence to increase refcount of
+ *
+ * Returns the same fence, with refcount increased by 1.
+ */
+static inline struct dma_fence *dma_fence_get(struct dma_fence *fence)
+{
+	if (fence)
+		kref_get(&fence->refcount);
+	return fence;
+}
+
+/**
+ * dma_fence_get_rcu - get a fence from a reservation_object_list with
+ *                     rcu read lock
+ * @fence:	[in]	fence to increase refcount of
+ *
+ * Function returns NULL if no refcount could be obtained, or the fence.
+ */
+static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
+{
+	if (kref_get_unless_zero(&fence->refcount))
+		return fence;
+	else
+		return NULL;
+}
+
+/**
+ * dma_fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
+ * @fence:	[in]	pointer to fence to increase refcount of
+ *
+ * Function returns NULL if no refcount could be obtained, or the fence.
+ * This function handles acquiring a reference to a fence that may be
+ * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * so long as the caller is using RCU on the pointer to the fence.
+ *
+ * An alternative mechanism is to employ a seqlock to protect a bunch of
+ * fences, such as used by struct reservation_object. When using a seqlock,
+ * the seqlock must be taken before and checked after a reference to the
+ * fence is acquired (as shown here).
+ *
+ * The caller is required to hold the RCU read lock.
+ */
+static inline struct dma_fence *
+dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
+{
+	do {
+		struct dma_fence *fence;
+
+		fence = rcu_dereference(*fencep);
+		if (!fence || !dma_fence_get_rcu(fence))
+			return NULL;
+
+		/* The atomic_inc_not_zero() inside dma_fence_get_rcu()
+		 * provides a full memory barrier upon success (such as now).
+		 * This is paired with the write barrier from assigning
+		 * to the __rcu protected fence pointer so that if that
+		 * pointer still matches the current fence, we know we
+		 * have successfully acquire a reference to it. If it no
+		 * longer matches, we are holding a reference to some other
+		 * reallocated pointer. This is possible if the allocator
+		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * fence remains valid for the RCU grace period, but it
+		 * may be reallocated. When using such allocators, we are
+		 * responsible for ensuring the reference we get is to
+		 * the right fence, as below.
+		 */
+		if (fence == rcu_access_pointer(*fencep))
+			return rcu_pointer_handoff(fence);
+
+		dma_fence_put(fence);
+	} while (1);
+}
+
+int dma_fence_signal(struct dma_fence *fence);
+int dma_fence_signal_locked(struct dma_fence *fence);
+signed long dma_fence_default_wait(struct dma_fence *fence,
+				   bool intr, signed long timeout);
+int dma_fence_add_callback(struct dma_fence *fence,
+			   struct dma_fence_cb *cb,
+			   dma_fence_func_t func);
+bool dma_fence_remove_callback(struct dma_fence *fence,
+			       struct dma_fence_cb *cb);
+void dma_fence_enable_sw_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_is_signaled_locked - Return an indication if the fence
+ *                                is signaled yet.
+ * @fence:	[in]	the fence to check
+ *
+ * Returns true if the fence was already signaled, false if not. Since this
+ * function doesn't enable signaling, it is not guaranteed to ever return
+ * true if dma_fence_add_callback, dma_fence_wait or
+ * dma_fence_enable_sw_signaling haven't been called before.
+ *
+ * This function requires fence->lock to be held.
+ */
+static inline bool
+dma_fence_is_signaled_locked(struct dma_fence *fence)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return true;
+
+	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+		dma_fence_signal_locked(fence);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * dma_fence_is_signaled - Return an indication if the fence is signaled yet.
+ * @fence:	[in]	the fence to check
+ *
+ * Returns true if the fence was already signaled, false if not. Since this
+ * function doesn't enable signaling, it is not guaranteed to ever return
+ * true if dma_fence_add_callback, dma_fence_wait or
+ * dma_fence_enable_sw_signaling haven't been called before.
+ *
+ * It's recommended for seqno fences to call dma_fence_signal when the
+ * operation is complete, it makes it possible to prevent issues from
+ * wraparound between time of issue and time of use by checking the return
+ * value of this function before calling hardware-specific wait instructions.
+ */
+static inline bool
+dma_fence_is_signaled(struct dma_fence *fence)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return true;
+
+	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+		dma_fence_signal(fence);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * dma_fence_is_later - return if f1 is chronologically later than f2
+ * @f1:	[in]	the first fence from the same context
+ * @f2:	[in]	the second fence from the same context
+ *
+ * Returns true if f1 is chronologically later than f2. Both fences must be
+ * from the same context, since a seqno is not re-used across contexts.
+ */
+static inline bool dma_fence_is_later(struct dma_fence *f1,
+				      struct dma_fence *f2)
+{
+	if (WARN_ON(f1->context != f2->context))
+		return false;
+
+	return (int)(f1->seqno - f2->seqno) > 0;
+}
+
+/**
+ * dma_fence_later - return the chronologically later fence
+ * @f1:	[in]	the first fence from the same context
+ * @f2:	[in]	the second fence from the same context
+ *
+ * Returns NULL if both fences are signaled, otherwise the fence that would be
+ * signaled last. Both fences must be from the same context, since a seqno is
+ * not re-used across contexts.
+ */
+static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
+						struct dma_fence *f2)
+{
+	if (WARN_ON(f1->context != f2->context))
+		return NULL;
+
+	/*
+	 * Can't check just DMA_FENCE_FLAG_SIGNALED_BIT here, it may never
+	 * have been set if enable_signaling wasn't called, and enabling that
+	 * here is overkill.
+	 */
+	if (dma_fence_is_later(f1, f2))
+		return dma_fence_is_signaled(f1) ? NULL : f1;
+	else
+		return dma_fence_is_signaled(f2) ? NULL : f2;
+}
+
+signed long dma_fence_wait_timeout(struct dma_fence *,
+				   bool intr, signed long timeout);
+signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
+				       uint32_t count,
+				       bool intr, signed long timeout);
+
+/**
+ * dma_fence_wait - sleep until the fence gets signaled
+ * @fence:	[in]	the fence to wait on
+ * @intr:	[in]	if true, do an interruptible wait
+ *
+ * This function will return -ERESTARTSYS if interrupted by a signal,
+ * or 0 if the fence was signaled. Other error values may be
+ * returned on custom implementations.
+ *
+ * Performs a synchronous wait on this fence. It is assumed the caller
+ * directly or indirectly holds a reference to the fence, otherwise the
+ * fence might be freed before return, resulting in undefined behavior.
+ */
+static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
+{
+	signed long ret;
+
+	/* Since dma_fence_wait_timeout cannot timeout with
+	 * MAX_SCHEDULE_TIMEOUT, only valid return values are
+	 * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT.
+	 */
+	ret = dma_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
+
+	return ret < 0 ? ret : 0;
+}
+
+u64 dma_fence_context_alloc(unsigned num);
+
+#define DMA_FENCE_TRACE(f, fmt, args...) \
+	do {								\
+		struct dma_fence *__ff = (f);				\
+		if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE))			\
+			pr_info("f %llu#%u: " fmt,			\
+				__ff->context, __ff->seqno, ##args);	\
+	} while (0)
+
+#define DMA_FENCE_WARN(f, fmt, args...) \
+	do {								\
+		struct dma_fence *__ff = (f);				\
+		pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
+			 ##args);					\
+	} while (0)
+
+#define DMA_FENCE_ERR(f, fmt, args...) \
+	do {								\
+		struct dma_fence *__ff = (f);				\
+		pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
+			##args);					\
+	} while (0)
+
+#endif /* __LINUX_DMA_FENCE_H */
diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h
deleted file mode 100644
index 9ea2bde10ac1..000000000000
--- a/include/linux/fence-array.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * fence-array: aggregates fence to be waited together
- *
- * Copyright (C) 2016 Collabora Ltd
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
- * Authors:
- *	Gustavo Padovan <gustavo@padovan.org>
- *	Christian König <christian.koenig@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __LINUX_FENCE_ARRAY_H
-#define __LINUX_FENCE_ARRAY_H
-
-#include <linux/fence.h>
-
-/**
- * struct fence_array_cb - callback helper for fence array
- * @cb: fence callback structure for signaling
- * @array: reference to the parent fence array object
- */
-struct fence_array_cb {
-	struct fence_cb cb;
-	struct fence_array *array;
-};
-
-/**
- * struct fence_array - fence to represent an array of fences
- * @base: fence base class
- * @lock: spinlock for fence handling
- * @num_fences: number of fences in the array
- * @num_pending: fences in the array still pending
- * @fences: array of the fences
- */
-struct fence_array {
-	struct fence base;
-
-	spinlock_t lock;
-	unsigned num_fences;
-	atomic_t num_pending;
-	struct fence **fences;
-};
-
-extern const struct fence_ops fence_array_ops;
-
-/**
- * fence_is_array - check if a fence is from the array subsclass
- * @fence: fence to test
- *
- * Return true if it is a fence_array and false otherwise.
- */
-static inline bool fence_is_array(struct fence *fence)
-{
-	return fence->ops == &fence_array_ops;
-}
-
-/**
- * to_fence_array - cast a fence to a fence_array
- * @fence: fence to cast to a fence_array
- *
- * Returns NULL if the fence is not a fence_array,
- * or the fence_array otherwise.
- */
-static inline struct fence_array *to_fence_array(struct fence *fence)
-{
-	if (fence->ops != &fence_array_ops)
-		return NULL;
-
-	return container_of(fence, struct fence_array, base);
-}
-
-struct fence_array *fence_array_create(int num_fences, struct fence **fences,
-				       u64 context, unsigned seqno,
-				       bool signal_on_any);
-
-#endif /* __LINUX_FENCE_ARRAY_H */
diff --git a/include/linux/fence.h b/include/linux/fence.h
deleted file mode 100644
index c9c5ba98c302..000000000000
--- a/include/linux/fence.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * Fence mechanism for dma-buf to allow for asynchronous dma access
- *
- * Copyright (C) 2012 Canonical Ltd
- * Copyright (C) 2012 Texas Instruments
- *
- * Authors:
- * Rob Clark <robdclark@gmail.com>
- * Maarten Lankhorst <maarten.lankhorst@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __LINUX_FENCE_H
-#define __LINUX_FENCE_H
-
-#include <linux/err.h>
-#include <linux/wait.h>
-#include <linux/list.h>
-#include <linux/bitops.h>
-#include <linux/kref.h>
-#include <linux/sched.h>
-#include <linux/printk.h>
-#include <linux/rcupdate.h>
-
-struct fence;
-struct fence_ops;
-struct fence_cb;
-
-/**
- * struct fence - software synchronization primitive
- * @refcount: refcount for this fence
- * @ops: fence_ops associated with this fence
- * @rcu: used for releasing fence with kfree_rcu
- * @cb_list: list of all callbacks to call
- * @lock: spin_lock_irqsave used for locking
- * @context: execution context this fence belongs to, returned by
- *           fence_context_alloc()
- * @seqno: the sequence number of this fence inside the execution context,
- * can be compared to decide which fence would be signaled later.
- * @flags: A mask of FENCE_FLAG_* defined below
- * @timestamp: Timestamp when the fence was signaled.
- * @status: Optional, only valid if < 0, must be set before calling
- * fence_signal, indicates that the fence has completed with an error.
- *
- * the flags member must be manipulated and read using the appropriate
- * atomic ops (bit_*), so taking the spinlock will not be needed most
- * of the time.
- *
- * FENCE_FLAG_SIGNALED_BIT - fence is already signaled
- * FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called*
- * FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
- * implementer of the fence for its own purposes. Can be used in different
- * ways by different fence implementers, so do not rely on this.
- *
- * Since atomic bitops are used, this is not guaranteed to be the case.
- * Particularly, if the bit was set, but fence_signal was called right
- * before this bit was set, it would have been able to set the
- * FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called.
- * Adding a check for FENCE_FLAG_SIGNALED_BIT after setting
- * FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that
- * after fence_signal was called, any enable_signaling call will have either
- * been completed, or never called at all.
- */
-struct fence {
-	struct kref refcount;
-	const struct fence_ops *ops;
-	struct rcu_head rcu;
-	struct list_head cb_list;
-	spinlock_t *lock;
-	u64 context;
-	unsigned seqno;
-	unsigned long flags;
-	ktime_t timestamp;
-	int status;
-};
-
-enum fence_flag_bits {
-	FENCE_FLAG_SIGNALED_BIT,
-	FENCE_FLAG_ENABLE_SIGNAL_BIT,
-	FENCE_FLAG_USER_BITS, /* must always be last member */
-};
-
-typedef void (*fence_func_t)(struct fence *fence, struct fence_cb *cb);
-
-/**
- * struct fence_cb - callback for fence_add_callback
- * @node: used by fence_add_callback to append this struct to fence::cb_list
- * @func: fence_func_t to call
- *
- * This struct will be initialized by fence_add_callback, additional
- * data can be passed along by embedding fence_cb in another struct.
- */
-struct fence_cb {
-	struct list_head node;
-	fence_func_t func;
-};
-
-/**
- * struct fence_ops - operations implemented for fence
- * @get_driver_name: returns the driver name.
- * @get_timeline_name: return the name of the context this fence belongs to.
- * @enable_signaling: enable software signaling of fence.
- * @signaled: [optional] peek whether the fence is signaled, can be null.
- * @wait: custom wait implementation, or fence_default_wait.
- * @release: [optional] called on destruction of fence, can be null
- * @fill_driver_data: [optional] callback to fill in free-form debug info
- * Returns amount of bytes filled, or -errno.
- * @fence_value_str: [optional] fills in the value of the fence as a string
- * @timeline_value_str: [optional] fills in the current value of the timeline
- * as a string
- *
- * Notes on enable_signaling:
- * For fence implementations that have the capability for hw->hw
- * signaling, they can implement this op to enable the necessary
- * irqs, or insert commands into cmdstream, etc.  This is called
- * in the first wait() or add_callback() path to let the fence
- * implementation know that there is another driver waiting on
- * the signal (ie. hw->sw case).
- *
- * This function can be called called from atomic context, but not
- * from irq context, so normal spinlocks can be used.
- *
- * A return value of false indicates the fence already passed,
- * or some failure occurred that made it impossible to enable
- * signaling. True indicates successful enabling.
- *
- * fence->status may be set in enable_signaling, but only when false is
- * returned.
- *
- * Calling fence_signal before enable_signaling is called allows
- * for a tiny race window in which enable_signaling is called during,
- * before, or after fence_signal. To fight this, it is recommended
- * that before enable_signaling returns true an extra reference is
- * taken on the fence, to be released when the fence is signaled.
- * This will mean fence_signal will still be called twice, but
- * the second time will be a noop since it was already signaled.
- *
- * Notes on signaled:
- * May set fence->status if returning true.
- *
- * Notes on wait:
- * Must not be NULL, set to fence_default_wait for default implementation.
- * the fence_default_wait implementation should work for any fence, as long
- * as enable_signaling works correctly.
- *
- * Must return -ERESTARTSYS if the wait is intr = true and the wait was
- * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
- * timed out. Can also return other error values on custom implementations,
- * which should be treated as if the fence is signaled. For example a hardware
- * lockup could be reported like that.
- *
- * Notes on release:
- * Can be NULL, this function allows additional commands to run on
- * destruction of the fence. Can be called from irq context.
- * If pointer is set to NULL, kfree will get called instead.
- */
-
-struct fence_ops {
-	const char * (*get_driver_name)(struct fence *fence);
-	const char * (*get_timeline_name)(struct fence *fence);
-	bool (*enable_signaling)(struct fence *fence);
-	bool (*signaled)(struct fence *fence);
-	signed long (*wait)(struct fence *fence, bool intr, signed long timeout);
-	void (*release)(struct fence *fence);
-
-	int (*fill_driver_data)(struct fence *fence, void *data, int size);
-	void (*fence_value_str)(struct fence *fence, char *str, int size);
-	void (*timeline_value_str)(struct fence *fence, char *str, int size);
-};
-
-void fence_init(struct fence *fence, const struct fence_ops *ops,
-		spinlock_t *lock, u64 context, unsigned seqno);
-
-void fence_release(struct kref *kref);
-void fence_free(struct fence *fence);
-
-/**
- * fence_put - decreases refcount of the fence
- * @fence:	[in]	fence to reduce refcount of
- */
-static inline void fence_put(struct fence *fence)
-{
-	if (fence)
-		kref_put(&fence->refcount, fence_release);
-}
-
-/**
- * fence_get - increases refcount of the fence
- * @fence:	[in]	fence to increase refcount of
- *
- * Returns the same fence, with refcount increased by 1.
- */
-static inline struct fence *fence_get(struct fence *fence)
-{
-	if (fence)
-		kref_get(&fence->refcount);
-	return fence;
-}
-
-/**
- * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock
- * @fence:	[in]	fence to increase refcount of
- *
- * Function returns NULL if no refcount could be obtained, or the fence.
- */
-static inline struct fence *fence_get_rcu(struct fence *fence)
-{
-	if (kref_get_unless_zero(&fence->refcount))
-		return fence;
-	else
-		return NULL;
-}
-
-/**
- * fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
- * @fence:	[in]	pointer to fence to increase refcount of
- *
- * Function returns NULL if no refcount could be obtained, or the fence.
- * This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
- * so long as the caller is using RCU on the pointer to the fence.
- *
- * An alternative mechanism is to employ a seqlock to protect a bunch of
- * fences, such as used by struct reservation_object. When using a seqlock,
- * the seqlock must be taken before and checked after a reference to the
- * fence is acquired (as shown here).
- *
- * The caller is required to hold the RCU read lock.
- */
-static inline struct fence *fence_get_rcu_safe(struct fence * __rcu *fencep)
-{
-	do {
-		struct fence *fence;
-
-		fence = rcu_dereference(*fencep);
-		if (!fence || !fence_get_rcu(fence))
-			return NULL;
-
-		/* The atomic_inc_not_zero() inside fence_get_rcu()
-		 * provides a full memory barrier upon success (such as now).
-		 * This is paired with the write barrier from assigning
-		 * to the __rcu protected fence pointer so that if that
-		 * pointer still matches the current fence, we know we
-		 * have successfully acquire a reference to it. If it no
-		 * longer matches, we are holding a reference to some other
-		 * reallocated pointer. This is possible if the allocator
-		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
-		 * fence remains valid for the RCU grace period, but it
-		 * may be reallocated. When using such allocators, we are
-		 * responsible for ensuring the reference we get is to
-		 * the right fence, as below.
-		 */
-		if (fence == rcu_access_pointer(*fencep))
-			return rcu_pointer_handoff(fence);
-
-		fence_put(fence);
-	} while (1);
-}
-
-int fence_signal(struct fence *fence);
-int fence_signal_locked(struct fence *fence);
-signed long fence_default_wait(struct fence *fence, bool intr, signed long timeout);
-int fence_add_callback(struct fence *fence, struct fence_cb *cb,
-		       fence_func_t func);
-bool fence_remove_callback(struct fence *fence, struct fence_cb *cb);
-void fence_enable_sw_signaling(struct fence *fence);
-
-/**
- * fence_is_signaled_locked - Return an indication if the fence is signaled yet.
- * @fence:	[in]	the fence to check
- *
- * Returns true if the fence was already signaled, false if not. Since this
- * function doesn't enable signaling, it is not guaranteed to ever return
- * true if fence_add_callback, fence_wait or fence_enable_sw_signaling
- * haven't been called before.
- *
- * This function requires fence->lock to be held.
- */
-static inline bool
-fence_is_signaled_locked(struct fence *fence)
-{
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return true;
-
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
-		fence_signal_locked(fence);
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * fence_is_signaled - Return an indication if the fence is signaled yet.
- * @fence:	[in]	the fence to check
- *
- * Returns true if the fence was already signaled, false if not. Since this
- * function doesn't enable signaling, it is not guaranteed to ever return
- * true if fence_add_callback, fence_wait or fence_enable_sw_signaling
- * haven't been called before.
- *
- * It's recommended for seqno fences to call fence_signal when the
- * operation is complete, it makes it possible to prevent issues from
- * wraparound between time of issue and time of use by checking the return
- * value of this function before calling hardware-specific wait instructions.
- */
-static inline bool
-fence_is_signaled(struct fence *fence)
-{
-	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return true;
-
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
-		fence_signal(fence);
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * fence_is_later - return if f1 is chronologically later than f2
- * @f1:	[in]	the first fence from the same context
- * @f2:	[in]	the second fence from the same context
- *
- * Returns true if f1 is chronologically later than f2. Both fences must be
- * from the same context, since a seqno is not re-used across contexts.
- */
-static inline bool fence_is_later(struct fence *f1, struct fence *f2)
-{
-	if (WARN_ON(f1->context != f2->context))
-		return false;
-
-	return (int)(f1->seqno - f2->seqno) > 0;
-}
-
-/**
- * fence_later - return the chronologically later fence
- * @f1:	[in]	the first fence from the same context
- * @f2:	[in]	the second fence from the same context
- *
- * Returns NULL if both fences are signaled, otherwise the fence that would be
- * signaled last. Both fences must be from the same context, since a seqno is
- * not re-used across contexts.
- */
-static inline struct fence *fence_later(struct fence *f1, struct fence *f2)
-{
-	if (WARN_ON(f1->context != f2->context))
-		return NULL;
-
-	/*
-	 * can't check just FENCE_FLAG_SIGNALED_BIT here, it may never have been
-	 * set if enable_signaling wasn't called, and enabling that here is
-	 * overkill.
-	 */
-	if (fence_is_later(f1, f2))
-		return fence_is_signaled(f1) ? NULL : f1;
-	else
-		return fence_is_signaled(f2) ? NULL : f2;
-}
-
-signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);
-signed long fence_wait_any_timeout(struct fence **fences, uint32_t count,
-				   bool intr, signed long timeout);
-
-/**
- * fence_wait - sleep until the fence gets signaled
- * @fence:	[in]	the fence to wait on
- * @intr:	[in]	if true, do an interruptible wait
- *
- * This function will return -ERESTARTSYS if interrupted by a signal,
- * or 0 if the fence was signaled. Other error values may be
- * returned on custom implementations.
- *
- * Performs a synchronous wait on this fence. It is assumed the caller
- * directly or indirectly holds a reference to the fence, otherwise the
- * fence might be freed before return, resulting in undefined behavior.
- */
-static inline signed long fence_wait(struct fence *fence, bool intr)
-{
-	signed long ret;
-
-	/* Since fence_wait_timeout cannot timeout with
-	 * MAX_SCHEDULE_TIMEOUT, only valid return values are
-	 * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT.
-	 */
-	ret = fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
-
-	return ret < 0 ? ret : 0;
-}
-
-u64 fence_context_alloc(unsigned num);
-
-#define FENCE_TRACE(f, fmt, args...) \
-	do {								\
-		struct fence *__ff = (f);				\
-		if (IS_ENABLED(CONFIG_FENCE_TRACE))			\
-			pr_info("f %llu#%u: " fmt,			\
-				__ff->context, __ff->seqno, ##args);	\
-	} while (0)
-
-#define FENCE_WARN(f, fmt, args...) \
-	do {								\
-		struct fence *__ff = (f);				\
-		pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
-			 ##args);					\
-	} while (0)
-
-#define FENCE_ERR(f, fmt, args...) \
-	do {								\
-		struct fence *__ff = (f);				\
-		pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
-			##args);					\
-	} while (0)
-
-#endif /* __LINUX_FENCE_H */
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index b0f305e77b7f..2e313cca08f0 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -40,7 +40,7 @@
 #define _LINUX_RESERVATION_H
 
 #include <linux/ww_mutex.h>
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
 #include <linux/rcupdate.h>
@@ -59,7 +59,7 @@ extern const char reservation_seqcount_string[];
 struct reservation_object_list {
 	struct rcu_head rcu;
 	u32 shared_count, shared_max;
-	struct fence __rcu *shared[];
+	struct dma_fence __rcu *shared[];
 };
 
 /**
@@ -74,7 +74,7 @@ struct reservation_object {
 	struct ww_mutex lock;
 	seqcount_t seq;
 
-	struct fence __rcu *fence_excl;
+	struct dma_fence __rcu *fence_excl;
 	struct reservation_object_list __rcu *fence;
 	struct reservation_object_list *staged;
 };
@@ -107,7 +107,7 @@ reservation_object_fini(struct reservation_object *obj)
 {
 	int i;
 	struct reservation_object_list *fobj;
-	struct fence *excl;
+	struct dma_fence *excl;
 
 	/*
 	 * This object should be dead and all references must have
@@ -115,12 +115,12 @@ reservation_object_fini(struct reservation_object *obj)
 	 */
 	excl = rcu_dereference_protected(obj->fence_excl, 1);
 	if (excl)
-		fence_put(excl);
+		dma_fence_put(excl);
 
 	fobj = rcu_dereference_protected(obj->fence, 1);
 	if (fobj) {
 		for (i = 0; i < fobj->shared_count; ++i)
-			fence_put(rcu_dereference_protected(fobj->shared[i], 1));
+			dma_fence_put(rcu_dereference_protected(fobj->shared[i], 1));
 
 		kfree(fobj);
 	}
@@ -155,7 +155,7 @@ reservation_object_get_list(struct reservation_object *obj)
  * RETURNS
  * The exclusive fence or NULL
  */
-static inline struct fence *
+static inline struct dma_fence *
 reservation_object_get_excl(struct reservation_object *obj)
 {
 	return rcu_dereference_protected(obj->fence_excl,
@@ -173,10 +173,10 @@ reservation_object_get_excl(struct reservation_object *obj)
  * RETURNS
  * The exclusive fence or NULL if none
  */
-static inline struct fence *
+static inline struct dma_fence *
 reservation_object_get_excl_rcu(struct reservation_object *obj)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	unsigned seq;
 retry:
 	seq = read_seqcount_begin(&obj->seq);
@@ -186,22 +186,22 @@ retry:
 		rcu_read_unlock();
 		goto retry;
 	}
-	fence = fence_get(fence);
+	fence = dma_fence_get(fence);
 	rcu_read_unlock();
 	return fence;
 }
 
 int reservation_object_reserve_shared(struct reservation_object *obj);
 void reservation_object_add_shared_fence(struct reservation_object *obj,
-					 struct fence *fence);
+					 struct dma_fence *fence);
 
 void reservation_object_add_excl_fence(struct reservation_object *obj,
-				       struct fence *fence);
+				       struct dma_fence *fence);
 
 int reservation_object_get_fences_rcu(struct reservation_object *obj,
-				      struct fence **pfence_excl,
+				      struct dma_fence **pfence_excl,
 				      unsigned *pshared_count,
-				      struct fence ***pshared);
+				      struct dma_fence ***pshared);
 
 long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
 					 bool wait_all, bool intr,
diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h
index a1ba6a5ccdd6..c58c535d12a8 100644
--- a/include/linux/seqno-fence.h
+++ b/include/linux/seqno-fence.h
@@ -20,7 +20,7 @@
 #ifndef __LINUX_SEQNO_FENCE_H
 #define __LINUX_SEQNO_FENCE_H
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/dma-buf.h>
 
 enum seqno_fence_condition {
@@ -29,15 +29,15 @@ enum seqno_fence_condition {
 };
 
 struct seqno_fence {
-	struct fence base;
+	struct dma_fence base;
 
-	const struct fence_ops *ops;
+	const struct dma_fence_ops *ops;
 	struct dma_buf *sync_buf;
 	uint32_t seqno_ofs;
 	enum seqno_fence_condition condition;
 };
 
-extern const struct fence_ops seqno_fence_ops;
+extern const struct dma_fence_ops seqno_fence_ops;
 
 /**
  * to_seqno_fence - cast a fence to a seqno_fence
@@ -47,7 +47,7 @@ extern const struct fence_ops seqno_fence_ops;
  * or the seqno_fence otherwise.
  */
 static inline struct seqno_fence *
-to_seqno_fence(struct fence *fence)
+to_seqno_fence(struct dma_fence *fence)
 {
 	if (fence->ops != &seqno_fence_ops)
 		return NULL;
@@ -83,9 +83,9 @@ to_seqno_fence(struct fence *fence)
  * dma-buf for sync_buf, since mapping or unmapping the sync_buf to the
  * device's vm can be expensive.
  *
- * It is recommended for creators of seqno_fence to call fence_signal
+ * It is recommended for creators of seqno_fence to call dma_fence_signal()
  * before destruction. This will prevent possible issues from wraparound at
- * time of issue vs time of check, since users can check fence_is_signaled
+ * time of issue vs time of check, since users can check dma_fence_is_signaled()
  * before submitting instructions for the hardware to wait on the fence.
  * However, when ops.enable_signaling is not called, it doesn't have to be
  * done as soon as possible, just before there's any real danger of seqno
@@ -96,18 +96,18 @@ seqno_fence_init(struct seqno_fence *fence, spinlock_t *lock,
 		 struct dma_buf *sync_buf,  uint32_t context,
 		 uint32_t seqno_ofs, uint32_t seqno,
 		 enum seqno_fence_condition cond,
-		 const struct fence_ops *ops)
+		 const struct dma_fence_ops *ops)
 {
 	BUG_ON(!fence || !sync_buf || !ops);
 	BUG_ON(!ops->wait || !ops->enable_signaling ||
 	       !ops->get_driver_name || !ops->get_timeline_name);
 
 	/*
-	 * ops is used in fence_init for get_driver_name, so needs to be
+	 * ops is used in dma_fence_init for get_driver_name, so needs to be
 	 * initialized first
 	 */
 	fence->ops = ops;
-	fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno);
+	dma_fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno);
 	get_dma_buf(sync_buf);
 	fence->sync_buf = sync_buf;
 	fence->seqno_ofs = seqno_ofs;
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index aa17ccfc2f57..3e3ab84fc4cd 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -18,8 +18,8 @@
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/fence.h>
-#include <linux/fence-array.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-array.h>
 
 /**
  * struct sync_file - sync file to export to the userspace
@@ -41,13 +41,13 @@ struct sync_file {
 
 	wait_queue_head_t	wq;
 
-	struct fence		*fence;
-	struct fence_cb cb;
+	struct dma_fence	*fence;
+	struct dma_fence_cb cb;
 };
 
-#define POLL_ENABLED FENCE_FLAG_USER_BITS
+#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
 
-struct sync_file *sync_file_create(struct fence *fence);
-struct fence *sync_file_get_fence(int fd);
+struct sync_file *sync_file_create(struct dma_fence *fence);
+struct dma_fence *sync_file_get_fence(int fd);
 
 #endif /* _LINUX_SYNC_H */
diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h
new file mode 100644
index 000000000000..1157cb4c3c6f
--- /dev/null
+++ b/include/trace/events/dma_fence.h
@@ -0,0 +1,128 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dma_fence
+
+#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DMA_FENCE_H
+
+#include <linux/tracepoint.h>
+
+struct dma_fence;
+
+TRACE_EVENT(dma_fence_annotate_wait_on,
+
+	/* fence: the fence waiting on f1, f1: the fence to be waited on. */
+	TP_PROTO(struct dma_fence *fence, struct dma_fence *f1),
+
+	TP_ARGS(fence, f1),
+
+	TP_STRUCT__entry(
+		__string(driver, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
+		__field(unsigned int, context)
+		__field(unsigned int, seqno)
+
+		__string(waiting_driver, f1->ops->get_driver_name(f1))
+		__string(waiting_timeline, f1->ops->get_timeline_name(f1))
+		__field(unsigned int, waiting_context)
+		__field(unsigned int, waiting_seqno)
+	),
+
+	TP_fast_assign(
+		__assign_str(driver, fence->ops->get_driver_name(fence))
+		__assign_str(timeline, fence->ops->get_timeline_name(fence))
+		__entry->context = fence->context;
+		__entry->seqno = fence->seqno;
+
+		__assign_str(waiting_driver, f1->ops->get_driver_name(f1))
+		__assign_str(waiting_timeline, f1->ops->get_timeline_name(f1))
+		__entry->waiting_context = f1->context;
+		__entry->waiting_seqno = f1->seqno;
+
+	),
+
+	TP_printk("driver=%s timeline=%s context=%u seqno=%u "	\
+		  "waits on driver=%s timeline=%s context=%u seqno=%u",
+		  __get_str(driver), __get_str(timeline), __entry->context,
+		  __entry->seqno,
+		  __get_str(waiting_driver), __get_str(waiting_timeline),
+		  __entry->waiting_context, __entry->waiting_seqno)
+);
+
+DECLARE_EVENT_CLASS(dma_fence,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence),
+
+	TP_STRUCT__entry(
+		__string(driver, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
+		__field(unsigned int, context)
+		__field(unsigned int, seqno)
+	),
+
+	TP_fast_assign(
+		__assign_str(driver, fence->ops->get_driver_name(fence))
+		__assign_str(timeline, fence->ops->get_timeline_name(fence))
+		__entry->context = fence->context;
+		__entry->seqno = fence->seqno;
+	),
+
+	TP_printk("driver=%s timeline=%s context=%u seqno=%u",
+		  __get_str(driver), __get_str(timeline), __entry->context,
+		  __entry->seqno)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_emit,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_init,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_destroy,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_enable_signal,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_signaled,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_wait_start,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(dma_fence, dma_fence_wait_end,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+#endif /*  _TRACE_DMA_FENCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
deleted file mode 100644
index d6dfa05ba322..000000000000
--- a/include/trace/events/fence.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM fence
-
-#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_FENCE_H
-
-#include <linux/tracepoint.h>
-
-struct fence;
-
-TRACE_EVENT(fence_annotate_wait_on,
-
-	/* fence: the fence waiting on f1, f1: the fence to be waited on. */
-	TP_PROTO(struct fence *fence, struct fence *f1),
-
-	TP_ARGS(fence, f1),
-
-	TP_STRUCT__entry(
-		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_timeline_name(fence))
-		__field(unsigned int, context)
-		__field(unsigned int, seqno)
-
-		__string(waiting_driver, f1->ops->get_driver_name(f1))
-		__string(waiting_timeline, f1->ops->get_timeline_name(f1))
-		__field(unsigned int, waiting_context)
-		__field(unsigned int, waiting_seqno)
-	),
-
-	TP_fast_assign(
-		__assign_str(driver, fence->ops->get_driver_name(fence))
-		__assign_str(timeline, fence->ops->get_timeline_name(fence))
-		__entry->context = fence->context;
-		__entry->seqno = fence->seqno;
-
-		__assign_str(waiting_driver, f1->ops->get_driver_name(f1))
-		__assign_str(waiting_timeline, f1->ops->get_timeline_name(f1))
-		__entry->waiting_context = f1->context;
-		__entry->waiting_seqno = f1->seqno;
-
-	),
-
-	TP_printk("driver=%s timeline=%s context=%u seqno=%u "	\
-		  "waits on driver=%s timeline=%s context=%u seqno=%u",
-		  __get_str(driver), __get_str(timeline), __entry->context,
-		  __entry->seqno,
-		  __get_str(waiting_driver), __get_str(waiting_timeline),
-		  __entry->waiting_context, __entry->waiting_seqno)
-);
-
-DECLARE_EVENT_CLASS(fence,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence),
-
-	TP_STRUCT__entry(
-		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_timeline_name(fence))
-		__field(unsigned int, context)
-		__field(unsigned int, seqno)
-	),
-
-	TP_fast_assign(
-		__assign_str(driver, fence->ops->get_driver_name(fence))
-		__assign_str(timeline, fence->ops->get_timeline_name(fence))
-		__entry->context = fence->context;
-		__entry->seqno = fence->seqno;
-	),
-
-	TP_printk("driver=%s timeline=%s context=%u seqno=%u",
-		  __get_str(driver), __get_str(timeline), __entry->context,
-		  __entry->seqno)
-);
-
-DEFINE_EVENT(fence, fence_emit,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_init,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_destroy,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_enable_signal,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_signaled,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_wait_start,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-DEFINE_EVENT(fence, fence_wait_end,
-
-	TP_PROTO(struct fence *fence),
-
-	TP_ARGS(fence)
-);
-
-#endif /*  _TRACE_FENCE_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 974e6f02e27e1b46c6c5e600e70ced25079f73eb Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 1 Aug 2016 11:54:35 +0200
Subject: iio: cros_ec_sensors_core: Add common functions for the ChromeOS EC
 Sensor Hub.

Add the core functions to be able to support the sensors attached behind
the ChromeOS Embedded Controller and used by other IIO cros-ec sensor
drivers.

The cros_ec_sensor_core driver matches with current driver in ChromeOS
4.4 tree, so it includes all the fixes at the moment. The support for
this driver was made by Gwendal Grignou. The original patch and all the
fixes has been squashed and rebased on top of mainline.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
[eballetbo: split, squash and rebase on top of mainline the patches
found in ChromeOS tree]
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio-cros-ec    |  18 +
 drivers/iio/common/Kconfig                         |   1 +
 drivers/iio/common/Makefile                        |   1 +
 drivers/iio/common/cros_ec_sensors/Kconfig         |  14 +
 drivers/iio/common/cros_ec_sensors/Makefile        |   5 +
 .../common/cros_ec_sensors/cros_ec_sensors_core.c  | 450 +++++++++++++++++++++
 .../common/cros_ec_sensors/cros_ec_sensors_core.h  | 175 ++++++++
 include/linux/mfd/cros_ec.h                        |   9 +
 include/linux/mfd/cros_ec_commands.h               |  99 ++++-
 9 files changed, 767 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-cros-ec
 create mode 100644 drivers/iio/common/cros_ec_sensors/Kconfig
 create mode 100644 drivers/iio/common/cros_ec_sensors/Makefile
 create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
 create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
new file mode 100644
index 000000000000..297b9720f024
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
@@ -0,0 +1,18 @@
+What:		/sys/bus/iio/devices/iio:deviceX/calibrate
+Date:		July 2015
+KernelVersion:	4.7
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Writing '1' will perform a FOC (Fast Online Calibration). The
+                corresponding calibration offsets can be read from *_calibbias
+                entries.
+
+What:		/sys/bus/iio/devices/iio:deviceX/location
+Date:		July 2015
+KernelVersion:	4.7
+Contact:	linux-iio@vger.kernel.org
+Description:
+		This attribute returns a string with the physical location where
+                the motion sensor is placed. For example, in a laptop a motion
+                sensor can be located on the base or on the lid. Current valid
+		values are 'base' and 'lid'.
diff --git a/drivers/iio/common/Kconfig b/drivers/iio/common/Kconfig
index 26a6026de614..e108996a9627 100644
--- a/drivers/iio/common/Kconfig
+++ b/drivers/iio/common/Kconfig
@@ -2,6 +2,7 @@
 # IIO common modules
 #
 
+source "drivers/iio/common/cros_ec_sensors/Kconfig"
 source "drivers/iio/common/hid-sensors/Kconfig"
 source "drivers/iio/common/ms_sensors/Kconfig"
 source "drivers/iio/common/ssp_sensors/Kconfig"
diff --git a/drivers/iio/common/Makefile b/drivers/iio/common/Makefile
index 585da6a1b188..6fa760e1bdd5 100644
--- a/drivers/iio/common/Makefile
+++ b/drivers/iio/common/Makefile
@@ -7,6 +7,7 @@
 #
 
 # When adding new entries keep the list in alphabetical order
+obj-y += cros_ec_sensors/
 obj-y += hid-sensors/
 obj-y += ms_sensors/
 obj-y += ssp_sensors/
diff --git a/drivers/iio/common/cros_ec_sensors/Kconfig b/drivers/iio/common/cros_ec_sensors/Kconfig
new file mode 100644
index 000000000000..24743be15a5b
--- /dev/null
+++ b/drivers/iio/common/cros_ec_sensors/Kconfig
@@ -0,0 +1,14 @@
+#
+# Chrome OS Embedded Controller managed sensors library
+#
+config IIO_CROS_EC_SENSORS_CORE
+	tristate "ChromeOS EC Sensors Core"
+	depends on SYSFS && MFD_CROS_EC
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
+	help
+	  Base module for the ChromeOS EC Sensors module.
+	  Contains core functions used by other IIO CrosEC sensor
+	  drivers.
+	  Define common attributes and sysfs interrupt handler.
+
diff --git a/drivers/iio/common/cros_ec_sensors/Makefile b/drivers/iio/common/cros_ec_sensors/Makefile
new file mode 100644
index 000000000000..95b690139bfd
--- /dev/null
+++ b/drivers/iio/common/cros_ec_sensors/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for sensors seen through the ChromeOS EC sensor hub.
+#
+
+obj-$(CONFIG_IIO_CROS_EC_SENSORS_CORE) += cros_ec_sensors_core.o
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
new file mode 100644
index 000000000000..a3be7991355e
--- /dev/null
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -0,0 +1,450 @@
+/*
+ * cros_ec_sensors_core - Common function for Chrome OS EC sensor driver.
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/kfifo_buf.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+
+#include "cros_ec_sensors_core.h"
+
+static char *cros_ec_loc[] = {
+	[MOTIONSENSE_LOC_BASE] = "base",
+	[MOTIONSENSE_LOC_LID] = "lid",
+	[MOTIONSENSE_LOC_MAX] = "unknown",
+};
+
+int cros_ec_sensors_core_init(struct platform_device *pdev,
+			      struct iio_dev *indio_dev,
+			      bool physical_device)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_sensors_core_state *state = iio_priv(indio_dev);
+	struct cros_ec_dev *ec = dev_get_drvdata(pdev->dev.parent);
+	struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev);
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	state->ec = ec->ec_dev;
+	state->msg = devm_kzalloc(&pdev->dev,
+				max((u16)sizeof(struct ec_params_motion_sense),
+				state->ec->max_response), GFP_KERNEL);
+	if (!state->msg)
+		return -ENOMEM;
+
+	state->resp = (struct ec_response_motion_sense *)state->msg->data;
+
+	mutex_init(&state->cmd_lock);
+
+	/* Set up the host command structure. */
+	state->msg->version = 2;
+	state->msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset;
+	state->msg->outsize = sizeof(struct ec_params_motion_sense);
+
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->name = pdev->name;
+
+	if (physical_device) {
+		indio_dev->modes = INDIO_DIRECT_MODE;
+
+		state->param.cmd = MOTIONSENSE_CMD_INFO;
+		state->param.info.sensor_num = sensor_platform->sensor_num;
+		if (cros_ec_motion_send_host_cmd(state, 0)) {
+			dev_warn(dev, "Can not access sensor info\n");
+			return -EIO;
+		}
+		state->type = state->resp->info.type;
+		state->loc = state->resp->info.location;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_core_init);
+
+int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *state,
+				 u16 opt_length)
+{
+	int ret;
+
+	if (opt_length)
+		state->msg->insize = min(opt_length, state->ec->max_response);
+	else
+		state->msg->insize = state->ec->max_response;
+
+	memcpy(state->msg->data, &state->param, sizeof(state->param));
+
+	ret = cros_ec_cmd_xfer_status(state->ec, state->msg);
+	if (ret < 0)
+		return -EIO;
+
+	if (ret &&
+	    state->resp != (struct ec_response_motion_sense *)state->msg->data)
+		memcpy(state->resp, state->msg->data, ret);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cros_ec_motion_send_host_cmd);
+
+static ssize_t cros_ec_sensors_calibrate(struct iio_dev *indio_dev,
+		uintptr_t private, const struct iio_chan_spec *chan,
+		const char *buf, size_t len)
+{
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+	int ret, i;
+	bool calibrate;
+
+	ret = strtobool(buf, &calibrate);
+	if (ret < 0)
+		return ret;
+	if (!calibrate)
+		return -EINVAL;
+
+	mutex_lock(&st->cmd_lock);
+	st->param.cmd = MOTIONSENSE_CMD_PERFORM_CALIB;
+	ret = cros_ec_motion_send_host_cmd(st, 0);
+	if (ret != 0) {
+		dev_warn(&indio_dev->dev, "Unable to calibrate sensor\n");
+	} else {
+		/* Save values */
+		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
+			st->calib[i] = st->resp->perform_calib.offset[i];
+	}
+	mutex_unlock(&st->cmd_lock);
+
+	return ret ? ret : len;
+}
+
+static ssize_t cros_ec_sensors_loc(struct iio_dev *indio_dev,
+		uintptr_t private, const struct iio_chan_spec *chan,
+		char *buf)
+{
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", cros_ec_loc[st->loc]);
+}
+
+const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[] = {
+	{
+		.name = "calibrate",
+		.shared = IIO_SHARED_BY_ALL,
+		.write = cros_ec_sensors_calibrate
+	},
+	{
+		.name = "location",
+		.shared = IIO_SHARED_BY_ALL,
+		.read = cros_ec_sensors_loc
+	},
+	{ },
+};
+EXPORT_SYMBOL_GPL(cros_ec_sensors_ext_info);
+
+/**
+ * cros_ec_sensors_idx_to_reg - convert index into offset in shared memory
+ * @st:		pointer to state information for device
+ * @idx:	sensor index (should be element of enum sensor_index)
+ *
+ * Return:	address to read at
+ */
+static unsigned int cros_ec_sensors_idx_to_reg(
+					struct cros_ec_sensors_core_state *st,
+					unsigned int idx)
+{
+	/*
+	 * When using LPC interface, only space for 2 Accel and one Gyro.
+	 * First halfword of MOTIONSENSE_TYPE_ACCEL is used by angle.
+	 */
+	if (st->type == MOTIONSENSE_TYPE_ACCEL)
+		return EC_MEMMAP_ACC_DATA + sizeof(u16) *
+			(1 + idx + st->param.info.sensor_num *
+			 CROS_EC_SENSOR_MAX_AXIS);
+
+	return EC_MEMMAP_GYRO_DATA + sizeof(u16) * idx;
+}
+
+static int cros_ec_sensors_cmd_read_u8(struct cros_ec_device *ec,
+				       unsigned int offset, u8 *dest)
+{
+	return ec->cmd_readmem(ec, offset, 1, dest);
+}
+
+static int cros_ec_sensors_cmd_read_u16(struct cros_ec_device *ec,
+					 unsigned int offset, u16 *dest)
+{
+	__le16 tmp;
+	int ret = ec->cmd_readmem(ec, offset, 2, &tmp);
+
+	if (ret >= 0)
+		*dest = le16_to_cpu(tmp);
+
+	return ret;
+}
+
+/**
+ * cros_ec_sensors_read_until_not_busy() - read until is not busy
+ *
+ * @st:	pointer to state information for device
+ *
+ * Read from EC status byte until it reads not busy.
+ * Return: 8-bit status if ok, -errno on failure.
+ */
+static int cros_ec_sensors_read_until_not_busy(
+					struct cros_ec_sensors_core_state *st)
+{
+	struct cros_ec_device *ec = st->ec;
+	u8 status;
+	int ret, attempts = 0;
+
+	ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS, &status);
+	if (ret < 0)
+		return ret;
+
+	while (status & EC_MEMMAP_ACC_STATUS_BUSY_BIT) {
+		/* Give up after enough attempts, return error. */
+		if (attempts++ >= 50)
+			return -EIO;
+
+		/* Small delay every so often. */
+		if (attempts % 5 == 0)
+			msleep(25);
+
+		ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS,
+						  &status);
+		if (ret < 0)
+			return ret;
+	}
+
+	return status;
+}
+
+/**
+ * read_ec_sensors_data_unsafe() - read acceleration data from EC shared memory
+ * @indio_dev:	pointer to IIO device
+ * @scan_mask:	bitmap of the sensor indices to scan
+ * @data:	location to store data
+ *
+ * This is the unsafe function for reading the EC data. It does not guarantee
+ * that the EC will not modify the data as it is being read in.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+static int cros_ec_sensors_read_data_unsafe(struct iio_dev *indio_dev,
+			 unsigned long scan_mask, s16 *data)
+{
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+	struct cros_ec_device *ec = st->ec;
+	unsigned int i;
+	int ret;
+
+	/* Read all sensors enabled in scan_mask. Each value is 2 bytes. */
+	for_each_set_bit(i, &scan_mask, indio_dev->masklength) {
+		ret = cros_ec_sensors_cmd_read_u16(ec,
+					     cros_ec_sensors_idx_to_reg(st, i),
+					     data);
+		if (ret < 0)
+			return ret;
+
+		data++;
+	}
+
+	return 0;
+}
+
+int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev,
+			     unsigned long scan_mask, s16 *data)
+{
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+	struct cros_ec_device *ec = st->ec;
+	u8 samp_id = 0xff, status = 0;
+	int ret, attempts = 0;
+
+	/*
+	 * Continually read all data from EC until the status byte after
+	 * all reads reflects that the EC is not busy and the sample id
+	 * matches the sample id from before all reads. This guarantees
+	 * that data read in was not modified by the EC while reading.
+	 */
+	while ((status & (EC_MEMMAP_ACC_STATUS_BUSY_BIT |
+			  EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK)) != samp_id) {
+		/* If we have tried to read too many times, return error. */
+		if (attempts++ >= 5)
+			return -EIO;
+
+		/* Read status byte until EC is not busy. */
+		status = cros_ec_sensors_read_until_not_busy(st);
+		if (status < 0)
+			return status;
+
+		/*
+		 * Store the current sample id so that we can compare to the
+		 * sample id after reading the data.
+		 */
+		samp_id = status & EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK;
+
+		/* Read all EC data, format it, and store it into data. */
+		ret = cros_ec_sensors_read_data_unsafe(indio_dev, scan_mask,
+						       data);
+		if (ret < 0)
+			return ret;
+
+		/* Read status byte. */
+		ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS,
+						  &status);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_read_lpc);
+
+int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev,
+			     unsigned long scan_mask, s16 *data)
+{
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+	int ret;
+	unsigned int i;
+
+	/* Read all sensor data through a command. */
+	st->param.cmd = MOTIONSENSE_CMD_DATA;
+	ret = cros_ec_motion_send_host_cmd(st, sizeof(st->resp->data));
+	if (ret != 0) {
+		dev_warn(&indio_dev->dev, "Unable to read sensor data\n");
+		return ret;
+	}
+
+	for_each_set_bit(i, &scan_mask, indio_dev->masklength) {
+		*data = st->resp->data.data[i];
+		data++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_read_cmd);
+
+irqreturn_t cros_ec_sensors_capture(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct cros_ec_sensors_core_state *st = iio_priv(indio_dev);
+	int ret;
+
+	mutex_lock(&st->cmd_lock);
+
+	/* Clear capture data. */
+	memset(st->samples, 0, indio_dev->scan_bytes);
+
+	/* Read data based on which channels are enabled in scan mask. */
+	ret = st->read_ec_sensors_data(indio_dev,
+				       *(indio_dev->active_scan_mask),
+				       (s16 *)st->samples);
+	if (ret < 0)
+		goto done;
+
+	iio_push_to_buffers_with_timestamp(indio_dev, st->samples,
+					   iio_get_time_ns(indio_dev));
+
+done:
+	/*
+	 * Tell the core we are done with this trigger and ready for the
+	 * next one.
+	 */
+	iio_trigger_notify_done(indio_dev->trig);
+
+	mutex_unlock(&st->cmd_lock);
+
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_capture);
+
+int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st,
+			  struct iio_chan_spec const *chan,
+			  int *val, int *val2, long mask)
+{
+	int ret = IIO_VAL_INT;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		st->param.cmd = MOTIONSENSE_CMD_EC_RATE;
+		st->param.ec_rate.data =
+			EC_MOTION_SENSE_NO_VALUE;
+
+		if (cros_ec_motion_send_host_cmd(st, 0))
+			ret = -EIO;
+		else
+			*val = st->resp->ec_rate.ret;
+		break;
+	case IIO_CHAN_INFO_FREQUENCY:
+		st->param.cmd = MOTIONSENSE_CMD_SENSOR_ODR;
+		st->param.sensor_odr.data =
+			EC_MOTION_SENSE_NO_VALUE;
+
+		if (cros_ec_motion_send_host_cmd(st, 0))
+			ret = -EIO;
+		else
+			*val = st->resp->sensor_odr.ret;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_core_read);
+
+int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st,
+			       struct iio_chan_spec const *chan,
+			       int val, int val2, long mask)
+{
+	int ret = 0;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_FREQUENCY:
+		st->param.cmd = MOTIONSENSE_CMD_SENSOR_ODR;
+		st->param.sensor_odr.data = val;
+
+		/* Always roundup, so caller gets at least what it asks for. */
+		st->param.sensor_odr.roundup = 1;
+
+		if (cros_ec_motion_send_host_cmd(st, 0))
+			ret = -EIO;
+		break;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		st->param.cmd = MOTIONSENSE_CMD_EC_RATE;
+		st->param.ec_rate.data = val;
+
+		if (cros_ec_motion_send_host_cmd(st, 0))
+			ret = -EIO;
+		else
+			st->curr_sampl_freq = val;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_core_write);
+
+MODULE_DESCRIPTION("ChromeOS EC sensor hub core functions");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h
new file mode 100644
index 000000000000..8bc2ca3c2e2e
--- /dev/null
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h
@@ -0,0 +1,175 @@
+/*
+ * ChromeOS EC sensor hub
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __CROS_EC_SENSORS_CORE_H
+#define __CROS_EC_SENSORS_CORE_H
+
+#include <linux/irqreturn.h>
+
+enum {
+	CROS_EC_SENSOR_X,
+	CROS_EC_SENSOR_Y,
+	CROS_EC_SENSOR_Z,
+	CROS_EC_SENSOR_MAX_AXIS,
+};
+
+/* EC returns sensor values using signed 16 bit registers */
+#define CROS_EC_SENSOR_BITS 16
+
+/*
+ * 4 16 bit channels are allowed.
+ * Good enough for current sensors, they use up to 3 16 bit vectors.
+ */
+#define CROS_EC_SAMPLE_SIZE  (sizeof(s64) * 2)
+
+/* Minimum sampling period to use when device is suspending */
+#define CROS_EC_MIN_SUSPEND_SAMPLING_FREQUENCY 1000  /* 1 second */
+
+/**
+ * struct cros_ec_sensors_core_state - state data for EC sensors IIO driver
+ * @ec:				cros EC device structure
+ * @cmd_lock:			lock used to prevent simultaneous access to the
+ *				commands.
+ * @msg:			cros EC command structure
+ * @param:			motion sensor parameters structure
+ * @resp:			motion sensor response structure
+ * @type:			type of motion sensor
+ * @loc:			location where the motion sensor is placed
+ * @calib:			calibration parameters. Note that trigger
+ *				captured data will always provide the calibrated
+ *				data
+ * @samples:			static array to hold data from a single capture.
+ *				For each channel we need 2 bytes, except for
+ *				the timestamp. The timestamp is always last and
+ *				is always 8-byte aligned.
+ * @read_ec_sensors_data:	function used for accessing sensors values
+ * @cuur_sampl_freq:		current sampling period
+ */
+struct cros_ec_sensors_core_state {
+	struct cros_ec_device *ec;
+	struct mutex cmd_lock;
+
+	struct cros_ec_command *msg;
+	struct ec_params_motion_sense param;
+	struct ec_response_motion_sense *resp;
+
+	enum motionsensor_type type;
+	enum motionsensor_location loc;
+
+	s16 calib[CROS_EC_SENSOR_MAX_AXIS];
+
+	u8 samples[CROS_EC_SAMPLE_SIZE];
+
+	int (*read_ec_sensors_data)(struct iio_dev *indio_dev,
+				    unsigned long scan_mask, s16 *data);
+
+	int curr_sampl_freq;
+};
+
+/**
+ * cros_ec_sensors_read_lpc() - retrieve data from EC shared memory
+ * @indio_dev:	pointer to IIO device
+ * @scan_mask:	bitmap of the sensor indices to scan
+ * @data:	location to store data
+ *
+ * This is the safe function for reading the EC data. It guarantees that the
+ * data sampled was not modified by the EC while being read.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev, unsigned long scan_mask,
+			     s16 *data);
+
+/**
+ * cros_ec_sensors_read_cmd() - retrieve data using the EC command protocol
+ * @indio_dev:	pointer to IIO device
+ * @scan_mask:	bitmap of the sensor indices to scan
+ * @data:	location to store data
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask,
+			     s16 *data);
+
+/**
+ * cros_ec_sensors_core_init() - basic initialization of the core structure
+ * @pdev:		platform device created for the sensors
+ * @indio_dev:		iio device structure of the device
+ * @physical_device:	true if the device refers to a physical device
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int cros_ec_sensors_core_init(struct platform_device *pdev,
+			      struct iio_dev *indio_dev, bool physical_device);
+
+/**
+ * cros_ec_sensors_capture() - the trigger handler function
+ * @irq:	the interrupt number.
+ * @p:		a pointer to the poll function.
+ *
+ * On a trigger event occurring, if the pollfunc is attached then this
+ * handler is called as a threaded interrupt (and hence may sleep). It
+ * is responsible for grabbing data from the device and pushing it into
+ * the associated buffer.
+ *
+ * Return: IRQ_HANDLED
+ */
+irqreturn_t cros_ec_sensors_capture(int irq, void *p);
+
+/**
+ * cros_ec_motion_send_host_cmd() - send motion sense host command
+ * @st:		pointer to state information for device
+ * @opt_length:	optional length to reduce the response size, useful on the data
+ *		path. Otherwise, the maximal allowed response size is used
+ *
+ * When called, the sub-command is assumed to be set in param->cmd.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *st,
+				 u16 opt_length);
+
+/**
+ * cros_ec_sensors_core_read() - function to request a value from the sensor
+ * @st:		pointer to state information for device
+ * @chan:	channel specification structure table
+ * @val:	will contain one element making up the returned value
+ * @val2:	will contain another element making up the returned value
+ * @mask:	specifies which values to be requested
+ *
+ * Return:	the type of value returned by the device
+ */
+int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st,
+			      struct iio_chan_spec const *chan,
+			      int *val, int *val2, long mask);
+
+/**
+ * cros_ec_sensors_core_write() - function to write a value to the sensor
+ * @st:		pointer to state information for device
+ * @chan:	channel specification structure table
+ * @val:	first part of value to write
+ * @val2:	second part of value to write
+ * @mask:	specifies which values to write
+ *
+ * Return:	the type of value returned by the device
+ */
+int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st,
+			       struct iio_chan_spec const *chan,
+			       int val, int val2, long mask);
+
+/* List of extended channel specification for all sensors */
+extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[];
+
+#endif  /* __CROS_EC_SENSORS_CORE_H */
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 76f7ef4d3a0d..1f85b7aff097 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -148,6 +148,15 @@ struct cros_ec_device {
 	int event_size;
 };
 
+/**
+ * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information
+ *
+ * @sensor_num: Id of the sensor, as reported by the EC.
+ */
+struct cros_ec_sensor_platform {
+	u8 sensor_num;
+};
+
 /* struct cros_ec_platform - ChromeOS EC platform information
  *
  * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 76728ff37d01..8826e0f64b0e 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1315,6 +1315,24 @@ enum motionsense_command {
 	 */
 	MOTIONSENSE_CMD_KB_WAKE_ANGLE = 5,
 
+	/*
+	 * Returns a single sensor data.
+	 */
+	MOTIONSENSE_CMD_DATA = 6,
+
+	/*
+	 * Perform low level calibration.. On sensors that support it, ask to
+	 * do offset calibration.
+	 */
+	MOTIONSENSE_CMD_PERFORM_CALIB = 10,
+
+	/*
+	 * Sensor Offset command is a setter/getter command for the offset used
+	 * for calibration. The offsets can be calculated by the host, or via
+	 * PERFORM_CALIB command.
+	 */
+	MOTIONSENSE_CMD_SENSOR_OFFSET = 11,
+
 	/* Number of motionsense sub-commands. */
 	MOTIONSENSE_NUM_CMDS
 };
@@ -1335,12 +1353,18 @@ enum motionsensor_id {
 enum motionsensor_type {
 	MOTIONSENSE_TYPE_ACCEL = 0,
 	MOTIONSENSE_TYPE_GYRO = 1,
+	MOTIONSENSE_TYPE_MAG = 2,
+	MOTIONSENSE_TYPE_PROX = 3,
+	MOTIONSENSE_TYPE_LIGHT = 4,
+	MOTIONSENSE_TYPE_ACTIVITY = 5,
+	MOTIONSENSE_TYPE_MAX
 };
 
 /* List of motion sensor locations. */
 enum motionsensor_location {
 	MOTIONSENSE_LOC_BASE = 0,
 	MOTIONSENSE_LOC_LID = 1,
+	MOTIONSENSE_LOC_MAX,
 };
 
 /* List of motion sensor chips. */
@@ -1361,6 +1385,31 @@ enum motionsensor_chip {
  */
 #define EC_MOTION_SENSE_NO_VALUE -1
 
+#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000
+
+/* Set Calibration information */
+#define MOTION_SENSE_SET_OFFSET	1
+
+struct ec_response_motion_sensor_data {
+	/* Flags for each sensor. */
+	uint8_t flags;
+	/* Sensor number the data comes from */
+	uint8_t sensor_num;
+	/* Each sensor is up to 3-axis. */
+	union {
+		int16_t             data[3];
+		struct {
+			uint16_t    rsvd;
+			uint32_t    timestamp;
+		} __packed;
+		struct {
+			uint8_t     activity; /* motionsensor_activity */
+			uint8_t     state;
+			int16_t     add_info[2];
+		};
+	};
+} __packed;
+
 struct ec_params_motion_sense {
 	uint8_t cmd;
 	union {
@@ -1378,9 +1427,37 @@ struct ec_params_motion_sense {
 			int16_t data;
 		} ec_rate, kb_wake_angle;
 
+		/* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */
+		struct {
+			uint8_t sensor_num;
+
+			/*
+			 * bit 0: If set (MOTION_SENSE_SET_OFFSET), set
+			 * the calibration information in the EC.
+			 * If unset, just retrieve calibration information.
+			 */
+			uint16_t flags;
+
+			/*
+			 * Temperature at calibration, in units of 0.01 C
+			 * 0x8000: invalid / unknown.
+			 * 0x0: 0C
+			 * 0x7fff: +327.67C
+			 */
+			int16_t temp;
+
+			/*
+			 * Offset for calibration.
+			 * Unit:
+			 * Accelerometer: 1/1024 g
+			 * Gyro:          1/1024 deg/s
+			 * Compass:       1/16 uT
+			 */
+			int16_t offset[3];
+		} __packed sensor_offset;
+
 		/* Used for MOTIONSENSE_CMD_INFO. */
 		struct {
-			/* Should be element of enum motionsensor_id. */
 			uint8_t sensor_num;
 		} info;
 
@@ -1410,11 +1487,14 @@ struct ec_response_motion_sense {
 			/* Flags representing the motion sensor module. */
 			uint8_t module_flags;
 
-			/* Flags for each sensor in enum motionsensor_id. */
-			uint8_t sensor_flags[EC_MOTION_SENSOR_COUNT];
+			/* Number of sensors managed directly by the EC. */
+			uint8_t sensor_count;
 
-			/* Array of all sensor data. Each sensor is 3-axis. */
-			int16_t data[3*EC_MOTION_SENSOR_COUNT];
+			/*
+			 * Sensor data is truncated if response_max is too small
+			 * for holding all the data.
+			 */
+			struct ec_response_motion_sensor_data sensor[0];
 		} dump;
 
 		/* Used for MOTIONSENSE_CMD_INFO. */
@@ -1429,6 +1509,9 @@ struct ec_response_motion_sense {
 			uint8_t chip;
 		} info;
 
+		/* Used for MOTIONSENSE_CMD_DATA */
+		struct ec_response_motion_sensor_data data;
+
 		/*
 		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR,
 		 * MOTIONSENSE_CMD_SENSOR_RANGE, and
@@ -1438,6 +1521,12 @@ struct ec_response_motion_sense {
 			/* Current value of the parameter queried. */
 			int32_t ret;
 		} ec_rate, sensor_odr, sensor_range, kb_wake_angle;
+
+		/* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */
+		struct {
+			int16_t temp;
+			int16_t offset[3];
+		} sensor_offset, perform_calib;
 	};
 } __packed;
 
-- 
cgit v1.2.3


From e4244ebddae27e9200146bba897f12a3950ce722 Mon Sep 17 00:00:00 2001
From: Vincent Palatin <vpalatin@chromium.org>
Date: Mon, 1 Aug 2016 11:54:37 +0200
Subject: platform/chrome: Introduce a new function to check EC features.

Use the EC_CMD_GET_FEATURES message to check the supported features for
each MCU.

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
[tomeu: adapted to changes in mainline]
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
[enric: remove references to USB PD feature and do it more generic]
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
For the MFD changes:
  Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/platform/chrome/cros_ec_dev.c | 37 +++++++++++++++
 include/linux/mfd/cros_ec.h           |  1 +
 include/linux/mfd/cros_ec_commands.h  | 84 +++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 8abd80dbcbed..7eb53078b03d 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -87,6 +87,41 @@ exit:
 	return ret;
 }
 
+static int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
+{
+	struct cros_ec_command *msg;
+	int ret;
+
+	if (ec->features[0] == -1U && ec->features[1] == -1U) {
+		/* features bitmap not read yet */
+
+		msg = kmalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
+
+		msg->version = 0;
+		msg->command = EC_CMD_GET_FEATURES + ec->cmd_offset;
+		msg->insize = sizeof(ec->features);
+		msg->outsize = 0;
+
+		ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+		if (ret < 0 || msg->result != EC_RES_SUCCESS) {
+			dev_warn(ec->dev, "cannot get EC features: %d/%d\n",
+				 ret, msg->result);
+			memset(ec->features, 0, sizeof(ec->features));
+		}
+
+		memcpy(ec->features, msg->data, sizeof(ec->features));
+
+		dev_dbg(ec->dev, "EC features %08x %08x\n",
+			ec->features[0], ec->features[1]);
+
+		kfree(msg);
+	}
+
+	return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature);
+}
+
 /* Device file ops */
 static int ec_device_open(struct inode *inode, struct file *filp)
 {
@@ -245,6 +280,8 @@ static int ec_device_probe(struct platform_device *pdev)
 	ec->ec_dev = dev_get_drvdata(dev->parent);
 	ec->dev = dev;
 	ec->cmd_offset = ec_platform->cmd_offset;
+	ec->features[0] = -1U; /* Not cached yet */
+	ec->features[1] = -1U; /* Not cached yet */
 	device_initialize(&ec->class_dev);
 	cdev_init(&ec->cdev, &fops);
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 1f85b7aff097..f62043a75f43 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -184,6 +184,7 @@ struct cros_ec_dev {
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
 	u16 cmd_offset;
+	u32 features[2];
 };
 
 /**
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 8826e0f64b0e..1683003603f3 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -713,6 +713,90 @@ struct ec_response_get_set_value {
 /* More than one command can use these structs to get/set paramters. */
 #define EC_CMD_GSV_PAUSE_IN_S5	0x0c
 
+/*****************************************************************************/
+/* List the features supported by the firmware */
+#define EC_CMD_GET_FEATURES  0x0d
+
+/* Supported features */
+enum ec_feature_code {
+	/*
+	 * This image contains a limited set of features. Another image
+	 * in RW partition may support more features.
+	 */
+	EC_FEATURE_LIMITED = 0,
+	/*
+	 * Commands for probing/reading/writing/erasing the flash in the
+	 * EC are present.
+	 */
+	EC_FEATURE_FLASH = 1,
+	/*
+	 * Can control the fan speed directly.
+	 */
+	EC_FEATURE_PWM_FAN = 2,
+	/*
+	 * Can control the intensity of the keyboard backlight.
+	 */
+	EC_FEATURE_PWM_KEYB = 3,
+	/*
+	 * Support Google lightbar, introduced on Pixel.
+	 */
+	EC_FEATURE_LIGHTBAR = 4,
+	/* Control of LEDs  */
+	EC_FEATURE_LED = 5,
+	/* Exposes an interface to control gyro and sensors.
+	 * The host goes through the EC to access these sensors.
+	 * In addition, the EC may provide composite sensors, like lid angle.
+	 */
+	EC_FEATURE_MOTION_SENSE = 6,
+	/* The keyboard is controlled by the EC */
+	EC_FEATURE_KEYB = 7,
+	/* The AP can use part of the EC flash as persistent storage. */
+	EC_FEATURE_PSTORE = 8,
+	/* The EC monitors BIOS port 80h, and can return POST codes. */
+	EC_FEATURE_PORT80 = 9,
+	/*
+	 * Thermal management: include TMP specific commands.
+	 * Higher level than direct fan control.
+	 */
+	EC_FEATURE_THERMAL = 10,
+	/* Can switch the screen backlight on/off */
+	EC_FEATURE_BKLIGHT_SWITCH = 11,
+	/* Can switch the wifi module on/off */
+	EC_FEATURE_WIFI_SWITCH = 12,
+	/* Monitor host events, through for example SMI or SCI */
+	EC_FEATURE_HOST_EVENTS = 13,
+	/* The EC exposes GPIO commands to control/monitor connected devices. */
+	EC_FEATURE_GPIO = 14,
+	/* The EC can send i2c messages to downstream devices. */
+	EC_FEATURE_I2C = 15,
+	/* Command to control charger are included */
+	EC_FEATURE_CHARGER = 16,
+	/* Simple battery support. */
+	EC_FEATURE_BATTERY = 17,
+	/*
+	 * Support Smart battery protocol
+	 * (Common Smart Battery System Interface Specification)
+	 */
+	EC_FEATURE_SMART_BATTERY = 18,
+	/* EC can dectect when the host hangs. */
+	EC_FEATURE_HANG_DETECT = 19,
+	/* Report power information, for pit only */
+	EC_FEATURE_PMU = 20,
+	/* Another Cros EC device is present downstream of this one */
+	EC_FEATURE_SUB_MCU = 21,
+	/* Support USB Power delivery (PD) commands */
+	EC_FEATURE_USB_PD = 22,
+	/* Control USB multiplexer, for audio through USB port for instance. */
+	EC_FEATURE_USB_MUX = 23,
+	/* Motion Sensor code has an internal software FIFO */
+	EC_FEATURE_MOTION_SENSE_FIFO = 24,
+};
+
+#define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32))
+#define EC_FEATURE_MASK_1(event_code) (1UL << (event_code - 32))
+struct ec_response_get_features {
+	uint32_t flags[2];
+} __packed;
 
 /*****************************************************************************/
 /* Flash commands */
-- 
cgit v1.2.3


From 0aced355757ddc150f78a6bf4f8d885bd4eaf0e2 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 19 Sep 2016 13:09:02 +0530
Subject: mfd: tps65218: Remove redundant read wrapper

Currently read directly calls the repmap read function. Hence
remove the redundant wrapper and use regmap read wherever
needed.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/gpio/gpio-tps65218.c           |  3 ++-
 drivers/mfd/tps65218.c                 | 18 ++----------------
 drivers/regulator/tps65218-regulator.c |  5 +++--
 include/linux/mfd/tps65218.h           |  2 --
 4 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c
index d779307a9685..46e6dcc089cb 100644
--- a/drivers/gpio/gpio-tps65218.c
+++ b/drivers/gpio/gpio-tps65218.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/mfd/tps65218.h>
 
 struct tps65218_gpio {
@@ -30,7 +31,7 @@ static int tps65218_gpio_get(struct gpio_chip *gc, unsigned offset)
 	unsigned int val;
 	int ret;
 
-	ret = tps65218_reg_read(tps65218, TPS65218_REG_ENABLE2, &val);
+	ret = regmap_read(tps65218->regmap, TPS65218_REG_ENABLE2, &val);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index ba610adbdbff..9bca1b1b60ce 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -33,20 +33,6 @@
 
 #define TPS65218_PASSWORD_REGS_UNLOCK   0x7D
 
-/**
- * tps65218_reg_read: Read a single tps65218 register.
- *
- * @tps: Device to read from.
- * @reg: Register to read.
- * @val: Contians the value
- */
-int tps65218_reg_read(struct tps65218 *tps, unsigned int reg,
-			unsigned int *val)
-{
-	return regmap_read(tps->regmap, reg, val);
-}
-EXPORT_SYMBOL_GPL(tps65218_reg_read);
-
 /**
  * tps65218_reg_write: Write a single tps65218 register.
  *
@@ -93,7 +79,7 @@ static int tps65218_update_bits(struct tps65218 *tps, unsigned int reg,
 	int ret;
 	unsigned int data;
 
-	ret = tps65218_reg_read(tps, reg, &data);
+	ret = regmap_read(tps->regmap, reg, &data);
 	if (ret) {
 		dev_err(tps->dev, "Read from reg 0x%x failed\n", reg);
 		return ret;
@@ -251,7 +237,7 @@ static int tps65218_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 
-	ret = tps65218_reg_read(tps, TPS65218_REG_CHIPID, &chipid);
+	ret = regmap_read(tps->regmap, TPS65218_REG_CHIPID, &chipid);
 	if (ret) {
 		dev_err(tps->dev, "Failed to read chipid: %d\n", ret);
 		return ret;
diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index eb0f5b13841a..ae16caf4151c 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -22,6 +22,7 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/of_device.h>
+#include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
@@ -272,7 +273,7 @@ static int tps65218_pmic_get_current_limit(struct regulator_dev *dev)
 	unsigned int index;
 	struct tps65218 *tps = rdev_get_drvdata(dev);
 
-	retval = tps65218_reg_read(tps, dev->desc->csel_reg, &index);
+	retval = regmap_read(tps->regmap, dev->desc->csel_reg, &index);
 	if (retval < 0)
 		return retval;
 
@@ -383,7 +384,7 @@ static int tps65218_regulator_probe(struct platform_device *pdev)
 		return PTR_ERR(rdev);
 	}
 
-	ret = tps65218_reg_read(tps, regulators[id].bypass_reg, &val);
+	ret = regmap_read(tps->regmap, regulators[id].bypass_reg, &val);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index d1db9527fab5..51bef539091c 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -284,8 +284,6 @@ struct tps65218 {
 	struct regmap *regmap;
 };
 
-int tps65218_reg_read(struct tps65218 *tps, unsigned int reg,
-					unsigned int *val);
 int tps65218_reg_write(struct tps65218 *tps, unsigned int reg,
 			unsigned int val, unsigned int level);
 int tps65218_set_bits(struct tps65218 *tps, unsigned int reg,
-- 
cgit v1.2.3


From 2dc4940360d4c0c38aa9275532c7c0d7542f6258 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 19 Sep 2016 13:09:06 +0530
Subject: regulator: tps65218: Remove all the compatibles

Remove all the individual compatibles for all the regulators
and introduce id_table and update the driver accordingly
to parse device tree nodes using the regulator framework.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/regulator/tps65218-regulator.c | 150 ++++++++++++---------------------
 include/linux/mfd/tps65218.h           |   1 +
 2 files changed, 57 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index ae16caf4151c..9aafbb03482d 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -31,10 +31,11 @@
 enum tps65218_regulators { DCDC1, DCDC2, DCDC3, DCDC4,
 			   DCDC5, DCDC6, LDO1, LS3 };
 
-#define TPS65218_REGULATOR(_name, _id, _type, _ops, _n, _vr, _vm, _er, _em, \
-			   _cr, _cm, _lr, _nlr, _delay, _fuv, _sr, _sm)	\
+#define TPS65218_REGULATOR(_name, _of, _id, _type, _ops, _n, _vr, _vm, _er, \
+			   _em, _cr, _cm, _lr, _nlr, _delay, _fuv, _sr, _sm) \
 	{							\
 		.name			= _name,		\
+		.of_match		= _of,			\
 		.id			= _id,			\
 		.ops			= &_ops,		\
 		.n_voltages		= _n,			\
@@ -55,14 +56,6 @@ enum tps65218_regulators { DCDC1, DCDC2, DCDC3, DCDC4,
 		.bypass_mask	= _sm,				\
 	}							\
 
-#define TPS65218_INFO(_id, _nm, _min, _max)	\
-	[_id] = {					\
-		.id		= _id,			\
-		.name		= _nm,			\
-		.min_uV		= _min,			\
-		.max_uV		= _max,			\
-	}
-
 static const struct regulator_linear_range dcdc1_dcdc2_ranges[] = {
 	REGULATOR_LINEAR_RANGE(850000, 0x0, 0x32, 10000),
 	REGULATOR_LINEAR_RANGE(1375000, 0x33, 0x3f, 25000),
@@ -78,36 +71,6 @@ static const struct regulator_linear_range dcdc4_ranges[] = {
 	REGULATOR_LINEAR_RANGE(1600000, 0x10, 0x34, 50000),
 };
 
-static struct tps_info tps65218_pmic_regs[] = {
-	TPS65218_INFO(DCDC1, "DCDC1", 850000, 1675000),
-	TPS65218_INFO(DCDC2, "DCDC2", 850000, 1675000),
-	TPS65218_INFO(DCDC3, "DCDC3", 900000, 3400000),
-	TPS65218_INFO(DCDC4, "DCDC4", 1175000, 3400000),
-	TPS65218_INFO(DCDC5, "DCDC5", 1000000, 1000000),
-	TPS65218_INFO(DCDC6, "DCDC6", 1800000, 1800000),
-	TPS65218_INFO(LDO1, "LDO1", 900000, 3400000),
-	TPS65218_INFO(LS3, "LS3", -1, -1),
-};
-
-#define TPS65218_OF_MATCH(comp, label) \
-	{ \
-		.compatible = comp, \
-		.data = &label, \
-	}
-
-static const struct of_device_id tps65218_of_match[] = {
-	TPS65218_OF_MATCH("ti,tps65218-dcdc1", tps65218_pmic_regs[DCDC1]),
-	TPS65218_OF_MATCH("ti,tps65218-dcdc2", tps65218_pmic_regs[DCDC2]),
-	TPS65218_OF_MATCH("ti,tps65218-dcdc3", tps65218_pmic_regs[DCDC3]),
-	TPS65218_OF_MATCH("ti,tps65218-dcdc4", tps65218_pmic_regs[DCDC4]),
-	TPS65218_OF_MATCH("ti,tps65218-dcdc5", tps65218_pmic_regs[DCDC5]),
-	TPS65218_OF_MATCH("ti,tps65218-dcdc6", tps65218_pmic_regs[DCDC6]),
-	TPS65218_OF_MATCH("ti,tps65218-ldo1", tps65218_pmic_regs[LDO1]),
-	TPS65218_OF_MATCH("ti,tps65218-ls3", tps65218_pmic_regs[LS3]),
-	{ }
-};
-MODULE_DEVICE_TABLE(of, tps65218_of_match);
-
 static int tps65218_pmic_set_voltage_sel(struct regulator_dev *dev,
 					 unsigned selector)
 {
@@ -189,7 +152,7 @@ static int tps65218_pmic_set_suspend_disable(struct regulator_dev *dev)
 	if (rid == TPS65218_DCDC_3 && tps->rev == TPS65218_REV_2_1)
 		return 0;
 
-	if (!tps->info[rid]->strobe) {
+	if (!tps->strobes[rid]) {
 		if (rid == TPS65218_DCDC_3)
 			tps->info[rid]->strobe = 3;
 		else
@@ -198,8 +161,7 @@ static int tps65218_pmic_set_suspend_disable(struct regulator_dev *dev)
 
 	return tps65218_set_bits(tps, dev->desc->bypass_reg,
 				 dev->desc->bypass_mask,
-				 tps->info[rid]->strobe,
-				 TPS65218_PROTECT_L1);
+				 tps->strobes[rid], TPS65218_PROTECT_L1);
 }
 
 /* Operations permitted on DCDC1, DCDC2 */
@@ -301,104 +263,104 @@ static struct regulator_ops tps65218_dcdc56_pmic_ops = {
 };
 
 static const struct regulator_desc regulators[] = {
-	TPS65218_REGULATOR("DCDC1", TPS65218_DCDC_1, REGULATOR_VOLTAGE,
-			   tps65218_dcdc12_ops, 64, TPS65218_REG_CONTROL_DCDC1,
+	TPS65218_REGULATOR("DCDC1", "regulator-dcdc1", TPS65218_DCDC_1,
+			   REGULATOR_VOLTAGE, tps65218_dcdc12_ops, 64,
+			   TPS65218_REG_CONTROL_DCDC1,
 			   TPS65218_CONTROL_DCDC1_MASK, TPS65218_REG_ENABLE1,
 			   TPS65218_ENABLE1_DC1_EN, 0, 0, dcdc1_dcdc2_ranges,
 			   2, 4000, 0, TPS65218_REG_SEQ3,
 			   TPS65218_SEQ3_DC1_SEQ_MASK),
-	TPS65218_REGULATOR("DCDC2", TPS65218_DCDC_2, REGULATOR_VOLTAGE,
-			   tps65218_dcdc12_ops, 64, TPS65218_REG_CONTROL_DCDC2,
+	TPS65218_REGULATOR("DCDC2", "regulator-dcdc2", TPS65218_DCDC_2,
+			   REGULATOR_VOLTAGE, tps65218_dcdc12_ops, 64,
+			   TPS65218_REG_CONTROL_DCDC2,
 			   TPS65218_CONTROL_DCDC2_MASK, TPS65218_REG_ENABLE1,
 			   TPS65218_ENABLE1_DC2_EN, 0, 0, dcdc1_dcdc2_ranges,
 			   2, 4000, 0, TPS65218_REG_SEQ3,
 			   TPS65218_SEQ3_DC2_SEQ_MASK),
-	TPS65218_REGULATOR("DCDC3", TPS65218_DCDC_3, REGULATOR_VOLTAGE,
-			   tps65218_ldo1_dcdc34_ops, 64,
+	TPS65218_REGULATOR("DCDC3", "regulator-dcdc3", TPS65218_DCDC_3,
+			   REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 64,
 			   TPS65218_REG_CONTROL_DCDC3,
 			   TPS65218_CONTROL_DCDC3_MASK, TPS65218_REG_ENABLE1,
 			   TPS65218_ENABLE1_DC3_EN, 0, 0, ldo1_dcdc3_ranges, 2,
 			   0, 0, TPS65218_REG_SEQ4, TPS65218_SEQ4_DC3_SEQ_MASK),
-	TPS65218_REGULATOR("DCDC4", TPS65218_DCDC_4, REGULATOR_VOLTAGE,
-			   tps65218_ldo1_dcdc34_ops, 53,
+	TPS65218_REGULATOR("DCDC4", "regulator-dcdc4", TPS65218_DCDC_4,
+			   REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 53,
 			   TPS65218_REG_CONTROL_DCDC4,
 			   TPS65218_CONTROL_DCDC4_MASK, TPS65218_REG_ENABLE1,
 			   TPS65218_ENABLE1_DC4_EN, 0, 0, dcdc4_ranges, 2,
 			   0, 0, TPS65218_REG_SEQ4, TPS65218_SEQ4_DC4_SEQ_MASK),
-	TPS65218_REGULATOR("DCDC5", TPS65218_DCDC_5, REGULATOR_VOLTAGE,
-			   tps65218_dcdc56_pmic_ops, 1, -1, -1,
-			   TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC5_EN, 0, 0,
-			   NULL, 0, 0, 1000000, TPS65218_REG_SEQ5,
+	TPS65218_REGULATOR("DCDC5", "regulator-dcdc5", TPS65218_DCDC_5,
+			   REGULATOR_VOLTAGE, tps65218_dcdc56_pmic_ops, 1, -1,
+			   -1, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC5_EN, 0,
+			   0, NULL, 0, 0, 1000000, TPS65218_REG_SEQ5,
 			   TPS65218_SEQ5_DC5_SEQ_MASK),
-	TPS65218_REGULATOR("DCDC6", TPS65218_DCDC_6, REGULATOR_VOLTAGE,
-			   tps65218_dcdc56_pmic_ops, 1, -1, -1,
-			   TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC6_EN, 0, 0,
-			   NULL, 0, 0, 1800000, TPS65218_REG_SEQ5,
+	TPS65218_REGULATOR("DCDC6", "regulator-dcdc6", TPS65218_DCDC_6,
+			   REGULATOR_VOLTAGE, tps65218_dcdc56_pmic_ops, 1, -1,
+			   -1, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC6_EN, 0,
+			   0, NULL, 0, 0, 1800000, TPS65218_REG_SEQ5,
 			   TPS65218_SEQ5_DC6_SEQ_MASK),
-	TPS65218_REGULATOR("LDO1", TPS65218_LDO_1, REGULATOR_VOLTAGE,
-			   tps65218_ldo1_dcdc34_ops, 64,
+	TPS65218_REGULATOR("LDO1", "regulator-ldo1", TPS65218_LDO_1,
+			   REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 64,
 			   TPS65218_REG_CONTROL_LDO1,
 			   TPS65218_CONTROL_LDO1_MASK, TPS65218_REG_ENABLE2,
 			   TPS65218_ENABLE2_LDO1_EN, 0, 0, ldo1_dcdc3_ranges,
 			   2, 0, 0, TPS65218_REG_SEQ6,
 			   TPS65218_SEQ6_LDO1_SEQ_MASK),
-	TPS65218_REGULATOR("LS3", TPS65218_LS_3, REGULATOR_CURRENT,
-			   tps65218_ls3_ops, 0, 0, 0, TPS65218_REG_ENABLE2,
-			   TPS65218_ENABLE2_LS3_EN, TPS65218_REG_CONFIG2,
-			   TPS65218_CONFIG2_LS3ILIM_MASK, NULL, 0, 0, 0, 0, 0),
+	TPS65218_REGULATOR("LS3", "regulator-ls3", TPS65218_LS_3,
+			   REGULATOR_CURRENT, tps65218_ls3_ops, 0, 0, 0,
+			   TPS65218_REG_ENABLE2, TPS65218_ENABLE2_LS3_EN,
+			   TPS65218_REG_CONFIG2, TPS65218_CONFIG2_LS3ILIM_MASK,
+			   NULL, 0, 0, 0, 0, 0),
 };
 
 static int tps65218_regulator_probe(struct platform_device *pdev)
 {
 	struct tps65218 *tps = dev_get_drvdata(pdev->dev.parent);
-	struct regulator_init_data *init_data;
-	const struct tps_info	*template;
 	struct regulator_dev *rdev;
-	const struct of_device_id	*match;
 	struct regulator_config config = { };
-	int id, ret;
+	int i, ret;
 	unsigned int val;
 
-	match = of_match_device(tps65218_of_match, &pdev->dev);
-	if (!match)
-		return -ENODEV;
-
-	template = match->data;
-	id = template->id;
-	init_data = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node,
-					       &regulators[id]);
-
-	platform_set_drvdata(pdev, tps);
-
-	tps->info[id] = &tps65218_pmic_regs[id];
 	config.dev = &pdev->dev;
-	config.init_data = init_data;
+	config.dev->of_node = tps->dev->of_node;
 	config.driver_data = tps;
 	config.regmap = tps->regmap;
-	config.of_node = pdev->dev.of_node;
 
-	rdev = devm_regulator_register(&pdev->dev, &regulators[id], &config);
-	if (IS_ERR(rdev)) {
-		dev_err(tps->dev, "failed to register %s regulator\n",
-			pdev->name);
-		return PTR_ERR(rdev);
-	}
+	/* Allocate memory for strobes */
+	tps->strobes = devm_kzalloc(&pdev->dev, sizeof(u8) *
+				    TPS65218_NUM_REGULATOR, GFP_KERNEL);
 
-	ret = regmap_read(tps->regmap, regulators[id].bypass_reg, &val);
-	if (ret)
-		return ret;
+	for (i = 0; i < ARRAY_SIZE(regulators); i++) {
+		rdev = devm_regulator_register(&pdev->dev, &regulators[i],
+					       &config);
+		if (IS_ERR(rdev)) {
+			dev_err(tps->dev, "failed to register %s regulator\n",
+				pdev->name);
+			return PTR_ERR(rdev);
+		}
 
-	tps->info[id]->strobe = val & regulators[id].bypass_mask;
+		ret = regmap_read(tps->regmap, regulators[i].bypass_reg, &val);
+		if (ret)
+			return ret;
+
+		tps->strobes[i] = val & regulators[i].bypass_mask;
+	}
 
 	return 0;
 }
 
+static const struct platform_device_id tps65218_regulator_id_table[] = {
+	{ "tps65218-regulator", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, tps65218_regulator_id_table);
+
 static struct platform_driver tps65218_regulator_driver = {
 	.driver = {
 		.name = "tps65218-pmic",
-		.of_match_table = tps65218_of_match,
 	},
 	.probe = tps65218_regulator_probe,
+	.id_table = tps65218_regulator_id_table,
 };
 
 module_platform_driver(tps65218_regulator_driver);
diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index 51bef539091c..bccd2d68b1e3 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -282,6 +282,7 @@ struct tps65218 {
 	struct regulator_desc desc[TPS65218_NUM_REGULATOR];
 	struct tps_info *info[TPS65218_NUM_REGULATOR];
 	struct regmap *regmap;
+	u8 *strobes;
 };
 
 int tps65218_reg_write(struct tps65218 *tps, unsigned int reg,
-- 
cgit v1.2.3


From e9a2ea5a1ba09c35258f3663842fb8d8cf2e00c2 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Sat, 22 Oct 2016 06:19:49 -0700
Subject: cacheinfo: Introduce cache id

Cache management software needs an id for each instance of a cache of
a particular type.

The current cacheinfo structure does not provide any information about
the underlying hardware so there is no way to expose it.

Hardware with cache management features provides means (cpuid, enumeration
etc.) to retrieve the hardware id of a particular cache instance. Cache
instances which share hardware have the same hardware id.

Add an 'id' field to struct cacheinfo to store this information. Expose
this information under the /sys/devices/system/cpu/cpu*/cache/index*/
directory as well.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Cc: "Ravi V Shankar" <ravi.v.shankar@intel.com>
Cc: "Tony Luck" <tony.luck@intel.com>
Cc: "David Carrillo-Cisneros" <davidcc@google.com>
Cc: "Sai Prakhya" <sai.praneeth.prakhya@intel.com>
Cc: "Peter Zijlstra" <peterz@infradead.org>
Cc: "Stephane Eranian" <eranian@google.com>
Cc: "Dave Hansen" <dave.hansen@intel.com>
Cc: "Shaohua Li" <shli@fb.com>
Cc: "Nilay Vaish" <nilayvaish@gmail.com>
Cc: "Vikas Shivappa" <vikas.shivappa@linux.intel.com>
Cc: "Ingo Molnar" <mingo@elte.hu>
Cc: "Borislav Petkov" <bp@suse.de>
Cc: "H. Peter Anvin" <h.peter.anvin@intel.com>
Link: http://lkml.kernel.org/r/1477142405-32078-3-git-send-email-fenghua.yu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/base/cacheinfo.c  | 5 +++++
 include/linux/cacheinfo.h | 3 +++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index e9fd32e91668..00a9688043f4 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -233,6 +233,7 @@ static ssize_t file_name##_show(struct device *dev,		\
 	return sprintf(buf, "%u\n", this_leaf->object);		\
 }
 
+show_one(id, id);
 show_one(level, level);
 show_one(coherency_line_size, coherency_line_size);
 show_one(number_of_sets, number_of_sets);
@@ -314,6 +315,7 @@ static ssize_t write_policy_show(struct device *dev,
 	return n;
 }
 
+static DEVICE_ATTR_RO(id);
 static DEVICE_ATTR_RO(level);
 static DEVICE_ATTR_RO(type);
 static DEVICE_ATTR_RO(coherency_line_size);
@@ -327,6 +329,7 @@ static DEVICE_ATTR_RO(shared_cpu_list);
 static DEVICE_ATTR_RO(physical_line_partition);
 
 static struct attribute *cache_default_attrs[] = {
+	&dev_attr_id.attr,
 	&dev_attr_type.attr,
 	&dev_attr_level.attr,
 	&dev_attr_shared_cpu_map.attr,
@@ -350,6 +353,8 @@ cache_default_attrs_is_visible(struct kobject *kobj,
 	const struct cpumask *mask = &this_leaf->shared_cpu_map;
 	umode_t mode = attr->mode;
 
+	if ((attr == &dev_attr_id.attr) && (this_leaf->attributes & CACHE_ID))
+		return mode;
 	if ((attr == &dev_attr_type.attr) && this_leaf->type)
 		return mode;
 	if ((attr == &dev_attr_level.attr) && this_leaf->level)
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2189935075b4..0bcbb674da9d 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -18,6 +18,7 @@ enum cache_type {
 
 /**
  * struct cacheinfo - represent a cache leaf node
+ * @id: This cache's id. It is unique among caches with the same (type, level).
  * @type: type of the cache - data, inst or unified
  * @level: represents the hierarchy in the multi-level cache
  * @coherency_line_size: size of each cache line usually representing
@@ -44,6 +45,7 @@ enum cache_type {
  * keeping, the remaining members form the core properties of the cache
  */
 struct cacheinfo {
+	unsigned int id;
 	enum cache_type type;
 	unsigned int level;
 	unsigned int coherency_line_size;
@@ -61,6 +63,7 @@ struct cacheinfo {
 #define CACHE_WRITE_ALLOCATE	BIT(3)
 #define CACHE_ALLOCATE_POLICY_MASK	\
 	(CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE)
+#define CACHE_ID		BIT(4)
 
 	struct device_node *of_node;
 	bool disable_sysfs;
-- 
cgit v1.2.3


From 3cf25904fe467aebeaa77d402b6cf3c6c5d6303b Mon Sep 17 00:00:00 2001
From: Xo Wang <xow@google.com>
Date: Fri, 21 Oct 2016 10:20:12 -0700
Subject: net: phy: broadcom: Update Auxiliary Control Register macros

Add the RXD-to-RXC skew (delay) time bit in the Miscellaneous Control
shadow register and a mask for the shadow selector field.

Remove a re-definition of MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL.

Signed-off-by: Xo Wang <xow@google.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index e3354b74286c..22c4421c916c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -105,11 +105,12 @@
 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA	0x0800
 
 #define MII_BCM54XX_AUXCTL_MISC_WREN	0x8000
+#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW	0x0100
 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX	0x0200
 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC	0x7000
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
 
-#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL	0x0000
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MASK	0x0007
 
 /*
  * Broadcom LED source encodings.  These are used in BCM5461, BCM5481,
-- 
cgit v1.2.3


From d92ead16be405b6d52ff7b366d1c9865ccc684bd Mon Sep 17 00:00:00 2001
From: Xo Wang <xow@google.com>
Date: Fri, 21 Oct 2016 10:20:13 -0700
Subject: net: phy: broadcom: Add support for BCM54612E

This PHY has internal delays enabled after reset. This clears the
internal delay enables unless the interface specifically requests them.

Signed-off-by: Xo Wang <xow@google.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/brcmphy.h    |  1 +
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 870327efccf7..583ef8a2ec8d 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -337,6 +337,41 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
 	return ret;
 }
 
+static int bcm54612e_config_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	/* First, auto-negotiate. */
+	ret = genphy_config_aneg(phydev);
+
+	/* Clear TX internal delay unless requested. */
+	if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) &&
+	    (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) {
+		/* Disable TXD to GTXCLK clock delay (default set) */
+		/* Bit 9 is the only field in shadow register 00011 */
+		bcm_phy_write_shadow(phydev, 0x03, 0);
+	}
+
+	/* Clear RX internal delay unless requested. */
+	if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) &&
+	    (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) {
+		u16 reg;
+
+		/* Errata: reads require filling in the write selector field */
+		bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+				     MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC);
+		reg = phy_read(phydev, MII_BCM54XX_AUX_CTL);
+		/* Disable RXD to RXC delay (default set) */
+		reg &= ~MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW;
+		/* Clear shadow selector field */
+		reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK;
+		bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+				     MII_BCM54XX_AUXCTL_MISC_WREN | reg);
+	}
+
+	return ret;
+}
+
 static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set)
 {
 	int val;
@@ -484,6 +519,18 @@ static struct phy_driver broadcom_drivers[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm_phy_ack_intr,
 	.config_intr	= bcm_phy_config_intr,
+}, {
+	.phy_id		= PHY_ID_BCM54612E,
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "Broadcom BCM54612E",
+	.features	= PHY_GBIT_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= bcm54xx_config_init,
+	.config_aneg	= bcm54612e_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= bcm_phy_ack_intr,
+	.config_intr	= bcm_phy_config_intr,
 }, {
 	.phy_id		= PHY_ID_BCM54616S,
 	.phy_id_mask	= 0xfffffff0,
@@ -600,6 +647,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
 	{ PHY_ID_BCM5411, 0xfffffff0 },
 	{ PHY_ID_BCM5421, 0xfffffff0 },
 	{ PHY_ID_BCM5461, 0xfffffff0 },
+	{ PHY_ID_BCM54612E, 0xfffffff0 },
 	{ PHY_ID_BCM54616S, 0xfffffff0 },
 	{ PHY_ID_BCM5464, 0xfffffff0 },
 	{ PHY_ID_BCM5481, 0xfffffff0 },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 22c4421c916c..60def78c4e12 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -18,6 +18,7 @@
 #define PHY_ID_BCM5421			0x002060e0
 #define PHY_ID_BCM5464			0x002060b0
 #define PHY_ID_BCM5461			0x002060c0
+#define PHY_ID_BCM54612E		0x03625e60
 #define PHY_ID_BCM54616S		0x03625d10
 #define PHY_ID_BCM57780			0x03625d90
 
-- 
cgit v1.2.3


From aa276781a64a5f15ecc21e920960c5b1f84e5fee Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 18 Oct 2016 18:08:34 -0500
Subject: firmware: Add basic support for TI System Control Interface (TI-SCI)
 protocol

Texas Instrument's System Control Interface (TI-SCI) Message Protocol
is used in Texas Instrument's System on Chip (SoC) such as those
in keystone family K2G SoC to communicate between various compute
processors with a central system controller entity.

TI-SCI message protocol provides support for management of various
hardware entities within the SoC. Add support driver to allow
communication with system controller entity within the SoC using the
mailbox client.

We introduce the basic registration and query capability for the
driver protocol as part of this change. Subsequent patches add in
functionality specific to the TI-SCI features.

Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 MAINTAINERS                            |   2 +
 drivers/firmware/Kconfig               |  15 +
 drivers/firmware/Makefile              |   1 +
 drivers/firmware/ti_sci.c              | 790 +++++++++++++++++++++++++++++++++
 drivers/firmware/ti_sci.h              |  93 ++++
 include/linux/soc/ti/ti_sci_protocol.h |  69 +++
 6 files changed, 970 insertions(+)
 create mode 100644 drivers/firmware/ti_sci.c
 create mode 100644 drivers/firmware/ti_sci.h
 create mode 100644 include/linux/soc/ti/ti_sci_protocol.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 2bafadf5747a..467b29fafaca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11890,6 +11890,8 @@ M:	Santosh Shilimkar <ssantosh@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
+F:	drivers/firmware/ti_sci*
+F:	include/linux/soc/ti/ti_sci_protocol.h
 
 THANKO'S RAREMONO AM/FM/SW RADIO RECEIVER USB DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index bca172d42c74..9418d7d1b0a5 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -203,6 +203,21 @@ config QCOM_SCM_64
 	def_bool y
 	depends on QCOM_SCM && ARM64
 
+config TI_SCI_PROTOCOL
+	tristate "TI System Control Interface (TISCI) Message Protocol"
+	depends on TI_MESSAGE_MANAGER
+	help
+	  TI System Control Interface (TISCI) Message Protocol is used to manage
+	  compute systems such as ARM, DSP etc with the system controller in
+	  complex System on Chip(SoC) such as those found on certain keystone
+	  generation SoC from TI.
+
+	  System controller provides various facilities including power
+	  management function support.
+
+	  This protocol library is used by client drivers to use the features
+	  provided by the system controller.
+
 config HAVE_ARM_SMCCC
 	bool
 
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 898ac41fa8b3..dcb52c423151 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_QCOM_SCM)		+= qcom_scm.o
 obj-$(CONFIG_QCOM_SCM_64)	+= qcom_scm-64.o
 obj-$(CONFIG_QCOM_SCM_32)	+= qcom_scm-32.o
 CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQUIRES_SEC=1) -march=armv7-a
+obj-$(CONFIG_TI_SCI_PROTOCOL)	+= ti_sci.o
 
 obj-y				+= broadcom/
 obj-y				+= meson/
diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
new file mode 100644
index 000000000000..5e99d7c18276
--- /dev/null
+++ b/drivers/firmware/ti_sci.c
@@ -0,0 +1,790 @@
+/*
+ * Texas Instruments System Control Interface Protocol Driver
+ *
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *	Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/bitmap.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <linux/soc/ti/ti-msgmgr.h>
+#include <linux/soc/ti/ti_sci_protocol.h>
+
+#include "ti_sci.h"
+
+/* List of all TI SCI devices active in system */
+static LIST_HEAD(ti_sci_list);
+/* Protection for the entire list */
+static DEFINE_MUTEX(ti_sci_list_mutex);
+
+/**
+ * struct ti_sci_xfer - Structure representing a message flow
+ * @tx_message:	Transmit message
+ * @rx_len:	Receive message length
+ * @xfer_buf:	Preallocated buffer to store receive message
+ *		Since we work with request-ACK protocol, we can
+ *		reuse the same buffer for the rx path as we
+ *		use for the tx path.
+ * @done:	completion event
+ */
+struct ti_sci_xfer {
+	struct ti_msgmgr_message tx_message;
+	u8 rx_len;
+	u8 *xfer_buf;
+	struct completion done;
+};
+
+/**
+ * struct ti_sci_xfers_info - Structure to manage transfer information
+ * @sem_xfer_count:	Counting Semaphore for managing max simultaneous
+ *			Messages.
+ * @xfer_block:		Preallocated Message array
+ * @xfer_alloc_table:	Bitmap table for allocated messages.
+ *			Index of this bitmap table is also used for message
+ *			sequence identifier.
+ * @xfer_lock:		Protection for message allocation
+ */
+struct ti_sci_xfers_info {
+	struct semaphore sem_xfer_count;
+	struct ti_sci_xfer *xfer_block;
+	unsigned long *xfer_alloc_table;
+	/* protect transfer allocation */
+	spinlock_t xfer_lock;
+};
+
+/**
+ * struct ti_sci_desc - Description of SoC integration
+ * @host_id:		Host identifier representing the compute entity
+ * @max_rx_timeout_ms:	Timeout for communication with SoC (in Milliseconds)
+ * @max_msgs: Maximum number of messages that can be pending
+ *		  simultaneously in the system
+ * @max_msg_size: Maximum size of data per message that can be handled.
+ */
+struct ti_sci_desc {
+	u8 host_id;
+	int max_rx_timeout_ms;
+	int max_msgs;
+	int max_msg_size;
+};
+
+/**
+ * struct ti_sci_info - Structure representing a TI SCI instance
+ * @dev:	Device pointer
+ * @desc:	SoC description for this instance
+ * @d:		Debugfs file entry
+ * @debug_region: Memory region where the debug message are available
+ * @debug_region_size: Debug region size
+ * @debug_buffer: Buffer allocated to copy debug messages.
+ * @handle:	Instance of TI SCI handle to send to clients.
+ * @cl:		Mailbox Client
+ * @chan_tx:	Transmit mailbox channel
+ * @chan_rx:	Receive mailbox channel
+ * @minfo:	Message info
+ * @node:	list head
+ * @users:	Number of users of this instance
+ */
+struct ti_sci_info {
+	struct device *dev;
+	const struct ti_sci_desc *desc;
+	struct dentry *d;
+	void __iomem *debug_region;
+	char *debug_buffer;
+	size_t debug_region_size;
+	struct ti_sci_handle handle;
+	struct mbox_client cl;
+	struct mbox_chan *chan_tx;
+	struct mbox_chan *chan_rx;
+	struct ti_sci_xfers_info minfo;
+	struct list_head node;
+	/* protected by ti_sci_list_mutex */
+	int users;
+};
+
+#define cl_to_ti_sci_info(c)	container_of(c, struct ti_sci_info, cl)
+#define handle_to_ti_sci_info(h) container_of(h, struct ti_sci_info, handle)
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * ti_sci_debug_show() - Helper to dump the debug log
+ * @s:	sequence file pointer
+ * @unused:	unused.
+ *
+ * Return: 0
+ */
+static int ti_sci_debug_show(struct seq_file *s, void *unused)
+{
+	struct ti_sci_info *info = s->private;
+
+	memcpy_fromio(info->debug_buffer, info->debug_region,
+		      info->debug_region_size);
+	/*
+	 * We don't trust firmware to leave NULL terminated last byte (hence
+	 * we have allocated 1 extra 0 byte). Since we cannot guarantee any
+	 * specific data format for debug messages, We just present the data
+	 * in the buffer as is - we expect the messages to be self explanatory.
+	 */
+	seq_puts(s, info->debug_buffer);
+	return 0;
+}
+
+/**
+ * ti_sci_debug_open() - debug file open
+ * @inode:	inode pointer
+ * @file:	file pointer
+ *
+ * Return: result of single_open
+ */
+static int ti_sci_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ti_sci_debug_show, inode->i_private);
+}
+
+/* log file operations */
+static const struct file_operations ti_sci_debug_fops = {
+	.open = ti_sci_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * ti_sci_debugfs_create() - Create log debug file
+ * @pdev:	platform device pointer
+ * @info:	Pointer to SCI entity information
+ *
+ * Return: 0 if all went fine, else corresponding error.
+ */
+static int ti_sci_debugfs_create(struct platform_device *pdev,
+				 struct ti_sci_info *info)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	char debug_name[50] = "ti_sci_debug@";
+
+	/* Debug region is optional */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "debug_messages");
+	info->debug_region = devm_ioremap_resource(dev, res);
+	if (IS_ERR(info->debug_region))
+		return 0;
+	info->debug_region_size = resource_size(res);
+
+	info->debug_buffer = devm_kcalloc(dev, info->debug_region_size + 1,
+					  sizeof(char), GFP_KERNEL);
+	if (!info->debug_buffer)
+		return -ENOMEM;
+	/* Setup NULL termination */
+	info->debug_buffer[info->debug_region_size] = 0;
+
+	info->d = debugfs_create_file(strncat(debug_name, dev_name(dev),
+					      sizeof(debug_name)),
+				      0444, NULL, info, &ti_sci_debug_fops);
+	if (IS_ERR(info->d))
+		return PTR_ERR(info->d);
+
+	dev_dbg(dev, "Debug region => %p, size = %zu bytes, resource: %pr\n",
+		info->debug_region, info->debug_region_size, res);
+	return 0;
+}
+
+/**
+ * ti_sci_debugfs_destroy() - clean up log debug file
+ * @pdev:	platform device pointer
+ * @info:	Pointer to SCI entity information
+ */
+static void ti_sci_debugfs_destroy(struct platform_device *pdev,
+				   struct ti_sci_info *info)
+{
+	if (IS_ERR(info->debug_region))
+		return;
+
+	debugfs_remove(info->d);
+}
+#else /* CONFIG_DEBUG_FS */
+static inline int ti_sci_debugfs_create(struct platform_device *dev,
+					struct ti_sci_info *info)
+{
+	return 0;
+}
+
+static inline void ti_sci_debugfs_destroy(struct platform_device *dev,
+					  struct ti_sci_info *info)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * ti_sci_dump_header_dbg() - Helper to dump a message header.
+ * @dev:	Device pointer corresponding to the SCI entity
+ * @hdr:	pointer to header.
+ */
+static inline void ti_sci_dump_header_dbg(struct device *dev,
+					  struct ti_sci_msg_hdr *hdr)
+{
+	dev_dbg(dev, "MSGHDR:type=0x%04x host=0x%02x seq=0x%02x flags=0x%08x\n",
+		hdr->type, hdr->host, hdr->seq, hdr->flags);
+}
+
+/**
+ * ti_sci_rx_callback() - mailbox client callback for receive messages
+ * @cl:	client pointer
+ * @m:	mailbox message
+ *
+ * Processes one received message to appropriate transfer information and
+ * signals completion of the transfer.
+ *
+ * NOTE: This function will be invoked in IRQ context, hence should be
+ * as optimal as possible.
+ */
+static void ti_sci_rx_callback(struct mbox_client *cl, void *m)
+{
+	struct ti_sci_info *info = cl_to_ti_sci_info(cl);
+	struct device *dev = info->dev;
+	struct ti_sci_xfers_info *minfo = &info->minfo;
+	struct ti_msgmgr_message *mbox_msg = m;
+	struct ti_sci_msg_hdr *hdr = (struct ti_sci_msg_hdr *)mbox_msg->buf;
+	struct ti_sci_xfer *xfer;
+	u8 xfer_id;
+
+	xfer_id = hdr->seq;
+
+	/*
+	 * Are we even expecting this?
+	 * NOTE: barriers were implicit in locks used for modifying the bitmap
+	 */
+	if (!test_bit(xfer_id, minfo->xfer_alloc_table)) {
+		dev_err(dev, "Message for %d is not expected!\n", xfer_id);
+		return;
+	}
+
+	xfer = &minfo->xfer_block[xfer_id];
+
+	/* Is the message of valid length? */
+	if (mbox_msg->len > info->desc->max_msg_size) {
+		dev_err(dev, "Unable to handle %d xfer(max %d)\n",
+			mbox_msg->len, info->desc->max_msg_size);
+		ti_sci_dump_header_dbg(dev, hdr);
+		return;
+	}
+	if (mbox_msg->len < xfer->rx_len) {
+		dev_err(dev, "Recv xfer %d < expected %d length\n",
+			mbox_msg->len, xfer->rx_len);
+		ti_sci_dump_header_dbg(dev, hdr);
+		return;
+	}
+
+	ti_sci_dump_header_dbg(dev, hdr);
+	/* Take a copy to the rx buffer.. */
+	memcpy(xfer->xfer_buf, mbox_msg->buf, xfer->rx_len);
+	complete(&xfer->done);
+}
+
+/**
+ * ti_sci_get_one_xfer() - Allocate one message
+ * @info:	Pointer to SCI entity information
+ * @msg_type:	Message type
+ * @msg_flags:	Flag to set for the message
+ * @tx_message_size: transmit message size
+ * @rx_message_size: receive message size
+ *
+ * Helper function which is used by various command functions that are
+ * exposed to clients of this driver for allocating a message traffic event.
+ *
+ * This function can sleep depending on pending requests already in the system
+ * for the SCI entity. Further, this also holds a spinlock to maintain integrity
+ * of internal data structures.
+ *
+ * Return: 0 if all went fine, else corresponding error.
+ */
+static struct ti_sci_xfer *ti_sci_get_one_xfer(struct ti_sci_info *info,
+					       u16 msg_type, u32 msg_flags,
+					       size_t tx_message_size,
+					       size_t rx_message_size)
+{
+	struct ti_sci_xfers_info *minfo = &info->minfo;
+	struct ti_sci_xfer *xfer;
+	struct ti_sci_msg_hdr *hdr;
+	unsigned long flags;
+	unsigned long bit_pos;
+	u8 xfer_id;
+	int ret;
+	int timeout;
+
+	/* Ensure we have sane transfer sizes */
+	if (rx_message_size > info->desc->max_msg_size ||
+	    tx_message_size > info->desc->max_msg_size ||
+	    rx_message_size < sizeof(*hdr) || tx_message_size < sizeof(*hdr))
+		return ERR_PTR(-ERANGE);
+
+	/*
+	 * Ensure we have only controlled number of pending messages.
+	 * Ideally, we might just have to wait a single message, be
+	 * conservative and wait 5 times that..
+	 */
+	timeout = msecs_to_jiffies(info->desc->max_rx_timeout_ms) * 5;
+	ret = down_timeout(&minfo->sem_xfer_count, timeout);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	/* Keep the locked section as small as possible */
+	spin_lock_irqsave(&minfo->xfer_lock, flags);
+	bit_pos = find_first_zero_bit(minfo->xfer_alloc_table,
+				      info->desc->max_msgs);
+	set_bit(bit_pos, minfo->xfer_alloc_table);
+	spin_unlock_irqrestore(&minfo->xfer_lock, flags);
+
+	/*
+	 * We already ensured in probe that we can have max messages that can
+	 * fit in  hdr.seq - NOTE: this improves access latencies
+	 * to predictable O(1) access, BUT, it opens us to risk if
+	 * remote misbehaves with corrupted message sequence responses.
+	 * If that happens, we are going to be messed up anyways..
+	 */
+	xfer_id = (u8)bit_pos;
+
+	xfer = &minfo->xfer_block[xfer_id];
+
+	hdr = (struct ti_sci_msg_hdr *)xfer->tx_message.buf;
+	xfer->tx_message.len = tx_message_size;
+	xfer->rx_len = (u8)rx_message_size;
+
+	reinit_completion(&xfer->done);
+
+	hdr->seq = xfer_id;
+	hdr->type = msg_type;
+	hdr->host = info->desc->host_id;
+	hdr->flags = msg_flags;
+
+	return xfer;
+}
+
+/**
+ * ti_sci_put_one_xfer() - Release a message
+ * @minfo:	transfer info pointer
+ * @xfer:	message that was reserved by ti_sci_get_one_xfer
+ *
+ * This holds a spinlock to maintain integrity of internal data structures.
+ */
+static void ti_sci_put_one_xfer(struct ti_sci_xfers_info *minfo,
+				struct ti_sci_xfer *xfer)
+{
+	unsigned long flags;
+	struct ti_sci_msg_hdr *hdr;
+	u8 xfer_id;
+
+	hdr = (struct ti_sci_msg_hdr *)xfer->tx_message.buf;
+	xfer_id = hdr->seq;
+
+	/*
+	 * Keep the locked section as small as possible
+	 * NOTE: we might escape with smp_mb and no lock here..
+	 * but just be conservative and symmetric.
+	 */
+	spin_lock_irqsave(&minfo->xfer_lock, flags);
+	clear_bit(xfer_id, minfo->xfer_alloc_table);
+	spin_unlock_irqrestore(&minfo->xfer_lock, flags);
+
+	/* Increment the count for the next user to get through */
+	up(&minfo->sem_xfer_count);
+}
+
+/**
+ * ti_sci_do_xfer() - Do one transfer
+ * @info:	Pointer to SCI entity information
+ * @xfer:	Transfer to initiate and wait for response
+ *
+ * Return: -ETIMEDOUT in case of no response, if transmit error,
+ *	   return corresponding error, else if all goes well,
+ *	   return 0.
+ */
+static inline int ti_sci_do_xfer(struct ti_sci_info *info,
+				 struct ti_sci_xfer *xfer)
+{
+	int ret;
+	int timeout;
+	struct device *dev = info->dev;
+
+	ret = mbox_send_message(info->chan_tx, &xfer->tx_message);
+	if (ret < 0)
+		return ret;
+
+	ret = 0;
+
+	/* And we wait for the response. */
+	timeout = msecs_to_jiffies(info->desc->max_rx_timeout_ms);
+	if (!wait_for_completion_timeout(&xfer->done, timeout)) {
+		dev_err(dev, "Mbox timedout in resp(caller: %pF)\n",
+			(void *)_RET_IP_);
+		ret = -ETIMEDOUT;
+	}
+	/*
+	 * NOTE: we might prefer not to need the mailbox ticker to manage the
+	 * transfer queueing since the protocol layer queues things by itself.
+	 * Unfortunately, we have to kick the mailbox framework after we have
+	 * received our message.
+	 */
+	mbox_client_txdone(info->chan_tx, ret);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_get_revision() - command to get the revision of the SCI entity
+ * @info:	Pointer to SCI entity information
+ *
+ * Updates the SCI information in the internal data structure.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_get_revision(struct ti_sci_info *info)
+{
+	struct device *dev = info->dev;
+	struct ti_sci_handle *handle = &info->handle;
+	struct ti_sci_version_info *ver = &handle->version;
+	struct ti_sci_msg_resp_version *rev_info;
+	struct ti_sci_xfer *xfer;
+	int ret;
+
+	/* No need to setup flags since it is expected to respond */
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_VERSION,
+				   0x0, sizeof(struct ti_sci_msg_hdr),
+				   sizeof(*rev_info));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+
+	rev_info = (struct ti_sci_msg_resp_version *)xfer->xfer_buf;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	ver->abi_major = rev_info->abi_major;
+	ver->abi_minor = rev_info->abi_minor;
+	ver->firmware_revision = rev_info->firmware_revision;
+	strncpy(ver->firmware_description, rev_info->firmware_description,
+		sizeof(ver->firmware_description));
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+	return ret;
+}
+
+/**
+ * ti_sci_get_handle() - Get the TI SCI handle for a device
+ * @dev:	Pointer to device for which we want SCI handle
+ *
+ * NOTE: The function does not track individual clients of the framework
+ * and is expected to be maintained by caller of TI SCI protocol library.
+ * ti_sci_put_handle must be balanced with successful ti_sci_get_handle
+ * Return: pointer to handle if successful, else:
+ * -EPROBE_DEFER if the instance is not ready
+ * -ENODEV if the required node handler is missing
+ * -EINVAL if invalid conditions are encountered.
+ */
+const struct ti_sci_handle *ti_sci_get_handle(struct device *dev)
+{
+	struct device_node *ti_sci_np;
+	struct list_head *p;
+	struct ti_sci_handle *handle = NULL;
+	struct ti_sci_info *info;
+
+	if (!dev) {
+		pr_err("I need a device pointer\n");
+		return ERR_PTR(-EINVAL);
+	}
+	ti_sci_np = of_get_parent(dev->of_node);
+	if (!ti_sci_np) {
+		dev_err(dev, "No OF information\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	mutex_lock(&ti_sci_list_mutex);
+	list_for_each(p, &ti_sci_list) {
+		info = list_entry(p, struct ti_sci_info, node);
+		if (ti_sci_np == info->dev->of_node) {
+			handle = &info->handle;
+			info->users++;
+			break;
+		}
+	}
+	mutex_unlock(&ti_sci_list_mutex);
+	of_node_put(ti_sci_np);
+
+	if (!handle)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	return handle;
+}
+EXPORT_SYMBOL_GPL(ti_sci_get_handle);
+
+/**
+ * ti_sci_put_handle() - Release the handle acquired by ti_sci_get_handle
+ * @handle:	Handle acquired by ti_sci_get_handle
+ *
+ * NOTE: The function does not track individual clients of the framework
+ * and is expected to be maintained by caller of TI SCI protocol library.
+ * ti_sci_put_handle must be balanced with successful ti_sci_get_handle
+ *
+ * Return: 0 is successfully released
+ * if an error pointer was passed, it returns the error value back,
+ * if null was passed, it returns -EINVAL;
+ */
+int ti_sci_put_handle(const struct ti_sci_handle *handle)
+{
+	struct ti_sci_info *info;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	mutex_lock(&ti_sci_list_mutex);
+	if (!WARN_ON(!info->users))
+		info->users--;
+	mutex_unlock(&ti_sci_list_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ti_sci_put_handle);
+
+static void devm_ti_sci_release(struct device *dev, void *res)
+{
+	const struct ti_sci_handle **ptr = res;
+	const struct ti_sci_handle *handle = *ptr;
+	int ret;
+
+	ret = ti_sci_put_handle(handle);
+	if (ret)
+		dev_err(dev, "failed to put handle %d\n", ret);
+}
+
+/**
+ * devm_ti_sci_get_handle() - Managed get handle
+ * @dev:	device for which we want SCI handle for.
+ *
+ * NOTE: This releases the handle once the device resources are
+ * no longer needed. MUST NOT BE released with ti_sci_put_handle.
+ * The function does not track individual clients of the framework
+ * and is expected to be maintained by caller of TI SCI protocol library.
+ *
+ * Return: 0 if all went fine, else corresponding error.
+ */
+const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev)
+{
+	const struct ti_sci_handle **ptr;
+	const struct ti_sci_handle *handle;
+
+	ptr = devres_alloc(devm_ti_sci_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+	handle = ti_sci_get_handle(dev);
+
+	if (!IS_ERR(handle)) {
+		*ptr = handle;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return handle;
+}
+EXPORT_SYMBOL_GPL(devm_ti_sci_get_handle);
+
+/* Description for K2G */
+static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = {
+	.host_id = 2,
+	/* Conservative duration */
+	.max_rx_timeout_ms = 1000,
+	/* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */
+	.max_msgs = 20,
+	.max_msg_size = 64,
+};
+
+static const struct of_device_id ti_sci_of_match[] = {
+	{.compatible = "ti,k2g-sci", .data = &ti_sci_pmmc_k2g_desc},
+	{ /* Sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, ti_sci_of_match);
+
+static int ti_sci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id;
+	const struct ti_sci_desc *desc;
+	struct ti_sci_xfer *xfer;
+	struct ti_sci_info *info = NULL;
+	struct ti_sci_xfers_info *minfo;
+	struct mbox_client *cl;
+	int ret = -EINVAL;
+	int i;
+
+	of_id = of_match_device(ti_sci_of_match, dev);
+	if (!of_id) {
+		dev_err(dev, "OF data missing\n");
+		return -EINVAL;
+	}
+	desc = of_id->data;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->dev = dev;
+	info->desc = desc;
+	INIT_LIST_HEAD(&info->node);
+	minfo = &info->minfo;
+
+	/*
+	 * Pre-allocate messages
+	 * NEVER allocate more than what we can indicate in hdr.seq
+	 * if we have data description bug, force a fix..
+	 */
+	if (WARN_ON(desc->max_msgs >=
+		    1 << 8 * sizeof(((struct ti_sci_msg_hdr *)0)->seq)))
+		return -EINVAL;
+
+	minfo->xfer_block = devm_kcalloc(dev,
+					 desc->max_msgs,
+					 sizeof(*minfo->xfer_block),
+					 GFP_KERNEL);
+	if (!minfo->xfer_block)
+		return -ENOMEM;
+
+	minfo->xfer_alloc_table = devm_kzalloc(dev,
+					       BITS_TO_LONGS(desc->max_msgs)
+					       * sizeof(unsigned long),
+					       GFP_KERNEL);
+	if (!minfo->xfer_alloc_table)
+		return -ENOMEM;
+	bitmap_zero(minfo->xfer_alloc_table, desc->max_msgs);
+
+	/* Pre-initialize the buffer pointer to pre-allocated buffers */
+	for (i = 0, xfer = minfo->xfer_block; i < desc->max_msgs; i++, xfer++) {
+		xfer->xfer_buf = devm_kcalloc(dev, 1, desc->max_msg_size,
+					      GFP_KERNEL);
+		if (!xfer->xfer_buf)
+			return -ENOMEM;
+
+		xfer->tx_message.buf = xfer->xfer_buf;
+		init_completion(&xfer->done);
+	}
+
+	ret = ti_sci_debugfs_create(pdev, info);
+	if (ret)
+		dev_warn(dev, "Failed to create debug file\n");
+
+	platform_set_drvdata(pdev, info);
+
+	cl = &info->cl;
+	cl->dev = dev;
+	cl->tx_block = false;
+	cl->rx_callback = ti_sci_rx_callback;
+	cl->knows_txdone = true;
+
+	spin_lock_init(&minfo->xfer_lock);
+	sema_init(&minfo->sem_xfer_count, desc->max_msgs);
+
+	info->chan_rx = mbox_request_channel_byname(cl, "rx");
+	if (IS_ERR(info->chan_rx)) {
+		ret = PTR_ERR(info->chan_rx);
+		goto out;
+	}
+
+	info->chan_tx = mbox_request_channel_byname(cl, "tx");
+	if (IS_ERR(info->chan_tx)) {
+		ret = PTR_ERR(info->chan_tx);
+		goto out;
+	}
+	ret = ti_sci_cmd_get_revision(info);
+	if (ret) {
+		dev_err(dev, "Unable to communicate with TISCI(%d)\n", ret);
+		goto out;
+	}
+
+	dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n",
+		 info->handle.version.abi_major, info->handle.version.abi_minor,
+		 info->handle.version.firmware_revision,
+		 info->handle.version.firmware_description);
+
+	mutex_lock(&ti_sci_list_mutex);
+	list_add_tail(&info->node, &ti_sci_list);
+	mutex_unlock(&ti_sci_list_mutex);
+
+	return of_platform_populate(dev->of_node, NULL, NULL, dev);
+out:
+	if (!IS_ERR(info->chan_tx))
+		mbox_free_channel(info->chan_tx);
+	if (!IS_ERR(info->chan_rx))
+		mbox_free_channel(info->chan_rx);
+	debugfs_remove(info->d);
+	return ret;
+}
+
+static int ti_sci_remove(struct platform_device *pdev)
+{
+	struct ti_sci_info *info;
+	struct device *dev = &pdev->dev;
+	int ret = 0;
+
+	of_platform_depopulate(dev);
+
+	info = platform_get_drvdata(pdev);
+
+	mutex_lock(&ti_sci_list_mutex);
+	if (info->users)
+		ret = -EBUSY;
+	else
+		list_del(&info->node);
+	mutex_unlock(&ti_sci_list_mutex);
+
+	if (!ret) {
+		ti_sci_debugfs_destroy(pdev, info);
+
+		/* Safe to free channels since no more users */
+		mbox_free_channel(info->chan_tx);
+		mbox_free_channel(info->chan_rx);
+	}
+
+	return ret;
+}
+
+static struct platform_driver ti_sci_driver = {
+	.probe = ti_sci_probe,
+	.remove = ti_sci_remove,
+	.driver = {
+		   .name = "ti-sci",
+		   .of_match_table = of_match_ptr(ti_sci_of_match),
+	},
+};
+module_platform_driver(ti_sci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI System Control Interface(SCI) driver");
+MODULE_AUTHOR("Nishanth Menon");
+MODULE_ALIAS("platform:ti-sci");
diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h
new file mode 100644
index 000000000000..e9dc53f26e0e
--- /dev/null
+++ b/drivers/firmware/ti_sci.h
@@ -0,0 +1,93 @@
+/*
+ * Texas Instruments System Control Interface (TISCI) Protocol
+ *
+ * Communication protocol with TI SCI hardware
+ * The system works in a message response protocol
+ * See: http://processors.wiki.ti.com/index.php/TISCI for details
+ *
+ * Copyright (C)  2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ *   Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the
+ *   distribution.
+ *
+ *   Neither the name of Texas Instruments Incorporated nor the names of
+ *   its contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __TI_SCI_H
+#define __TI_SCI_H
+
+/* Generic Messages */
+#define TI_SCI_MSG_ENABLE_WDT	0x0000
+#define TI_SCI_MSG_WAKE_RESET	0x0001
+#define TI_SCI_MSG_VERSION	0x0002
+#define TI_SCI_MSG_WAKE_REASON	0x0003
+#define TI_SCI_MSG_GOODBYE	0x0004
+
+/**
+ * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses
+ * @type:	Type of messages: One of TI_SCI_MSG* values
+ * @host:	Host of the message
+ * @seq:	Message identifier indicating a transfer sequence
+ * @flags:	Flag for the message
+ */
+struct ti_sci_msg_hdr {
+	u16 type;
+	u8 host;
+	u8 seq;
+#define TI_SCI_MSG_FLAG(val)			(1 << (val))
+#define TI_SCI_FLAG_REQ_GENERIC_NORESPONSE	0x0
+#define TI_SCI_FLAG_REQ_ACK_ON_RECEIVED		TI_SCI_MSG_FLAG(0)
+#define TI_SCI_FLAG_REQ_ACK_ON_PROCESSED	TI_SCI_MSG_FLAG(1)
+#define TI_SCI_FLAG_RESP_GENERIC_NACK		0x0
+#define TI_SCI_FLAG_RESP_GENERIC_ACK		TI_SCI_MSG_FLAG(1)
+	/* Additional Flags */
+	u32 flags;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_version - Response for a message
+ * @hdr:		Generic header
+ * @firmware_description: String describing the firmware
+ * @firmware_revision:	Firmware revision
+ * @abi_major:		Major version of the ABI that firmware supports
+ * @abi_minor:		Minor version of the ABI that firmware supports
+ *
+ * In general, ABI version changes follow the rule that minor version increments
+ * are backward compatible. Major revision changes in ABI may not be
+ * backward compatible.
+ *
+ * Response to a generic message with message type TI_SCI_MSG_VERSION
+ */
+struct ti_sci_msg_resp_version {
+	struct ti_sci_msg_hdr hdr;
+	char firmware_description[32];
+	u16 firmware_revision;
+	u8 abi_major;
+	u8 abi_minor;
+} __packed;
+
+#endif /* __TI_SCI_H */
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
new file mode 100644
index 000000000000..e73483fd5327
--- /dev/null
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -0,0 +1,69 @@
+/*
+ * Texas Instruments System Control Interface Protocol
+ *
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *	Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TISCI_PROTOCOL_H
+#define __TISCI_PROTOCOL_H
+
+/**
+ * struct ti_sci_version_info - version information structure
+ * @abi_major:	Major ABI version. Change here implies risk of backward
+ *		compatibility break.
+ * @abi_minor:	Minor ABI version. Change here implies new feature addition,
+ *		or compatible change in ABI.
+ * @firmware_revision:	Firmware revision (not usually used).
+ * @firmware_description: Firmware description (not usually used).
+ */
+struct ti_sci_version_info {
+	u8 abi_major;
+	u8 abi_minor;
+	u16 firmware_revision;
+	char firmware_description[32];
+};
+
+/**
+ * struct ti_sci_handle - Handle returned to TI SCI clients for usage.
+ * @version:	structure containing version information
+ */
+struct ti_sci_handle {
+	struct ti_sci_version_info version;
+};
+
+#if IS_ENABLED(CONFIG_TI_SCI_PROTOCOL)
+const struct ti_sci_handle *ti_sci_get_handle(struct device *dev);
+int ti_sci_put_handle(const struct ti_sci_handle *handle);
+const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev);
+
+#else	/* CONFIG_TI_SCI_PROTOCOL */
+
+static inline const struct ti_sci_handle *ti_sci_get_handle(struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline int ti_sci_put_handle(const struct ti_sci_handle *handle)
+{
+	return -EINVAL;
+}
+
+static inline
+const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+#endif	/* CONFIG_TI_SCI_PROTOCOL */
+
+#endif	/* __TISCI_PROTOCOL_H */
-- 
cgit v1.2.3


From 9e7d756da7a5b0cc756d1f512f3eaf261834180a Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 18 Oct 2016 18:08:35 -0500
Subject: firmware: ti_sci: Add support for Device control

Texas Instrument's System Control Interface (TI-SCI) Message Protocol
is used in Texas Instrument's System on Chip (SoC) such as those
in keystone family K2G SoC to communicate between various compute
processors with a central system controller entity.

TI-SCI message protocol provides support for management of various
hardware entitites within the SoC. Add support driver to allow
communication with system controller entity within the SoC using the
mailbox client.

We introduce the fundamental device management capability support to
the driver protocol as part of this change.

[d-gerlach@ti.com: Contributed device reset handling]
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/firmware/ti_sci.c              | 433 +++++++++++++++++++++++++++++++++
 drivers/firmware/ti_sci.h              |  98 ++++++++
 include/linux/soc/ti/ti_sci_protocol.h |  91 +++++++
 3 files changed, 622 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index 5e99d7c18276..c7b25ccf6f07 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -494,6 +494,437 @@ fail:
 	return ret;
 }
 
+/**
+ * ti_sci_is_response_ack() - Generic ACK/NACK message checkup
+ * @r:	pointer to response buffer
+ *
+ * Return: true if the response was an ACK, else returns false.
+ */
+static inline bool ti_sci_is_response_ack(void *r)
+{
+	struct ti_sci_msg_hdr *hdr = r;
+
+	return hdr->flags & TI_SCI_FLAG_RESP_GENERIC_ACK ? true : false;
+}
+
+/**
+ * ti_sci_set_device_state() - Set device state helper
+ * @handle:	pointer to TI SCI handle
+ * @id:		Device identifier
+ * @flags:	flags to setup for the device
+ * @state:	State to move the device to
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_set_device_state(const struct ti_sci_handle *handle,
+				   u32 id, u32 flags, u8 state)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_set_device_state *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_DEVICE_STATE,
+				   flags | TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_set_device_state *)xfer->xfer_buf;
+	req->id = id;
+	req->state = state;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_get_device_state() - Get device state helper
+ * @handle:	Handle to the device
+ * @id:		Device Identifier
+ * @clcnt:	Pointer to Context Loss Count
+ * @resets:	pointer to resets
+ * @p_state:	pointer to p_state
+ * @c_state:	pointer to c_state
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_get_device_state(const struct ti_sci_handle *handle,
+				   u32 id,  u32 *clcnt,  u32 *resets,
+				    u8 *p_state,  u8 *c_state)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_get_device_state *req;
+	struct ti_sci_msg_resp_get_device_state *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	if (!clcnt && !resets && !p_state && !c_state)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	/* Response is expected, so need of any flags */
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_DEVICE_STATE,
+				   0, sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_get_device_state *)xfer->xfer_buf;
+	req->id = id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_get_device_state *)xfer->xfer_buf;
+	if (!ti_sci_is_response_ack(resp)) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	if (clcnt)
+		*clcnt = resp->context_loss_count;
+	if (resets)
+		*resets = resp->resets;
+	if (p_state)
+		*p_state = resp->programmed_state;
+	if (c_state)
+		*c_state = resp->current_state;
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_get_device() - command to request for device managed by TISCI
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * NOTE: The request is for exclusive access for the processor.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id,
+				       MSG_FLAG_DEVICE_EXCLUSIVE,
+				       MSG_DEVICE_SW_STATE_ON);
+}
+
+/**
+ * ti_sci_cmd_idle_device() - Command to idle a device managed by TISCI
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_idle_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id,
+				       MSG_FLAG_DEVICE_EXCLUSIVE,
+				       MSG_DEVICE_SW_STATE_RETENTION);
+}
+
+/**
+ * ti_sci_cmd_put_device() - command to release a device managed by TISCI
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_put_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id,
+				       0, MSG_DEVICE_SW_STATE_AUTO_OFF);
+}
+
+/**
+ * ti_sci_cmd_dev_is_valid() - Is the device valid
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Return: 0 if all went fine and the device ID is valid, else return
+ * appropriate error.
+ */
+static int ti_sci_cmd_dev_is_valid(const struct ti_sci_handle *handle, u32 id)
+{
+	u8 unused;
+
+	/* check the device state which will also tell us if the ID is valid */
+	return ti_sci_get_device_state(handle, id, NULL, NULL, NULL, &unused);
+}
+
+/**
+ * ti_sci_cmd_dev_get_clcnt() - Get context loss counter
+ * @handle:	Pointer to TISCI handle
+ * @id:		Device Identifier
+ * @count:	Pointer to Context Loss counter to populate
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_dev_get_clcnt(const struct ti_sci_handle *handle, u32 id,
+				    u32 *count)
+{
+	return ti_sci_get_device_state(handle, id, count, NULL, NULL, NULL);
+}
+
+/**
+ * ti_sci_cmd_dev_is_idle() - Check if the device is requested to be idle
+ * @handle:	Pointer to TISCI handle
+ * @id:		Device Identifier
+ * @r_state:	true if requested to be idle
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_dev_is_idle(const struct ti_sci_handle *handle, u32 id,
+				  bool *r_state)
+{
+	int ret;
+	u8 state;
+
+	if (!r_state)
+		return -EINVAL;
+
+	ret = ti_sci_get_device_state(handle, id, NULL, NULL, &state, NULL);
+	if (ret)
+		return ret;
+
+	*r_state = (state == MSG_DEVICE_SW_STATE_RETENTION);
+
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_dev_is_stop() - Check if the device is requested to be stopped
+ * @handle:	Pointer to TISCI handle
+ * @id:		Device Identifier
+ * @r_state:	true if requested to be stopped
+ * @curr_state:	true if currently stopped.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_dev_is_stop(const struct ti_sci_handle *handle, u32 id,
+				  bool *r_state,  bool *curr_state)
+{
+	int ret;
+	u8 p_state, c_state;
+
+	if (!r_state && !curr_state)
+		return -EINVAL;
+
+	ret =
+	    ti_sci_get_device_state(handle, id, NULL, NULL, &p_state, &c_state);
+	if (ret)
+		return ret;
+
+	if (r_state)
+		*r_state = (p_state == MSG_DEVICE_SW_STATE_AUTO_OFF);
+	if (curr_state)
+		*curr_state = (c_state == MSG_DEVICE_HW_STATE_OFF);
+
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_dev_is_on() - Check if the device is requested to be ON
+ * @handle:	Pointer to TISCI handle
+ * @id:		Device Identifier
+ * @r_state:	true if requested to be ON
+ * @curr_state:	true if currently ON and active
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_dev_is_on(const struct ti_sci_handle *handle, u32 id,
+				bool *r_state,  bool *curr_state)
+{
+	int ret;
+	u8 p_state, c_state;
+
+	if (!r_state && !curr_state)
+		return -EINVAL;
+
+	ret =
+	    ti_sci_get_device_state(handle, id, NULL, NULL, &p_state, &c_state);
+	if (ret)
+		return ret;
+
+	if (r_state)
+		*r_state = (p_state == MSG_DEVICE_SW_STATE_ON);
+	if (curr_state)
+		*curr_state = (c_state == MSG_DEVICE_HW_STATE_ON);
+
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_dev_is_trans() - Check if the device is currently transitioning
+ * @handle:	Pointer to TISCI handle
+ * @id:		Device Identifier
+ * @curr_state:	true if currently transitioning.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_dev_is_trans(const struct ti_sci_handle *handle, u32 id,
+				   bool *curr_state)
+{
+	int ret;
+	u8 state;
+
+	if (!curr_state)
+		return -EINVAL;
+
+	ret = ti_sci_get_device_state(handle, id, NULL, NULL, NULL, &state);
+	if (ret)
+		return ret;
+
+	*curr_state = (state == MSG_DEVICE_HW_STATE_TRANS);
+
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_set_device_resets() - command to set resets for device managed
+ *				    by TISCI
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ * @reset_state: Device specific reset bit field
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_set_device_resets(const struct ti_sci_handle *handle,
+					u32 id, u32 reset_state)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_set_device_resets *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_DEVICE_RESETS,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_set_device_resets *)xfer->xfer_buf;
+	req->id = id;
+	req->resets = reset_state;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_get_device_resets() - Get reset state for device managed
+ *				    by TISCI
+ * @handle:		Pointer to TISCI handle
+ * @id:			Device Identifier
+ * @reset_state:	Pointer to reset state to populate
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_get_device_resets(const struct ti_sci_handle *handle,
+					u32 id, u32 *reset_state)
+{
+	return ti_sci_get_device_state(handle, id, NULL, reset_state, NULL,
+				       NULL);
+}
+
+/*
+ * ti_sci_setup_ops() - Setup the operations structures
+ * @info:	pointer to TISCI pointer
+ */
+static void ti_sci_setup_ops(struct ti_sci_info *info)
+{
+	struct ti_sci_ops *ops = &info->handle.ops;
+	struct ti_sci_dev_ops *dops = &ops->dev_ops;
+
+	dops->get_device = ti_sci_cmd_get_device;
+	dops->idle_device = ti_sci_cmd_idle_device;
+	dops->put_device = ti_sci_cmd_put_device;
+
+	dops->is_valid = ti_sci_cmd_dev_is_valid;
+	dops->get_context_loss_count = ti_sci_cmd_dev_get_clcnt;
+	dops->is_idle = ti_sci_cmd_dev_is_idle;
+	dops->is_stop = ti_sci_cmd_dev_is_stop;
+	dops->is_on = ti_sci_cmd_dev_is_on;
+	dops->is_transitioning = ti_sci_cmd_dev_is_trans;
+	dops->set_device_resets = ti_sci_cmd_set_device_resets;
+	dops->get_device_resets = ti_sci_cmd_get_device_resets;
+}
+
 /**
  * ti_sci_get_handle() - Get the TI SCI handle for a device
  * @dev:	Pointer to device for which we want SCI handle
@@ -727,6 +1158,8 @@ static int ti_sci_probe(struct platform_device *pdev)
 		goto out;
 	}
 
+	ti_sci_setup_ops(info);
+
 	dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n",
 		 info->handle.version.abi_major, info->handle.version.abi_minor,
 		 info->handle.version.firmware_revision,
diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h
index e9dc53f26e0e..29ce0532a7ca 100644
--- a/drivers/firmware/ti_sci.h
+++ b/drivers/firmware/ti_sci.h
@@ -47,6 +47,11 @@
 #define TI_SCI_MSG_WAKE_REASON	0x0003
 #define TI_SCI_MSG_GOODBYE	0x0004
 
+/* Device requests */
+#define TI_SCI_MSG_SET_DEVICE_STATE	0x0200
+#define TI_SCI_MSG_GET_DEVICE_STATE	0x0201
+#define TI_SCI_MSG_SET_DEVICE_RESETS	0x0202
+
 /**
  * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses
  * @type:	Type of messages: One of TI_SCI_MSG* values
@@ -90,4 +95,97 @@ struct ti_sci_msg_resp_version {
 	u8 abi_minor;
 } __packed;
 
+/**
+ * struct ti_sci_msg_req_set_device_state - Set the desired state of the device
+ * @hdr:		Generic header
+ * @id:	Indicates which device to modify
+ * @reserved: Reserved space in message, must be 0 for backward compatibility
+ * @state: The desired state of the device.
+ *
+ * Certain flags can also be set to alter the device state:
+ * + MSG_FLAG_DEVICE_WAKE_ENABLED - Configure the device to be a wake source.
+ * The meaning of this flag will vary slightly from device to device and from
+ * SoC to SoC but it generally allows the device to wake the SoC out of deep
+ * suspend states.
+ * + MSG_FLAG_DEVICE_RESET_ISO - Enable reset isolation for this device.
+ * + MSG_FLAG_DEVICE_EXCLUSIVE - Claim this device exclusively. When passed
+ * with STATE_RETENTION or STATE_ON, it will claim the device exclusively.
+ * If another host already has this device set to STATE_RETENTION or STATE_ON,
+ * the message will fail. Once successful, other hosts attempting to set
+ * STATE_RETENTION or STATE_ON will fail.
+ *
+ * Request type is TI_SCI_MSG_SET_DEVICE_STATE, responded with a generic
+ * ACK/NACK message.
+ */
+struct ti_sci_msg_req_set_device_state {
+	/* Additional hdr->flags options */
+#define MSG_FLAG_DEVICE_WAKE_ENABLED	TI_SCI_MSG_FLAG(8)
+#define MSG_FLAG_DEVICE_RESET_ISO	TI_SCI_MSG_FLAG(9)
+#define MSG_FLAG_DEVICE_EXCLUSIVE	TI_SCI_MSG_FLAG(10)
+	struct ti_sci_msg_hdr hdr;
+	u32 id;
+	u32 reserved;
+
+#define MSG_DEVICE_SW_STATE_AUTO_OFF	0
+#define MSG_DEVICE_SW_STATE_RETENTION	1
+#define MSG_DEVICE_SW_STATE_ON		2
+	u8 state;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_get_device_state - Request to get device.
+ * @hdr:		Generic header
+ * @id:		Device Identifier
+ *
+ * Request type is TI_SCI_MSG_GET_DEVICE_STATE, responded device state
+ * information
+ */
+struct ti_sci_msg_req_get_device_state {
+	struct ti_sci_msg_hdr hdr;
+	u32 id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_get_device_state - Response to get device request.
+ * @hdr:		Generic header
+ * @context_loss_count: Indicates how many times the device has lost context. A
+ *	driver can use this monotonic counter to determine if the device has
+ *	lost context since the last time this message was exchanged.
+ * @resets: Programmed state of the reset lines.
+ * @programmed_state:	The state as programmed by set_device.
+ *			- Uses the MSG_DEVICE_SW_* macros
+ * @current_state:	The actual state of the hardware.
+ *
+ * Response to request TI_SCI_MSG_GET_DEVICE_STATE.
+ */
+struct ti_sci_msg_resp_get_device_state {
+	struct ti_sci_msg_hdr hdr;
+	u32 context_loss_count;
+	u32 resets;
+	u8 programmed_state;
+#define MSG_DEVICE_HW_STATE_OFF		0
+#define MSG_DEVICE_HW_STATE_ON		1
+#define MSG_DEVICE_HW_STATE_TRANS	2
+	u8 current_state;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_set_device_resets - Set the desired resets
+ *				configuration of the device
+ * @hdr:		Generic header
+ * @id:	Indicates which device to modify
+ * @resets: A bit field of resets for the device. The meaning, behavior,
+ *	and usage of the reset flags are device specific. 0 for a bit
+ *	indicates releasing the reset represented by that bit while 1
+ *	indicates keeping it held.
+ *
+ * Request type is TI_SCI_MSG_SET_DEVICE_RESETS, responded with a generic
+ * ACK/NACK message.
+ */
+struct ti_sci_msg_req_set_device_resets {
+	struct ti_sci_msg_hdr hdr;
+	u32 id;
+	u32 resets;
+} __packed;
+
 #endif /* __TI_SCI_H */
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index e73483fd5327..87fa73851471 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -33,12 +33,103 @@ struct ti_sci_version_info {
 	char firmware_description[32];
 };
 
+struct ti_sci_handle;
+
+/**
+ * struct ti_sci_dev_ops - Device control operations
+ * @get_device: Command to request for device managed by TISCI
+ *		Returns 0 for successful exclusive request, else returns
+ *		corresponding error message.
+ * @idle_device: Command to idle a device managed by TISCI
+ *		Returns 0 for successful exclusive request, else returns
+ *		corresponding error message.
+ * @put_device:	Command to release a device managed by TISCI
+ *		Returns 0 for successful release, else returns corresponding
+ *		error message.
+ * @is_valid:	Check if the device ID is a valid ID.
+ *		Returns 0 if the ID is valid, else returns corresponding error.
+ * @get_context_loss_count: Command to retrieve context loss counter - this
+ *		increments every time the device looses context. Overflow
+ *		is possible.
+ *		- count: pointer to u32 which will retrieve counter
+ *		Returns 0 for successful information request and count has
+ *		proper data, else returns corresponding error message.
+ * @is_idle:	Reports back about device idle state
+ *		- req_state: Returns requested idle state
+ *		Returns 0 for successful information request and req_state and
+ *		current_state has proper data, else returns corresponding error
+ *		message.
+ * @is_stop:	Reports back about device stop state
+ *		- req_state: Returns requested stop state
+ *		- current_state: Returns current stop state
+ *		Returns 0 for successful information request and req_state and
+ *		current_state has proper data, else returns corresponding error
+ *		message.
+ * @is_on:	Reports back about device ON(or active) state
+ *		- req_state: Returns requested ON state
+ *		- current_state: Returns current ON state
+ *		Returns 0 for successful information request and req_state and
+ *		current_state has proper data, else returns corresponding error
+ *		message.
+ * @is_transitioning: Reports back if the device is in the middle of transition
+ *		of state.
+ *		-current_state: Returns 'true' if currently transitioning.
+ * @set_device_resets: Command to configure resets for device managed by TISCI.
+ *		-reset_state: Device specific reset bit field
+ *		Returns 0 for successful request, else returns
+ *		corresponding error message.
+ * @get_device_resets: Command to read state of resets for device managed
+ *		by TISCI.
+ *		-reset_state: pointer to u32 which will retrieve resets
+ *		Returns 0 for successful request, else returns
+ *		corresponding error message.
+ *
+ * NOTE: for all these functions, the following parameters are generic in
+ * nature:
+ * -handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * -id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ */
+struct ti_sci_dev_ops {
+	int (*get_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*idle_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*put_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*is_valid)(const struct ti_sci_handle *handle, u32 id);
+	int (*get_context_loss_count)(const struct ti_sci_handle *handle,
+				      u32 id, u32 *count);
+	int (*is_idle)(const struct ti_sci_handle *handle, u32 id,
+		       bool *requested_state);
+	int (*is_stop)(const struct ti_sci_handle *handle, u32 id,
+		       bool *req_state, bool *current_state);
+	int (*is_on)(const struct ti_sci_handle *handle, u32 id,
+		     bool *req_state, bool *current_state);
+	int (*is_transitioning)(const struct ti_sci_handle *handle, u32 id,
+				bool *current_state);
+	int (*set_device_resets)(const struct ti_sci_handle *handle, u32 id,
+				 u32 reset_state);
+	int (*get_device_resets)(const struct ti_sci_handle *handle, u32 id,
+				 u32 *reset_state);
+};
+
+/**
+ * struct ti_sci_ops - Function support for TI SCI
+ * @dev_ops:	Device specific operations
+ */
+struct ti_sci_ops {
+	struct ti_sci_dev_ops dev_ops;
+};
+
 /**
  * struct ti_sci_handle - Handle returned to TI SCI clients for usage.
  * @version:	structure containing version information
+ * @ops:	operations that are made available to TI SCI clients
  */
 struct ti_sci_handle {
 	struct ti_sci_version_info version;
+	struct ti_sci_ops ops;
 };
 
 #if IS_ENABLED(CONFIG_TI_SCI_PROTOCOL)
-- 
cgit v1.2.3


From 9f72322050e4762adde66619f048b7317ad12d77 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 18 Oct 2016 18:08:36 -0500
Subject: firmware: ti_sci: Add support for Clock control

Texas Instrument's System Control Interface (TI-SCI) Message Protocol
is used in Texas Instrument's System on Chip (SoC) such as those
in keystone family K2G SoC to communicate between various compute
processors with a central system controller entity.

TI-SCI message protocol provides support for management of various
hardware entities within the SoC. Add support driver to allow
communication with system controller entity within the SoC using the
mailbox client.

In general, we expect to function at a device level of abstraction,
however, for proper operation of hardware blocks, many clocks directly
supplying the hardware block needs to be queried or configured.

Introduce support for the set of SCI message protocol support that
provide us with this capability.

Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/firmware/ti_sci.c              | 685 +++++++++++++++++++++++++++++++++
 drivers/firmware/ti_sci.h              | 289 ++++++++++++++
 include/linux/soc/ti/ti_sci_protocol.h |  78 ++++
 3 files changed, 1052 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index c7b25ccf6f07..496a007e5c69 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -902,6 +902,675 @@ static int ti_sci_cmd_get_device_resets(const struct ti_sci_handle *handle,
 				       NULL);
 }
 
+/**
+ * ti_sci_set_clock_state() - Set clock state helper
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @flags:	Header flags as needed
+ * @state:	State to request for the clock.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_set_clock_state(const struct ti_sci_handle *handle,
+				  u32 dev_id, u8 clk_id,
+				  u32 flags, u8 state)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_set_clock_state *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_STATE,
+				   flags | TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_set_clock_state *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+	req->request_state = state;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_get_clock_state() - Get clock state helper
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @programmed_state:	State requested for clock to move to
+ * @current_state:	State that the clock is currently in
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_get_clock_state(const struct ti_sci_handle *handle,
+				      u32 dev_id, u8 clk_id,
+				      u8 *programmed_state, u8 *current_state)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_get_clock_state *req;
+	struct ti_sci_msg_resp_get_clock_state *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	if (!programmed_state && !current_state)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_STATE,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_get_clock_state *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_get_clock_state *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp)) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	if (programmed_state)
+		*programmed_state = resp->programmed_state;
+	if (current_state)
+		*current_state = resp->current_state;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_get_clock() - Get control of a clock from TI SCI
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @needs_ssc: 'true' if Spread Spectrum clock is desired, else 'false'
+ * @can_change_freq: 'true' if frequency change is desired, else 'false'
+ * @enable_input_term: 'true' if input termination is desired, else 'false'
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_get_clock(const struct ti_sci_handle *handle, u32 dev_id,
+				u8 clk_id, bool needs_ssc, bool can_change_freq,
+				bool enable_input_term)
+{
+	u32 flags = 0;
+
+	flags |= needs_ssc ? MSG_FLAG_CLOCK_ALLOW_SSC : 0;
+	flags |= can_change_freq ? MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE : 0;
+	flags |= enable_input_term ? MSG_FLAG_CLOCK_INPUT_TERM : 0;
+
+	return ti_sci_set_clock_state(handle, dev_id, clk_id, flags,
+				      MSG_CLOCK_SW_STATE_REQ);
+}
+
+/**
+ * ti_sci_cmd_idle_clock() - Idle a clock which is in our control
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ *
+ * NOTE: This clock must have been requested by get_clock previously.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_idle_clock(const struct ti_sci_handle *handle,
+				 u32 dev_id, u8 clk_id)
+{
+	return ti_sci_set_clock_state(handle, dev_id, clk_id, 0,
+				      MSG_CLOCK_SW_STATE_UNREQ);
+}
+
+/**
+ * ti_sci_cmd_put_clock() - Release a clock from our control back to TISCI
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ *
+ * NOTE: This clock must have been requested by get_clock previously.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_put_clock(const struct ti_sci_handle *handle,
+				u32 dev_id, u8 clk_id)
+{
+	return ti_sci_set_clock_state(handle, dev_id, clk_id, 0,
+				      MSG_CLOCK_SW_STATE_AUTO);
+}
+
+/**
+ * ti_sci_cmd_clk_is_auto() - Is the clock being auto managed
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @req_state: state indicating if the clock is auto managed
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_is_auto(const struct ti_sci_handle *handle,
+				  u32 dev_id, u8 clk_id, bool *req_state)
+{
+	u8 state = 0;
+	int ret;
+
+	if (!req_state)
+		return -EINVAL;
+
+	ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id, &state, NULL);
+	if (ret)
+		return ret;
+
+	*req_state = (state == MSG_CLOCK_SW_STATE_AUTO);
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_clk_is_on() - Is the clock ON
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @req_state: state indicating if the clock is managed by us and enabled
+ * @curr_state: state indicating if the clock is ready for operation
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_is_on(const struct ti_sci_handle *handle, u32 dev_id,
+				u8 clk_id, bool *req_state, bool *curr_state)
+{
+	u8 c_state = 0, r_state = 0;
+	int ret;
+
+	if (!req_state && !curr_state)
+		return -EINVAL;
+
+	ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id,
+					 &r_state, &c_state);
+	if (ret)
+		return ret;
+
+	if (req_state)
+		*req_state = (r_state == MSG_CLOCK_SW_STATE_REQ);
+	if (curr_state)
+		*curr_state = (c_state == MSG_CLOCK_HW_STATE_READY);
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_clk_is_off() - Is the clock OFF
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @req_state: state indicating if the clock is managed by us and disabled
+ * @curr_state: state indicating if the clock is NOT ready for operation
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_is_off(const struct ti_sci_handle *handle, u32 dev_id,
+				 u8 clk_id, bool *req_state, bool *curr_state)
+{
+	u8 c_state = 0, r_state = 0;
+	int ret;
+
+	if (!req_state && !curr_state)
+		return -EINVAL;
+
+	ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id,
+					 &r_state, &c_state);
+	if (ret)
+		return ret;
+
+	if (req_state)
+		*req_state = (r_state == MSG_CLOCK_SW_STATE_UNREQ);
+	if (curr_state)
+		*curr_state = (c_state == MSG_CLOCK_HW_STATE_NOT_READY);
+	return 0;
+}
+
+/**
+ * ti_sci_cmd_clk_set_parent() - Set the clock source of a specific device clock
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @parent_id:	Parent clock identifier to set
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_set_parent(const struct ti_sci_handle *handle,
+				     u32 dev_id, u8 clk_id, u8 parent_id)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_set_clock_parent *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_PARENT,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_set_clock_parent *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+	req->parent_id = parent_id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_clk_get_parent() - Get current parent clock source
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @parent_id:	Current clock parent
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_get_parent(const struct ti_sci_handle *handle,
+				     u32 dev_id, u8 clk_id, u8 *parent_id)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_get_clock_parent *req;
+	struct ti_sci_msg_resp_get_clock_parent *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle || !parent_id)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_PARENT,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_get_clock_parent *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_get_clock_parent *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp))
+		ret = -ENODEV;
+	else
+		*parent_id = resp->parent_id;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_clk_get_num_parents() - Get num parents of the current clk source
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @num_parents: Returns he number of parents to the current clock.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_get_num_parents(const struct ti_sci_handle *handle,
+					  u32 dev_id, u8 clk_id,
+					  u8 *num_parents)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_get_clock_num_parents *req;
+	struct ti_sci_msg_resp_get_clock_num_parents *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle || !num_parents)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_NUM_CLOCK_PARENTS,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_get_clock_num_parents *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_get_clock_num_parents *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp))
+		ret = -ENODEV;
+	else
+		*num_parents = resp->num_parents;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_clk_get_match_freq() - Find a good match for frequency
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @min_freq:	The minimum allowable frequency in Hz. This is the minimum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @target_freq: The target clock frequency in Hz. A frequency will be
+ *		processed as close to this target frequency as possible.
+ * @max_freq:	The maximum allowable frequency in Hz. This is the maximum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @match_freq:	Frequency match in Hz response.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_get_match_freq(const struct ti_sci_handle *handle,
+					 u32 dev_id, u8 clk_id, u64 min_freq,
+					 u64 target_freq, u64 max_freq,
+					 u64 *match_freq)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_query_clock_freq *req;
+	struct ti_sci_msg_resp_query_clock_freq *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle || !match_freq)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_QUERY_CLOCK_FREQ,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_query_clock_freq *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+	req->min_freq_hz = min_freq;
+	req->target_freq_hz = target_freq;
+	req->max_freq_hz = max_freq;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_query_clock_freq *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp))
+		ret = -ENODEV;
+	else
+		*match_freq = resp->freq_hz;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_clk_set_freq() - Set a frequency for clock
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @min_freq:	The minimum allowable frequency in Hz. This is the minimum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @target_freq: The target clock frequency in Hz. A frequency will be
+ *		processed as close to this target frequency as possible.
+ * @max_freq:	The maximum allowable frequency in Hz. This is the maximum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_set_freq(const struct ti_sci_handle *handle,
+				   u32 dev_id, u8 clk_id, u64 min_freq,
+				   u64 target_freq, u64 max_freq)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_set_clock_freq *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_FREQ,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_set_clock_freq *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+	req->min_freq_hz = min_freq;
+	req->target_freq_hz = target_freq;
+	req->max_freq_hz = max_freq;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
+/**
+ * ti_sci_cmd_clk_get_freq() - Get current frequency
+ * @handle:	pointer to TI SCI handle
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @freq:	Currently frequency in Hz
+ *
+ * Return: 0 if all went well, else returns appropriate error value.
+ */
+static int ti_sci_cmd_clk_get_freq(const struct ti_sci_handle *handle,
+				   u32 dev_id, u8 clk_id, u64 *freq)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_get_clock_freq *req;
+	struct ti_sci_msg_resp_get_clock_freq *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle || !freq)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_FREQ,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_get_clock_freq *)xfer->xfer_buf;
+	req->dev_id = dev_id;
+	req->clk_id = clk_id;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_resp_get_clock_freq *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp))
+		ret = -ENODEV;
+	else
+		*freq = resp->freq_hz;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
 /*
  * ti_sci_setup_ops() - Setup the operations structures
  * @info:	pointer to TISCI pointer
@@ -910,6 +1579,7 @@ static void ti_sci_setup_ops(struct ti_sci_info *info)
 {
 	struct ti_sci_ops *ops = &info->handle.ops;
 	struct ti_sci_dev_ops *dops = &ops->dev_ops;
+	struct ti_sci_clk_ops *cops = &ops->clk_ops;
 
 	dops->get_device = ti_sci_cmd_get_device;
 	dops->idle_device = ti_sci_cmd_idle_device;
@@ -923,6 +1593,21 @@ static void ti_sci_setup_ops(struct ti_sci_info *info)
 	dops->is_transitioning = ti_sci_cmd_dev_is_trans;
 	dops->set_device_resets = ti_sci_cmd_set_device_resets;
 	dops->get_device_resets = ti_sci_cmd_get_device_resets;
+
+	cops->get_clock = ti_sci_cmd_get_clock;
+	cops->idle_clock = ti_sci_cmd_idle_clock;
+	cops->put_clock = ti_sci_cmd_put_clock;
+	cops->is_auto = ti_sci_cmd_clk_is_auto;
+	cops->is_on = ti_sci_cmd_clk_is_on;
+	cops->is_off = ti_sci_cmd_clk_is_off;
+
+	cops->set_parent = ti_sci_cmd_clk_set_parent;
+	cops->get_parent = ti_sci_cmd_clk_get_parent;
+	cops->get_num_parents = ti_sci_cmd_clk_get_num_parents;
+
+	cops->get_best_match_freq = ti_sci_cmd_clk_get_match_freq;
+	cops->set_freq = ti_sci_cmd_clk_set_freq;
+	cops->get_freq = ti_sci_cmd_clk_get_freq;
 }
 
 /**
diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h
index 29ce0532a7ca..f69907cfc128 100644
--- a/drivers/firmware/ti_sci.h
+++ b/drivers/firmware/ti_sci.h
@@ -52,6 +52,16 @@
 #define TI_SCI_MSG_GET_DEVICE_STATE	0x0201
 #define TI_SCI_MSG_SET_DEVICE_RESETS	0x0202
 
+/* Clock requests */
+#define TI_SCI_MSG_SET_CLOCK_STATE	0x0100
+#define TI_SCI_MSG_GET_CLOCK_STATE	0x0101
+#define TI_SCI_MSG_SET_CLOCK_PARENT	0x0102
+#define TI_SCI_MSG_GET_CLOCK_PARENT	0x0103
+#define TI_SCI_MSG_GET_NUM_CLOCK_PARENTS 0x0104
+#define TI_SCI_MSG_SET_CLOCK_FREQ	0x010c
+#define TI_SCI_MSG_QUERY_CLOCK_FREQ	0x010d
+#define TI_SCI_MSG_GET_CLOCK_FREQ	0x010e
+
 /**
  * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses
  * @type:	Type of messages: One of TI_SCI_MSG* values
@@ -188,4 +198,283 @@ struct ti_sci_msg_req_set_device_resets {
 	u32 resets;
 } __packed;
 
+/**
+ * struct ti_sci_msg_req_set_clock_state - Request to setup a Clock state
+ * @hdr:	Generic Header, Certain flags can be set specific to the clocks:
+ *		MSG_FLAG_CLOCK_ALLOW_SSC: Allow this clock to be modified
+ *		via spread spectrum clocking.
+ *		MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE: Allow this clock's
+ *		frequency to be changed while it is running so long as it
+ *		is within the min/max limits.
+ *		MSG_FLAG_CLOCK_INPUT_TERM: Enable input termination, this
+ *		is only applicable to clock inputs on the SoC pseudo-device.
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @request_state: Request the state for the clock to be set to.
+ *		MSG_CLOCK_SW_STATE_UNREQ: The IP does not require this clock,
+ *		it can be disabled, regardless of the state of the device
+ *		MSG_CLOCK_SW_STATE_AUTO: Allow the System Controller to
+ *		automatically manage the state of this clock. If the device
+ *		is enabled, then the clock is enabled. If the device is set
+ *		to off or retention, then the clock is internally set as not
+ *		being required by the device.(default)
+ *		MSG_CLOCK_SW_STATE_REQ:  Configure the clock to be enabled,
+ *		regardless of the state of the device.
+ *
+ * Normally, all required clocks are managed by TISCI entity, this is used
+ * only for specific control *IF* required. Auto managed state is
+ * MSG_CLOCK_SW_STATE_AUTO, in other states, TISCI entity assume remote
+ * will explicitly control.
+ *
+ * Request type is TI_SCI_MSG_SET_CLOCK_STATE, response is a generic
+ * ACK or NACK message.
+ */
+struct ti_sci_msg_req_set_clock_state {
+	/* Additional hdr->flags options */
+#define MSG_FLAG_CLOCK_ALLOW_SSC		TI_SCI_MSG_FLAG(8)
+#define MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE	TI_SCI_MSG_FLAG(9)
+#define MSG_FLAG_CLOCK_INPUT_TERM		TI_SCI_MSG_FLAG(10)
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+#define MSG_CLOCK_SW_STATE_UNREQ	0
+#define MSG_CLOCK_SW_STATE_AUTO		1
+#define MSG_CLOCK_SW_STATE_REQ		2
+	u8 request_state;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_get_clock_state - Request for clock state
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to get state of.
+ *
+ * Request type is TI_SCI_MSG_GET_CLOCK_STATE, response is state
+ * of the clock
+ */
+struct ti_sci_msg_req_get_clock_state {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_get_clock_state - Response to get clock state
+ * @hdr:	Generic Header
+ * @programmed_state: Any programmed state of the clock. This is one of
+ *		MSG_CLOCK_SW_STATE* values.
+ * @current_state: Current state of the clock. This is one of:
+ *		MSG_CLOCK_HW_STATE_NOT_READY: Clock is not ready
+ *		MSG_CLOCK_HW_STATE_READY: Clock is ready
+ *
+ * Response to TI_SCI_MSG_GET_CLOCK_STATE.
+ */
+struct ti_sci_msg_resp_get_clock_state {
+	struct ti_sci_msg_hdr hdr;
+	u8 programmed_state;
+#define MSG_CLOCK_HW_STATE_NOT_READY	0
+#define MSG_CLOCK_HW_STATE_READY	1
+	u8 current_state;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_set_clock_parent - Set the clock parent
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * @parent_id:	The new clock parent is selectable by an index via this
+ *		parameter.
+ *
+ * Request type is TI_SCI_MSG_SET_CLOCK_PARENT, response is generic
+ * ACK / NACK message.
+ */
+struct ti_sci_msg_req_set_clock_parent {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+	u8 parent_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_get_clock_parent - Get the clock parent
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to get the parent for.
+ *
+ * Request type is TI_SCI_MSG_GET_CLOCK_PARENT, response is parent information
+ */
+struct ti_sci_msg_req_get_clock_parent {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_get_clock_parent - Response with clock parent
+ * @hdr:	Generic Header
+ * @parent_id:	The current clock parent
+ *
+ * Response to TI_SCI_MSG_GET_CLOCK_PARENT.
+ */
+struct ti_sci_msg_resp_get_clock_parent {
+	struct ti_sci_msg_hdr hdr;
+	u8 parent_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_get_clock_num_parents - Request to get clock parents
+ * @hdr:	Generic header
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *
+ * This request provides information about how many clock parent options
+ * are available for a given clock to a device. This is typically used
+ * for input clocks.
+ *
+ * Request type is TI_SCI_MSG_GET_NUM_CLOCK_PARENTS, response is appropriate
+ * message, or NACK in case of inability to satisfy request.
+ */
+struct ti_sci_msg_req_get_clock_num_parents {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_get_clock_num_parents - Response for get clk parents
+ * @hdr:		Generic header
+ * @num_parents:	Number of clock parents
+ *
+ * Response to TI_SCI_MSG_GET_NUM_CLOCK_PARENTS
+ */
+struct ti_sci_msg_resp_get_clock_num_parents {
+	struct ti_sci_msg_hdr hdr;
+	u8 num_parents;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_query_clock_freq - Request to query a frequency
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @min_freq_hz: The minimum allowable frequency in Hz. This is the minimum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @target_freq_hz: The target clock frequency. A frequency will be found
+ *		as close to this target frequency as possible.
+ * @max_freq_hz: The maximum allowable frequency in Hz. This is the maximum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @clk_id:	Clock identifier for the device for this request.
+ *
+ * NOTE: Normally clock frequency management is automatically done by TISCI
+ * entity. In case of specific requests, TISCI evaluates capability to achieve
+ * requested frequency within provided range and responds with
+ * result message.
+ *
+ * Request type is TI_SCI_MSG_QUERY_CLOCK_FREQ, response is appropriate message,
+ * or NACK in case of inability to satisfy request.
+ */
+struct ti_sci_msg_req_query_clock_freq {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u64 min_freq_hz;
+	u64 target_freq_hz;
+	u64 max_freq_hz;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_query_clock_freq - Response to a clock frequency query
+ * @hdr:	Generic Header
+ * @freq_hz:	Frequency that is the best match in Hz.
+ *
+ * Response to request type TI_SCI_MSG_QUERY_CLOCK_FREQ. NOTE: if the request
+ * cannot be satisfied, the message will be of type NACK.
+ */
+struct ti_sci_msg_resp_query_clock_freq {
+	struct ti_sci_msg_hdr hdr;
+	u64 freq_hz;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_set_clock_freq - Request to setup a clock frequency
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @min_freq_hz: The minimum allowable frequency in Hz. This is the minimum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @target_freq_hz: The target clock frequency. The clock will be programmed
+ *		at a rate as close to this target frequency as possible.
+ * @max_freq_hz: The maximum allowable frequency in Hz. This is the maximum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * @clk_id:	Clock identifier for the device for this request.
+ *
+ * NOTE: Normally clock frequency management is automatically done by TISCI
+ * entity. In case of specific requests, TISCI evaluates capability to achieve
+ * requested range and responds with success/failure message.
+ *
+ * This sets the desired frequency for a clock within an allowable
+ * range. This message will fail on an enabled clock unless
+ * MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE is set for the clock. Additionally,
+ * if other clocks have their frequency modified due to this message,
+ * they also must have the MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE or be disabled.
+ *
+ * Calling set frequency on a clock input to the SoC pseudo-device will
+ * inform the PMMC of that clock's frequency. Setting a frequency of
+ * zero will indicate the clock is disabled.
+ *
+ * Calling set frequency on clock outputs from the SoC pseudo-device will
+ * function similarly to setting the clock frequency on a device.
+ *
+ * Request type is TI_SCI_MSG_SET_CLOCK_FREQ, response is a generic ACK/NACK
+ * message.
+ */
+struct ti_sci_msg_req_set_clock_freq {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u64 min_freq_hz;
+	u64 target_freq_hz;
+	u64 max_freq_hz;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_req_get_clock_freq - Request to get the clock frequency
+ * @hdr:	Generic Header
+ * @dev_id:	Device identifier this request is for
+ * @clk_id:	Clock identifier for the device for this request.
+ *
+ * NOTE: Normally clock frequency management is automatically done by TISCI
+ * entity. In some cases, clock frequencies are configured by host.
+ *
+ * Request type is TI_SCI_MSG_GET_CLOCK_FREQ, responded with clock frequency
+ * that the clock is currently at.
+ */
+struct ti_sci_msg_req_get_clock_freq {
+	struct ti_sci_msg_hdr hdr;
+	u32 dev_id;
+	u8 clk_id;
+} __packed;
+
+/**
+ * struct ti_sci_msg_resp_get_clock_freq - Response of clock frequency request
+ * @hdr:	Generic Header
+ * @freq_hz:	Frequency that the clock is currently on, in Hz.
+ *
+ * Response to request type TI_SCI_MSG_GET_CLOCK_FREQ.
+ */
+struct ti_sci_msg_resp_get_clock_freq {
+	struct ti_sci_msg_hdr hdr;
+	u64 freq_hz;
+} __packed;
+
 #endif /* __TI_SCI_H */
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 87fa73851471..76378fddf609 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -114,12 +114,90 @@ struct ti_sci_dev_ops {
 				 u32 *reset_state);
 };
 
+/**
+ * struct ti_sci_clk_ops - Clock control operations
+ * @get_clock:	Request for activation of clock and manage by processor
+ *		- needs_ssc: 'true' if Spread Spectrum clock is desired.
+ *		- can_change_freq: 'true' if frequency change is desired.
+ *		- enable_input_term: 'true' if input termination is desired.
+ * @idle_clock:	Request for Idling a clock managed by processor
+ * @put_clock:	Release the clock to be auto managed by TISCI
+ * @is_auto:	Is the clock being auto managed
+ *		- req_state: state indicating if the clock is auto managed
+ * @is_on:	Is the clock ON
+ *		- req_state: if the clock is requested to be forced ON
+ *		- current_state: if the clock is currently ON
+ * @is_off:	Is the clock OFF
+ *		- req_state: if the clock is requested to be forced OFF
+ *		- current_state: if the clock is currently Gated
+ * @set_parent:	Set the clock source of a specific device clock
+ *		- parent_id: Parent clock identifier to set.
+ * @get_parent:	Get the current clock source of a specific device clock
+ *		- parent_id: Parent clock identifier which is the parent.
+ * @get_num_parents: Get the number of parents of the current clock source
+ *		- num_parents: returns the number of parent clocks.
+ * @get_best_match_freq: Find a best matching frequency for a frequency
+ *		range.
+ *		- match_freq: Best matching frequency in Hz.
+ * @set_freq:	Set the Clock frequency
+ * @get_freq:	Get the Clock frequency
+ *		- current_freq: Frequency in Hz that the clock is at.
+ *
+ * NOTE: for all these functions, the following parameters are generic in
+ * nature:
+ * -handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * -did:	Device identifier this request is for
+ * -cid:	Clock identifier for the device for this request.
+ *		Each device has it's own set of clock inputs. This indexes
+ *		which clock input to modify.
+ * -min_freq:	The minimum allowable frequency in Hz. This is the minimum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ * -target_freq: The target clock frequency in Hz. A frequency will be
+ *		processed as close to this target frequency as possible.
+ * -max_freq:	The maximum allowable frequency in Hz. This is the maximum
+ *		allowable programmed frequency and does not account for clock
+ *		tolerances and jitter.
+ *
+ * Request for the clock - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_clock with put_clock. No refcounting is
+ * managed by driver for that purpose.
+ */
+struct ti_sci_clk_ops {
+	int (*get_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+			 bool needs_ssc, bool can_change_freq,
+			 bool enable_input_term);
+	int (*idle_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid);
+	int (*put_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid);
+	int (*is_auto)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+		       bool *req_state);
+	int (*is_on)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+		     bool *req_state, bool *current_state);
+	int (*is_off)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+		      bool *req_state, bool *current_state);
+	int (*set_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+			  u8 parent_id);
+	int (*get_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+			  u8 *parent_id);
+	int (*get_num_parents)(const struct ti_sci_handle *handle, u32 did,
+			       u8 cid, u8 *num_parents);
+	int (*get_best_match_freq)(const struct ti_sci_handle *handle, u32 did,
+				   u8 cid, u64 min_freq, u64 target_freq,
+				   u64 max_freq, u64 *match_freq);
+	int (*set_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+			u64 min_freq, u64 target_freq, u64 max_freq);
+	int (*get_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid,
+			u64 *current_freq);
+};
+
 /**
  * struct ti_sci_ops - Function support for TI SCI
  * @dev_ops:	Device specific operations
+ * @clk_ops:	Clock specific operations
  */
 struct ti_sci_ops {
 	struct ti_sci_dev_ops dev_ops;
+	struct ti_sci_clk_ops clk_ops;
 };
 
 /**
-- 
cgit v1.2.3


From 912cffb4ed8612dc99ee0251cc0c9785855162cd Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 18 Oct 2016 18:08:37 -0500
Subject: firmware: ti_sci: Add support for reboot core service

Since system controller now has control over SoC power management, it
needs to be explicitly requested to reboot the SoC. Add support for
it.

In some systems however, SoC needs to toggle a GPIO or send event to an
external entity (like a PMIC) for a system reboot to take place. To
facilitate that, we allow for a DT property to determine if the reboot
handler will be registered and further, the service is also made
available to other drivers (such as PMIC driver) to sequence the
additional operation and trigger the SoC reboot as the last step.

Tested-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/firmware/ti_sci.c              | 83 ++++++++++++++++++++++++++++++++++
 drivers/firmware/ti_sci.h              | 12 +++++
 include/linux/soc/ti/ti_sci_protocol.h | 11 +++++
 3 files changed, 106 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index 496a007e5c69..874ff32db366 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/soc/ti/ti-msgmgr.h>
 #include <linux/soc/ti/ti_sci_protocol.h>
+#include <linux/reboot.h>
 
 #include "ti_sci.h"
 
@@ -90,6 +91,7 @@ struct ti_sci_desc {
  * struct ti_sci_info - Structure representing a TI SCI instance
  * @dev:	Device pointer
  * @desc:	SoC description for this instance
+ * @nb:	Reboot Notifier block
  * @d:		Debugfs file entry
  * @debug_region: Memory region where the debug message are available
  * @debug_region_size: Debug region size
@@ -104,6 +106,7 @@ struct ti_sci_desc {
  */
 struct ti_sci_info {
 	struct device *dev;
+	struct notifier_block nb;
 	const struct ti_sci_desc *desc;
 	struct dentry *d;
 	void __iomem *debug_region;
@@ -117,10 +120,12 @@ struct ti_sci_info {
 	struct list_head node;
 	/* protected by ti_sci_list_mutex */
 	int users;
+
 };
 
 #define cl_to_ti_sci_info(c)	container_of(c, struct ti_sci_info, cl)
 #define handle_to_ti_sci_info(h) container_of(h, struct ti_sci_info, handle)
+#define reboot_to_ti_sci_info(n) container_of(n, struct ti_sci_info, nb)
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -1571,6 +1576,52 @@ fail:
 	return ret;
 }
 
+static int ti_sci_cmd_core_reboot(const struct ti_sci_handle *handle)
+{
+	struct ti_sci_info *info;
+	struct ti_sci_msg_req_reboot *req;
+	struct ti_sci_msg_hdr *resp;
+	struct ti_sci_xfer *xfer;
+	struct device *dev;
+	int ret = 0;
+
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	if (!handle)
+		return -EINVAL;
+
+	info = handle_to_ti_sci_info(handle);
+	dev = info->dev;
+
+	xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SYS_RESET,
+				   TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
+				   sizeof(*req), sizeof(*resp));
+	if (IS_ERR(xfer)) {
+		ret = PTR_ERR(xfer);
+		dev_err(dev, "Message alloc failed(%d)\n", ret);
+		return ret;
+	}
+	req = (struct ti_sci_msg_req_reboot *)xfer->xfer_buf;
+
+	ret = ti_sci_do_xfer(info, xfer);
+	if (ret) {
+		dev_err(dev, "Mbox send fail %d\n", ret);
+		goto fail;
+	}
+
+	resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf;
+
+	if (!ti_sci_is_response_ack(resp))
+		ret = -ENODEV;
+	else
+		ret = 0;
+
+fail:
+	ti_sci_put_one_xfer(&info->minfo, xfer);
+
+	return ret;
+}
+
 /*
  * ti_sci_setup_ops() - Setup the operations structures
  * @info:	pointer to TISCI pointer
@@ -1578,9 +1629,12 @@ fail:
 static void ti_sci_setup_ops(struct ti_sci_info *info)
 {
 	struct ti_sci_ops *ops = &info->handle.ops;
+	struct ti_sci_core_ops *core_ops = &ops->core_ops;
 	struct ti_sci_dev_ops *dops = &ops->dev_ops;
 	struct ti_sci_clk_ops *cops = &ops->clk_ops;
 
+	core_ops->reboot_device = ti_sci_cmd_core_reboot;
+
 	dops->get_device = ti_sci_cmd_get_device;
 	dops->idle_device = ti_sci_cmd_idle_device;
 	dops->put_device = ti_sci_cmd_put_device;
@@ -1732,6 +1786,18 @@ const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(devm_ti_sci_get_handle);
 
+static int tisci_reboot_handler(struct notifier_block *nb, unsigned long mode,
+				void *cmd)
+{
+	struct ti_sci_info *info = reboot_to_ti_sci_info(nb);
+	const struct ti_sci_handle *handle = &info->handle;
+
+	ti_sci_cmd_core_reboot(handle);
+
+	/* call fail OR pass, we should not be here in the first place */
+	return NOTIFY_BAD;
+}
+
 /* Description for K2G */
 static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = {
 	.host_id = 2,
@@ -1759,6 +1825,7 @@ static int ti_sci_probe(struct platform_device *pdev)
 	struct mbox_client *cl;
 	int ret = -EINVAL;
 	int i;
+	int reboot = 0;
 
 	of_id = of_match_device(ti_sci_of_match, dev);
 	if (!of_id) {
@@ -1773,6 +1840,8 @@ static int ti_sci_probe(struct platform_device *pdev)
 
 	info->dev = dev;
 	info->desc = desc;
+	reboot = of_property_read_bool(dev->of_node,
+				       "ti,system-reboot-controller");
 	INIT_LIST_HEAD(&info->node);
 	minfo = &info->minfo;
 
@@ -1845,6 +1914,17 @@ static int ti_sci_probe(struct platform_device *pdev)
 
 	ti_sci_setup_ops(info);
 
+	if (reboot) {
+		info->nb.notifier_call = tisci_reboot_handler;
+		info->nb.priority = 128;
+
+		ret = register_restart_handler(&info->nb);
+		if (ret) {
+			dev_err(dev, "reboot registration fail(%d)\n", ret);
+			return ret;
+		}
+	}
+
 	dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n",
 		 info->handle.version.abi_major, info->handle.version.abi_minor,
 		 info->handle.version.firmware_revision,
@@ -1874,6 +1954,9 @@ static int ti_sci_remove(struct platform_device *pdev)
 
 	info = platform_get_drvdata(pdev);
 
+	if (info->nb.notifier_call)
+		unregister_restart_handler(&info->nb);
+
 	mutex_lock(&ti_sci_list_mutex);
 	if (info->users)
 		ret = -EBUSY;
diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h
index f69907cfc128..9b611e9e6f6d 100644
--- a/drivers/firmware/ti_sci.h
+++ b/drivers/firmware/ti_sci.h
@@ -46,6 +46,7 @@
 #define TI_SCI_MSG_VERSION	0x0002
 #define TI_SCI_MSG_WAKE_REASON	0x0003
 #define TI_SCI_MSG_GOODBYE	0x0004
+#define TI_SCI_MSG_SYS_RESET	0x0005
 
 /* Device requests */
 #define TI_SCI_MSG_SET_DEVICE_STATE	0x0200
@@ -105,6 +106,17 @@ struct ti_sci_msg_resp_version {
 	u8 abi_minor;
 } __packed;
 
+/**
+ * struct ti_sci_msg_req_reboot - Reboot the SoC
+ * @hdr:	Generic Header
+ *
+ * Request type is TI_SCI_MSG_SYS_RESET, responded with a generic
+ * ACK/NACK message.
+ */
+struct ti_sci_msg_req_reboot {
+	struct ti_sci_msg_hdr hdr;
+} __packed;
+
 /**
  * struct ti_sci_msg_req_set_device_state - Set the desired state of the device
  * @hdr:		Generic header
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 76378fddf609..0ccbc138c26a 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -35,6 +35,16 @@ struct ti_sci_version_info {
 
 struct ti_sci_handle;
 
+/**
+ * struct ti_sci_core_ops - SoC Core Operations
+ * @reboot_device: Reboot the SoC
+ *		Returns 0 for successful request(ideally should never return),
+ *		else returns corresponding error value.
+ */
+struct ti_sci_core_ops {
+	int (*reboot_device)(const struct ti_sci_handle *handle);
+};
+
 /**
  * struct ti_sci_dev_ops - Device control operations
  * @get_device: Command to request for device managed by TISCI
@@ -196,6 +206,7 @@ struct ti_sci_clk_ops {
  * @clk_ops:	Clock specific operations
  */
 struct ti_sci_ops {
+	struct ti_sci_core_ops core_ops;
 	struct ti_sci_dev_ops dev_ops;
 	struct ti_sci_clk_ops clk_ops;
 };
-- 
cgit v1.2.3


From 3f817fe718c6cb3ddcc2ab04ba86faecc20ef8fe Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Thu, 27 Oct 2016 00:42:01 +0300
Subject: cfg80211: Define IEEE P802.11ai (FILS) information elements

Define the Element IDs and Element ID Extensions from IEEE
P802.11ai/D11.0. In addition, add a new cfg80211_find_ext_ie() wrapper
to make it easier to find information elements that used the Element ID
Extension field.

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 20 ++++++++++++++++++++
 include/net/cfg80211.h    | 21 +++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a80516fd65c8..d428adf51446 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1960,6 +1960,26 @@ enum ieee80211_eid {
 
 	WLAN_EID_VENDOR_SPECIFIC = 221,
 	WLAN_EID_QOS_PARAMETER = 222,
+	WLAN_EID_CAG_NUMBER = 237,
+	WLAN_EID_AP_CSN = 239,
+	WLAN_EID_FILS_INDICATION = 240,
+	WLAN_EID_DILS = 241,
+	WLAN_EID_FRAGMENT = 242,
+	WLAN_EID_EXTENSION = 255
+};
+
+/* Element ID Extensions for Element ID 255 */
+enum ieee80211_eid_ext {
+	WLAN_EID_EXT_ASSOC_DELAY_INFO = 1,
+	WLAN_EID_EXT_FILS_REQ_PARAMS = 2,
+	WLAN_EID_EXT_FILS_KEY_CONFIRM = 3,
+	WLAN_EID_EXT_FILS_SESSION = 4,
+	WLAN_EID_EXT_FILS_HLP_CONTAINER = 5,
+	WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6,
+	WLAN_EID_EXT_KEY_DELIVERY = 7,
+	WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
+	WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
+	WLAN_EID_EXT_FILS_NONCE = 13,
 };
 
 /* Action category code */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dffc265a4fd6..8ca2e9f354f7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4180,6 +4180,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
 	return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0);
 }
 
+/**
+ * cfg80211_find_ext_ie - find information element with EID Extension in data
+ *
+ * @ext_eid: element ID Extension
+ * @ies: data consisting of IEs
+ * @len: length of data
+ *
+ * Return: %NULL if the extended element ID could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data), or a pointer to the first byte of the requested
+ * element, that is the byte containing the element ID.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data.
+ */
+static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len)
+{
+	return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len,
+				      &ext_eid, 1, 2);
+}
+
 /**
  * cfg80211_find_vendor_ie - find vendor specific information element in data
  *
-- 
cgit v1.2.3


From 631810603a20874554b2f17adf42b72d0f15eda5 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Thu, 27 Oct 2016 00:42:02 +0300
Subject: cfg80211: Add Fast Initial Link Setup (FILS) auth algs

This defines authentication algorithms for FILS (IEEE 802.11ai).

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  3 +++
 include/uapi/linux/nl80211.h |  6 ++++++
 net/wireless/nl80211.c       | 21 +++++++++++++++++++--
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d428adf51446..793a0174ba29 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation {
 #define WLAN_AUTH_SHARED_KEY 1
 #define WLAN_AUTH_FT 2
 #define WLAN_AUTH_SAE 3
+#define WLAN_AUTH_FILS_SK 4
+#define WLAN_AUTH_FILS_SK_PFS 5
+#define WLAN_AUTH_FILS_PK 6
 #define WLAN_AUTH_LEAP 128
 
 #define WLAN_AUTH_CHALLENGE_LEN 128
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 7825fd4db19e..4dc21265cd12 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3669,6 +3669,9 @@ enum nl80211_bss_status {
  * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r)
  * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP)
  * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals
+ * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key
+ * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS
+ * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key
  * @__NL80211_AUTHTYPE_NUM: internal
  * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm
  * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by
@@ -3681,6 +3684,9 @@ enum nl80211_auth_type {
 	NL80211_AUTHTYPE_FT,
 	NL80211_AUTHTYPE_NETWORK_EAP,
 	NL80211_AUTHTYPE_SAE,
+	NL80211_AUTHTYPE_FILS_SK,
+	NL80211_AUTHTYPE_FILS_SK_PFS,
+	NL80211_AUTHTYPE_FILS_PK,
 
 	/* keep last */
 	__NL80211_AUTHTYPE_NUM,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 704851142eed..ff798620e929 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3778,12 +3778,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
 		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
 		    auth_type == NL80211_AUTHTYPE_SAE)
 			return false;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_FILS_STA) &&
+		    (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+		     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+		     auth_type == NL80211_AUTHTYPE_FILS_PK))
+			return false;
 		return true;
 	case NL80211_CMD_CONNECT:
 	case NL80211_CMD_START_AP:
 		/* SAE not supported yet */
 		if (auth_type == NL80211_AUTHTYPE_SAE)
 			return false;
+		/* FILS not supported yet */
+		if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+		    auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+		    auth_type == NL80211_AUTHTYPE_FILS_PK)
+			return false;
 		return true;
 	default:
 		return false;
@@ -7810,12 +7821,18 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
 		return -EINVAL;
 
-	if (auth_type == NL80211_AUTHTYPE_SAE &&
+	if ((auth_type == NL80211_AUTHTYPE_SAE ||
+	     auth_type == NL80211_AUTHTYPE_FILS_SK ||
+	     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+	     auth_type == NL80211_AUTHTYPE_FILS_PK) &&
 	    !info->attrs[NL80211_ATTR_AUTH_DATA])
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
-		if (auth_type != NL80211_AUTHTYPE_SAE)
+		if (auth_type != NL80211_AUTHTYPE_SAE &&
+		    auth_type != NL80211_AUTHTYPE_FILS_SK &&
+		    auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
+		    auth_type != NL80211_AUTHTYPE_FILS_PK)
 			return -EINVAL;
 		auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
 		auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
-- 
cgit v1.2.3


From 348bd456699801920a309c66e382380809fbdf41 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Thu, 27 Oct 2016 00:42:03 +0300
Subject: cfg80211: Add KEK/nonces for FILS association frames

The new nl80211 attributes can be used to provide KEK and nonces to
allow the driver to encrypt and decrypt FILS (Re)Association
Request/Response frames in station mode.

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  3 +++
 include/net/cfg80211.h       |  9 +++++++++
 include/uapi/linux/nl80211.h |  8 ++++++++
 net/wireless/nl80211.c       | 12 ++++++++++++
 4 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 793a0174ba29..fe849329511a 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2096,6 +2096,9 @@ enum ieee80211_key_len {
 #define IEEE80211_GCMP_MIC_LEN		16
 #define IEEE80211_GCMP_PN_LEN		6
 
+#define FILS_NONCE_LEN			16
+#define FILS_MAX_KEK_LEN		64
+
 /* Public action codes */
 enum ieee80211_pub_actioncode {
 	WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8ca2e9f354f7..738b4d8a4666 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1840,6 +1840,12 @@ enum cfg80211_assoc_req_flags {
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
  * @vht_capa: VHT capability override
  * @vht_capa_mask: VHT capability mask indicating which fields to use
+ * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or
+ *	%NULL if FILS is not used.
+ * @fils_kek_len: Length of fils_kek in octets
+ * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association
+ *	Request/Response frame or %NULL if FILS is not used. This field starts
+ *	with 16 octets of STA Nonce followed by 16 octets of AP Nonce.
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -1851,6 +1857,9 @@ struct cfg80211_assoc_request {
 	struct ieee80211_ht_cap ht_capa;
 	struct ieee80211_ht_cap ht_capa_mask;
 	struct ieee80211_vht_cap vht_capa, vht_capa_mask;
+	const u8 *fils_kek;
+	size_t fils_kek_len;
+	const u8 *fils_nonces;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 4dc21265cd12..a268a009528a 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1944,6 +1944,11 @@ enum nl80211_commands {
  *	attribute.
  * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
  *	See &enum nl80211_nan_match_attributes.
+ * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame
+ *	protection.
+ * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association
+ *	Request/Response frame protection. This attribute contains the 16 octet
+ *	STA Nonce followed by 16 octets of AP Nonce.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2344,6 +2349,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_NAN_FUNC,
 	NL80211_ATTR_NAN_MATCH,
 
+	NL80211_ATTR_FILS_KEK,
+	NL80211_ATTR_FILS_NONCES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ff798620e929..667d5f719c22 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -414,6 +414,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
 	[NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
 	[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
+	[NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
+				    .len = FILS_MAX_KEK_LEN },
+	[NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN },
 };
 
 /* policy for the key attributes */
@@ -8033,6 +8036,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		req.flags |= ASSOC_REQ_USE_RRM;
 	}
 
+	if (info->attrs[NL80211_ATTR_FILS_KEK]) {
+		req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
+		req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
+		if (!info->attrs[NL80211_ATTR_FILS_NONCES])
+			return -EINVAL;
+		req.fils_nonces =
+			nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
 	if (!err) {
 		wdev_lock(dev->ieee80211_ptr);
-- 
cgit v1.2.3


From d705ff38189fcfbbfa6aa97363d30c23348ad166 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 3 Oct 2016 11:18:33 +0200
Subject: tty: vt, cleanup and document con_scroll

Scrolling helpers scrup and scrdown both accept 'top' and 'bottom' as
unsigned int. Number of lines 'nr' is accepted as int, but all callers
pass down unsigned too. So change the type of 'nr' to unsigned too.
Now, promote unsigned int from the helpers up to the con_scroll
hook which actually accepted all those as signed int.

Next, the 'dir' parameter can have only two values and we define
constants for that: SM_UP and SM_DOWN. Switch them to enum and do
proper type checking on 'dir' too.

Finally, document the behaviour of the hook.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: <linux-fbdev@vger.kernel.org>
Cc: <linux-usb@vger.kernel.org>
Cc: <linux-parisc@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c                     |  6 ++++--
 drivers/usb/misc/sisusbvga/sisusb_con.c | 18 ++++++++++--------
 drivers/video/console/fbcon.c           | 18 ++++++++----------
 drivers/video/console/mdacon.c          |  7 ++++---
 drivers/video/console/newport_con.c     |  8 ++++----
 drivers/video/console/sticon.c          |  7 ++++---
 drivers/video/console/vgacon.c          | 12 +++++-------
 include/linux/console.h                 | 16 +++++++++++-----
 8 files changed, 50 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 06fb39c1d6dd..c4bf96fee32e 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -315,7 +315,8 @@ void schedule_console_callback(void)
 	schedule_work(&console_work);
 }
 
-static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+static void scrup(struct vc_data *vc, unsigned int t, unsigned int b,
+		unsigned int nr)
 {
 	unsigned short *d, *s;
 
@@ -332,7 +333,8 @@ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
 		    vc->vc_size_row * nr);
 }
 
-static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b,
+		unsigned int nr)
 {
 	unsigned short *s;
 	unsigned int step;
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index 460cebf322e3..6331965daa0b 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -808,9 +808,10 @@ sisusbcon_cursor(struct vc_data *c, int mode)
 	mutex_unlock(&sisusb->lock);
 }
 
-static int
+static bool
 sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb,
-					int t, int b, int dir, int lines)
+		unsigned int t, unsigned int b, enum con_scroll dir,
+		unsigned int lines)
 {
 	int cols = sisusb->sisusb_num_columns;
 	int length = ((b - t) * cols) * 2;
@@ -852,8 +853,9 @@ sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb,
 }
 
 /* Interface routine */
-static int
-sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines)
+static bool
+sisusbcon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
+		enum con_scroll dir, unsigned int lines)
 {
 	struct sisusb_usb_data *sisusb;
 	u16 eattr = c->vc_video_erase_char;
@@ -870,17 +872,17 @@ sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines)
 	 */
 
 	if (!lines)
-		return 1;
+		return true;
 
 	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
 	if (!sisusb)
-		return 0;
+		return false;
 
 	/* sisusb->lock is down */
 
 	if (sisusb_is_inactive(c, sisusb)) {
 		mutex_unlock(&sisusb->lock);
-		return 0;
+		return false;
 	}
 
 	/* Special case */
@@ -971,7 +973,7 @@ sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines)
 
 	mutex_unlock(&sisusb->lock);
 
-	return 1;
+	return true;
 }
 
 /* Interface routine */
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index b87f5cfdaea5..a44f5627b82a 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -164,8 +164,6 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
 			int count, int ypos, int xpos);
 static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
 static void fbcon_cursor(struct vc_data *vc, int mode);
-static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
-			int count);
 static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
 			int height, int width);
 static int fbcon_switch(struct vc_data *vc);
@@ -1795,15 +1793,15 @@ static inline void fbcon_softback_note(struct vc_data *vc, int t,
 	softback_curr = softback_in;
 }
 
-static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
-			int count)
+static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+		enum con_scroll dir, unsigned int count)
 {
 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
 	struct display *p = &fb_display[vc->vc_num];
 	int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
 
 	if (fbcon_is_inactive(vc, info))
-		return -EINVAL;
+		return true;
 
 	fbcon_cursor(vc, CM_ERASE);
 
@@ -1831,7 +1829,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
 							(b - count)),
 				    vc->vc_video_erase_char,
 				    vc->vc_size_row * count);
-			return 1;
+			return true;
 			break;
 
 		case SCROLL_WRAP_MOVE:
@@ -1903,7 +1901,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
 							(b - count)),
 				    vc->vc_video_erase_char,
 				    vc->vc_size_row * count);
-			return 1;
+			return true;
 		}
 		break;
 
@@ -1922,7 +1920,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
 							t),
 				    vc->vc_video_erase_char,
 				    vc->vc_size_row * count);
-			return 1;
+			return true;
 			break;
 
 		case SCROLL_WRAP_MOVE:
@@ -1992,10 +1990,10 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
 							t),
 				    vc->vc_video_erase_char,
 				    vc->vc_size_row * count);
-			return 1;
+			return true;
 		}
 	}
-	return 0;
+	return false;
 }
 
 
diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index bacbb044d77c..ec192a1bf297 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -488,12 +488,13 @@ static void mdacon_cursor(struct vc_data *c, int mode)
 	}
 }
 
-static int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines)
+static bool mdacon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
+		enum con_scroll dir, unsigned int lines)
 {
 	u16 eattr = mda_convert_attr(c->vc_video_erase_char);
 
 	if (!lines)
-		return 0;
+		return false;
 
 	if (lines > c->vc_rows)   /* maximum realistic size */
 		lines = c->vc_rows;
@@ -514,7 +515,7 @@ static int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines)
 		break;
 	}
 
-	return 0;
+	return false;
 }
 
 
diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index e3b9521e4ec3..1e11614322fe 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -574,8 +574,8 @@ static int newport_font_set(struct vc_data *vc, struct console_font *font, unsig
 	return newport_set_font(vc->vc_num, font);
 }
 
-static int newport_scroll(struct vc_data *vc, int t, int b, int dir,
-			  int lines)
+static bool newport_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+		enum con_scroll dir, unsigned int lines)
 {
 	int count, x, y;
 	unsigned short *s, *d;
@@ -595,7 +595,7 @@ static int newport_scroll(struct vc_data *vc, int t, int b, int dir,
 					    (vc->vc_color & 0xf0) >> 4);
 		}
 		npregs->cset.topscan = (topscan - 1) & 0x3ff;
-		return 0;
+		return false;
 	}
 
 	count = (b - t - lines) * vc->vc_cols;
@@ -670,7 +670,7 @@ static int newport_scroll(struct vc_data *vc, int t, int b, int dir,
 			}
 		}
 	}
-	return 1;
+	return true;
 }
 
 static int newport_dummy(struct vc_data *c)
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index 3a10ac19598f..79c9bd8d3025 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -153,12 +153,13 @@ static void sticon_cursor(struct vc_data *conp, int mode)
     }
 }
 
-static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count)
+static bool sticon_scroll(struct vc_data *conp, unsigned int t,
+		unsigned int b, enum con_scroll dir, unsigned int count)
 {
     struct sti_struct *sti = sticon_sti;
 
     if (vga_is_gfx)
-        return 0;
+        return false;
 
     sticon_cursor(conp, CM_ERASE);
 
@@ -174,7 +175,7 @@ static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count)
 	break;
     }
 
-    return 0;
+    return false;
 }
 
 static void sticon_init(struct vc_data *c, int init)
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 11576611a974..4c54a873452e 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -83,8 +83,6 @@ static int vgacon_blank(struct vc_data *c, int blank, int mode_switch);
 static void vgacon_scrolldelta(struct vc_data *c, int lines);
 static int vgacon_set_origin(struct vc_data *c);
 static void vgacon_save_screen(struct vc_data *c);
-static int vgacon_scroll(struct vc_data *c, int t, int b, int dir,
-			 int lines);
 static void vgacon_invert_region(struct vc_data *c, u16 * p, int count);
 static struct uni_pagedir *vgacon_uni_pagedir;
 static int vgacon_refcount;
@@ -1350,17 +1348,17 @@ static void vgacon_save_screen(struct vc_data *c)
 			    c->vc_screenbuf_size > vga_vram_size ? vga_vram_size : c->vc_screenbuf_size);
 }
 
-static int vgacon_scroll(struct vc_data *c, int t, int b, int dir,
-			 int lines)
+static bool vgacon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
+		enum con_scroll dir, unsigned int lines)
 {
 	unsigned long oldo;
 	unsigned int delta;
 
 	if (t || b != c->vc_rows || vga_is_gfx || c->vc_mode != KD_TEXT)
-		return 0;
+		return false;
 
 	if (!vga_hardscroll_enabled || lines >= c->vc_rows / 2)
-		return 0;
+		return false;
 
 	vgacon_restore_screen(c);
 	oldo = c->vc_origin;
@@ -1396,7 +1394,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir,
 	c->vc_visible_origin = c->vc_origin;
 	vga_set_mem_top(c);
 	c->vc_pos = (c->vc_pos - oldo) + c->vc_origin;
-	return 1;
+	return true;
 }
 
 
diff --git a/include/linux/console.h b/include/linux/console.h
index 3672809234a7..508b012bd5bd 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -28,9 +28,17 @@ struct tty_struct;
 #define VT100ID "\033[?1;2c"
 #define VT102ID "\033[?6c"
 
+enum con_scroll {
+	SM_UP,
+	SM_DOWN,
+};
+
 /**
  * struct consw - callbacks for consoles
  *
+ * @con_scroll: move lines from @top to @bottom in direction @dir by @lines.
+ *		Return true if no generic handling should be done.
+ *		Invoked by csi_M and printing to the console.
  * @con_set_palette: sets the palette of the console to @table (optional)
  * @con_scrolldelta: the contents of the console should be scrolled by @lines.
  *		     Invoked by user. (optional)
@@ -44,7 +52,9 @@ struct consw {
 	void	(*con_putc)(struct vc_data *, int, int, int);
 	void	(*con_putcs)(struct vc_data *, const unsigned short *, int, int, int);
 	void	(*con_cursor)(struct vc_data *, int);
-	int	(*con_scroll)(struct vc_data *, int, int, int, int);
+	bool	(*con_scroll)(struct vc_data *, unsigned int top,
+			unsigned int bottom, enum con_scroll dir,
+			unsigned int lines);
 	int	(*con_switch)(struct vc_data *);
 	int	(*con_blank)(struct vc_data *, int, int);
 	int	(*con_font_set)(struct vc_data *, struct console_font *, unsigned);
@@ -99,10 +109,6 @@ static inline int con_debug_leave(void)
 }
 #endif
 
-/* scroll */
-#define SM_UP       (1)
-#define SM_DOWN     (2)
-
 /* cursor */
 #define CM_DRAW     (1)
 #define CM_ERASE    (2)
-- 
cgit v1.2.3


From 35cc56f9a30480c8a0cca809cf341614a2144758 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 3 Oct 2016 11:18:35 +0200
Subject: tty: vgacon+sisusb, move scrolldelta to a common helper

The code is mirrorred in scrolldelta implementations of both vgacon
and sisusb. Let's move the code to a separate helper where we will
perform a common cleanup and further changes.

While we are moving the code, make it linear and save one indentation
level. This is done by returning from the "!lines" then-branch
immediatelly. This allows flushing the else-branch 1 level to the
left, obviously.

Few more new lines and comments were added too.

And do not forget to export the helper function given sisusb can be
built as module.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: <linux-fbdev@vger.kernel.org>
Cc: <linux-usb@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c                     | 38 +++++++++++++++++++++++++++++++++
 drivers/usb/misc/sisusbvga/sisusb_con.c | 37 ++------------------------------
 drivers/video/console/vgacon.c          | 27 ++---------------------
 include/linux/vt_kern.h                 |  2 ++
 4 files changed, 44 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index a0b7576747cd..2eab714aab67 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4279,6 +4279,44 @@ void vcs_scr_updated(struct vc_data *vc)
 	notify_update(vc);
 }
 
+void vc_scrolldelta_helper(struct vc_data *c, int lines,
+		unsigned int rolled_over, void *base, unsigned int size)
+{
+	unsigned long ubase = (unsigned long)base;
+	int margin = c->vc_size_row * 4;
+	int ul, we, p, st;
+
+	/* Turn scrollback off */
+	if (!lines) {
+		c->vc_visible_origin = c->vc_origin;
+		return;
+	}
+
+	/* Do we have already enough to allow jumping from 0 to the end? */
+	if (rolled_over > (c->vc_scr_end - ubase) + margin) {
+		ul = c->vc_scr_end - ubase;
+		we = rolled_over + c->vc_size_row;
+	} else {
+		ul = 0;
+		we = size;
+	}
+
+	p = (c->vc_visible_origin - ubase - ul + we) % we +
+		lines * c->vc_size_row;
+	st = (c->vc_origin - ubase - ul + we) % we;
+
+	/* Only a little piece would be left? Show all incl. the piece! */
+	if (st < 2 * margin)
+		margin = 0;
+	if (p < margin)
+		p = 0;
+	if (p > st - margin)
+		p = st;
+
+	c->vc_visible_origin = ubase + (p + ul) % we;
+}
+EXPORT_SYMBOL_GPL(vc_scrolldelta_helper);
+
 /*
  *	Visible symbols for modules
  */
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index 6331965daa0b..4b5777ec1501 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -686,8 +686,6 @@ static void
 sisusbcon_scrolldelta(struct vc_data *c, int lines)
 {
 	struct sisusb_usb_data *sisusb;
-	int margin = c->vc_size_row * 4;
-	int ul, we, p, st;
 
 	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
 	if (!sisusb)
@@ -700,39 +698,8 @@ sisusbcon_scrolldelta(struct vc_data *c, int lines)
 		return;
 	}
 
-	if (!lines)		/* Turn scrollback off */
-		c->vc_visible_origin = c->vc_origin;
-	else {
-
-		if (sisusb->con_rolled_over >
-				(c->vc_scr_end - sisusb->scrbuf) + margin) {
-
-			ul = c->vc_scr_end - sisusb->scrbuf;
-			we = sisusb->con_rolled_over + c->vc_size_row;
-
-		} else {
-
-			ul = 0;
-			we = sisusb->scrbuf_size;
-
-		}
-
-		p = (c->vc_visible_origin - sisusb->scrbuf - ul + we) % we +
-				lines * c->vc_size_row;
-
-		st = (c->vc_origin - sisusb->scrbuf - ul + we) % we;
-
-		if (st < 2 * margin)
-			margin = 0;
-
-		if (p < margin)
-			p = 0;
-
-		if (p > st - margin)
-			p = st;
-
-		c->vc_visible_origin = sisusb->scrbuf + (p + ul) % we;
-	}
+	vc_scrolldelta_helper(c, lines, sisusb->con_rolled_over,
+			(void *)sisusb->scrbuf, sisusb->scrbuf_size);
 
 	sisusbcon_set_start_address(sisusb, c);
 
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 4c54a873452e..ede6a5a85ccd 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -332,31 +332,8 @@ static void vgacon_restore_screen(struct vc_data *c)
 
 static void vgacon_scrolldelta(struct vc_data *c, int lines)
 {
-	if (!lines)		/* Turn scrollback off */
-		c->vc_visible_origin = c->vc_origin;
-	else {
-		int margin = c->vc_size_row * 4;
-		int ul, we, p, st;
-
-		if (vga_rolled_over >
-		    (c->vc_scr_end - vga_vram_base) + margin) {
-			ul = c->vc_scr_end - vga_vram_base;
-			we = vga_rolled_over + c->vc_size_row;
-		} else {
-			ul = 0;
-			we = vga_vram_size;
-		}
-		p = (c->vc_visible_origin - vga_vram_base - ul + we) % we +
-		    lines * c->vc_size_row;
-		st = (c->vc_origin - vga_vram_base - ul + we) % we;
-		if (st < 2 * margin)
-			margin = 0;
-		if (p < margin)
-			p = 0;
-		if (p > st - margin)
-			p = st;
-		c->vc_visible_origin = vga_vram_base + (p + ul) % we;
-	}
+	vc_scrolldelta_helper(c, lines, vga_rolled_over, (void *)vga_vram_base,
+			vga_vram_size);
 	vga_set_mem_top(c);
 }
 #endif /* CONFIG_VGACON_SOFT_SCROLLBACK */
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 6abd24f258bc..833fdd4794a0 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -191,5 +191,7 @@ extern void vt_set_led_state(int console, int leds);
 extern void vt_kbd_con_start(int console);
 extern void vt_kbd_con_stop(int console);
 
+void vc_scrolldelta_helper(struct vc_data *c, int lines,
+		unsigned int rolled_over, void *_base, unsigned int size);
 
 #endif /* _VT_KERN_H */
-- 
cgit v1.2.3


From a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 24 Oct 2016 14:40:02 +0200
Subject: genetlink: no longer support using static family IDs

Static family IDs have never really been used, the only
use case was the workaround I introduced for those users
that assumed their family ID was also their multicast
group ID.

Additionally, because static family IDs would never be
reserved by the generic netlink code, using a relatively
low ID would only work for built-in families that can be
registered immediately after generic netlink is started,
which is basically only the control family (apart from
the workaround code, which I also had to add code for so
it would reserve those IDs)

Thus, anything other than GENL_ID_GENERATE is flawed and
luckily not used except in the cases I mentioned. Move
those workarounds into a few lines of code, and then get
rid of GENL_ID_GENERATE entirely, making it more robust.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/event.c                  |  1 -
 drivers/net/gtp.c                     |  1 -
 drivers/net/macsec.c                  |  1 -
 drivers/net/team/team.c               |  1 -
 drivers/net/wireless/mac80211_hwsim.c |  1 -
 drivers/scsi/pmcraid.c                |  6 ------
 drivers/target/target_core_user.c     |  1 -
 drivers/thermal/thermal_core.c        |  1 -
 fs/dlm/netlink.c                      |  1 -
 fs/quota/netlink.c                    |  7 -------
 include/linux/genl_magic_func.h       |  1 -
 include/net/genetlink.h               |  7 ++-----
 include/uapi/linux/genetlink.h        |  1 -
 kernel/taskstats.c                    |  1 -
 net/batman-adv/netlink.c              |  1 -
 net/core/devlink.c                    |  1 -
 net/core/drop_monitor.c               |  1 -
 net/hsr/hsr_netlink.c                 |  1 -
 net/ieee802154/netlink.c              |  1 -
 net/ieee802154/nl802154.c             |  1 -
 net/ipv4/fou.c                        |  1 -
 net/ipv4/tcp_metrics.c                |  1 -
 net/ipv6/ila/ila_xlat.c               |  1 -
 net/irda/irnetlink.c                  |  1 -
 net/l2tp/l2tp_netlink.c               |  1 -
 net/netfilter/ipvs/ip_vs_ctl.c        |  1 -
 net/netlabel/netlabel_calipso.c       |  1 -
 net/netlabel/netlabel_cipso_v4.c      |  1 -
 net/netlabel/netlabel_mgmt.c          |  1 -
 net/netlabel/netlabel_unlabeled.c     |  1 -
 net/netlink/genetlink.c               | 37 +++++++++++++++++++++--------------
 net/nfc/netlink.c                     |  1 -
 net/openvswitch/datapath.c            |  4 ----
 net/tipc/netlink.c                    |  1 -
 net/tipc/netlink_compat.c             |  1 -
 net/wimax/stack.c                     |  1 -
 net/wireless/nl80211.c                |  1 -
 37 files changed, 24 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index e24ea4e796e4..8dfca3d53131 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -83,7 +83,6 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = {
 };
 
 static struct genl_family acpi_event_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.name = ACPI_GENL_FAMILY_NAME,
 	.version = ACPI_GENL_VERSION,
 	.maxattr = ACPI_GENL_ATTR_MAX,
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 97e0cbca0a08..f66737ba1299 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1095,7 +1095,6 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
 }
 
 static struct genl_family gtp_genl_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= "gtp",
 	.version	= 0,
 	.hdrsize	= 0,
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 1a134cb2d52c..a5309b81a786 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1422,7 +1422,6 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa)
 }
 
 static struct genl_family macsec_fam = {
-	.id		= GENL_ID_GENERATE,
 	.name		= MACSEC_GENL_NAME,
 	.hdrsize	= 0,
 	.version	= MACSEC_GENL_VERSION,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a380649bf6b5..0b50205764ff 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2151,7 +2151,6 @@ static struct rtnl_link_ops team_link_ops __read_mostly = {
  ***********************************/
 
 static struct genl_family team_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= TEAM_GENL_NAME,
 	.version	= TEAM_GENL_VERSION,
 	.maxattr	= TEAM_ATTR_MAX,
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index e95b79bccf9b..54b6cd62676e 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -589,7 +589,6 @@ struct hwsim_radiotap_ack_hdr {
 
 /* MAC80211_HWSIM netlinf family */
 static struct genl_family hwsim_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = "MAC80211_HWSIM",
 	.version = 1,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 68a5c347fae9..cc50eb87b28a 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -1369,12 +1369,6 @@ static struct genl_multicast_group pmcraid_mcgrps[] = {
 };
 
 static struct genl_family pmcraid_event_family = {
-	/*
-	 * Due to prior multicast group abuse (the code having assumed that
-	 * the family ID can be used as a multicast group ID) we need to
-	 * statically allocate a family (and thus group) ID.
-	 */
-	.id = GENL_ID_PMCRAID,
 	.name = "pmcraid",
 	.version = 1,
 	.maxattr = PMCRAID_AEN_ATTR_MAX,
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 62bf4fe5704a..313a0ef3cda7 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -148,7 +148,6 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 
 /* Our generic netlink family */
 static struct genl_family tcmu_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = "TCM-USER",
 	.version = 1,
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 226b0b4aced6..68d7503f6417 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -2164,7 +2164,6 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = {
 };
 
 static struct genl_family thermal_event_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.name = THERMAL_GENL_FAMILY_NAME,
 	.version = THERMAL_GENL_VERSION,
 	.maxattr = THERMAL_GENL_ATTR_MAX,
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 1e6e227134d7..00d226956264 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -17,7 +17,6 @@ static uint32_t dlm_nl_seqnum;
 static uint32_t listener_nlportid;
 
 static struct genl_family family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= DLM_GENL_NAME,
 	.version	= DLM_GENL_VERSION,
 };
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 8b252673d454..3965a5cdfaa2 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -13,13 +13,6 @@ static const struct genl_multicast_group quota_mcgrps[] = {
 
 /* Netlink family structure for quota */
 static struct genl_family quota_genl_family = {
-	/*
-	 * Needed due to multicast group ID abuse - old code assumed
-	 * the family ID was also a valid multicast group ID (which
-	 * isn't true) and userspace might thus rely on it. Assign a
-	 * static ID for this group to make dealing with that easier.
-	 */
-	.id = GENL_ID_VFS_DQUOT,
 	.hdrsize = 0,
 	.name = "VFS_DQUOT",
 	.version = 1,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 667c31101b8b..7c070c1fe457 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -260,7 +260,6 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
  */
 #define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
 static struct genl_family ZZZ_genl_family __read_mostly = {
-	.id = GENL_ID_GENERATE,
 	.name = __stringify(GENL_MAGIC_FAMILY),
 	.version = GENL_MAGIC_VERSION,
 #ifdef GENL_MAGIC_FAMILY_HDRSZ
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index ef9defb3f5bc..43a5c3975a2f 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -20,7 +20,7 @@ struct genl_info;
 
 /**
  * struct genl_family - generic netlink family
- * @id: protocol family idenfitier
+ * @id: protocol family identifier (private)
  * @hdrsize: length of user specific header in bytes
  * @name: name of family
  * @version: protocol version
@@ -48,7 +48,7 @@ struct genl_info;
  * @n_ops: number of operations supported by this family (private)
  */
 struct genl_family {
-	unsigned int		id;
+	unsigned int		id;		/* private */
 	unsigned int		hdrsize;
 	char			name[GENL_NAMSIZ];
 	unsigned int		version;
@@ -149,9 +149,6 @@ static inline int genl_register_family(struct genl_family *family)
  * Registers the specified family and operations from the specified table.
  * Only one family may be registered with the same family name or identifier.
  *
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
- *
  * Either a doit or dumpit callback must be specified for every registered
  * operation or the function will fail. Only one operation structure per
  * command identifier may be registered.
diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 5512c90af7e3..d9b2db4a29c6 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -26,7 +26,6 @@ struct genlmsghdr {
 /*
  * List of reserved static generic netlink identifiers:
  */
-#define GENL_ID_GENERATE	0
 #define GENL_ID_CTRL		NLMSG_MIN_TYPE
 #define GENL_ID_VFS_DQUOT	(NLMSG_MIN_TYPE + 1)
 #define GENL_ID_PMCRAID		(NLMSG_MIN_TYPE + 2)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index b3f05ee20d18..d7a1a9461a10 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -42,7 +42,6 @@ static int family_registered;
 struct kmem_cache *taskstats_cache;
 
 static struct genl_family family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= TASKSTATS_GENL_NAME,
 	.version	= TASKSTATS_GENL_VERSION,
 	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 64cb6acbe0a6..a03b0ed7e8dd 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -49,7 +49,6 @@
 #include "translation-table.h"
 
 struct genl_family batadv_netlink_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = BATADV_NL_NAME,
 	.version = 1,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d2fd736de6a2..3008d9c33875 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -342,7 +342,6 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 }
 
 static struct genl_family devlink_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= DEVLINK_GENL_NAME,
 	.version	= DEVLINK_GENL_VERSION,
 	.maxattr	= DEVLINK_ATTR_MAX,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 72cfb0c61125..a5320dfcd978 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -60,7 +60,6 @@ struct dm_hw_stat_delta {
 };
 
 static struct genl_family net_drop_monitor_family = {
-	.id             = GENL_ID_GENERATE,
 	.hdrsize        = 0,
 	.name           = "NET_DM",
 	.version        = 2,
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index d4d1617f43a8..2ad039492bee 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -132,7 +132,6 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
 };
 
 static struct genl_family hsr_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = "HSR",
 	.version = 1,
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index c8133c07ceee..19144158b696 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -29,7 +29,6 @@ static unsigned int ieee802154_seq_num;
 static DEFINE_SPINLOCK(ieee802154_seq_lock);
 
 struct genl_family nl802154_family = {
-	.id		= GENL_ID_GENERATE,
 	.hdrsize	= 0,
 	.name		= IEEE802154_NL_NAME,
 	.version	= 1,
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 21aabadccd0e..182299858f1d 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -34,7 +34,6 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 
 /* the netlink family */
 static struct genl_family nl802154_fam = {
-	.id = GENL_ID_GENERATE,		/* don't bother with a hardcoded ID */
 	.name = NL802154_GENL_NAME,	/* have users key off the name instead */
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index cf50f7e2b012..e3fc527c5d37 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -623,7 +623,6 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 }
 
 static struct genl_family fou_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.hdrsize	= 0,
 	.name		= FOU_GENL_NAME,
 	.version	= FOU_GENL_VERSION,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index bf1f3b2b29d1..3da305127b32 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -743,7 +743,6 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
 }
 
 static struct genl_family tcp_metrics_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.hdrsize	= 0,
 	.name		= TCP_METRICS_GENL_NAME,
 	.version	= TCP_METRICS_GENL_VERSION,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e604013dd814..0d57e27d1cdd 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -119,7 +119,6 @@ static const struct rhashtable_params rht_params = {
 };
 
 static struct genl_family ila_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.hdrsize	= 0,
 	.name		= ILA_GENL_NAME,
 	.version	= ILA_GENL_VERSION,
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index e15c40e86660..f23b81aa91fe 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -25,7 +25,6 @@
 
 
 static struct genl_family irda_nl_family = {
-	.id = GENL_ID_GENERATE,
 	.name = IRDA_NL_NAME,
 	.hdrsize = 0,
 	.version = IRDA_NL_VERSION,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bf3117771822..4fbf1f41ac52 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -32,7 +32,6 @@
 
 
 static struct genl_family l2tp_nl_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= L2TP_GENL_NAME,
 	.version	= L2TP_GENL_VERSION,
 	.hdrsize	= 0,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c3c809b2e712..ceed66cdd03e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2841,7 +2841,6 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 
 /* IPVS genetlink family */
 static struct genl_family ip_vs_genl_family = {
-	.id		= GENL_ID_GENERATE,
 	.hdrsize	= 0,
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 2ec93c5e77bb..152e503b8c5d 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg {
 
 /* NetLabel Generic NETLINK CALIPSO family */
 static struct genl_family netlbl_calipso_gnl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_CALIPSO_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 7fd1104ba900..755b284e7ad4 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -60,7 +60,6 @@ struct netlbl_domhsh_walk_arg {
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
 static struct genl_family netlbl_cipsov4_gnl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index f85d0e07af2d..3b00f2368fcd 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg {
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
 static struct genl_family netlbl_mgmt_gnl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_MGMT_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4528cff9138b..c2ea8d1f653a 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -124,7 +124,6 @@ static u8 netlabel_unlabel_acceptflg;
 
 /* NetLabel Generic NETLINK unlabeled family */
 static struct genl_family netlbl_unlabel_gnl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_UNLABELED_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 01291b7a27bb..f19ec969edee 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -349,8 +349,6 @@ static int genl_validate_ops(const struct genl_family *family)
  *
  * Registers the specified family after validating it first. Only one
  * family may be registered with the same family name or identifier.
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
  *
  * The family's ops array must already be assigned, you can use the
  * genl_register_family_with_ops() helper function.
@@ -359,13 +357,7 @@ static int genl_validate_ops(const struct genl_family *family)
  */
 int __genl_register_family(struct genl_family *family)
 {
-	int err = -EINVAL, i;
-
-	if (family->id && family->id < GENL_MIN_ID)
-		goto errout;
-
-	if (family->id > GENL_MAX_ID)
-		goto errout;
+	int err, i;
 
 	err = genl_validate_ops(family);
 	if (err)
@@ -378,8 +370,27 @@ int __genl_register_family(struct genl_family *family)
 		goto errout_locked;
 	}
 
-	if (family->id == GENL_ID_GENERATE) {
-		u16 newid = genl_generate_id();
+	if (family == &genl_ctrl) {
+		family->id = GENL_ID_CTRL;
+	} else {
+		u16 newid;
+
+		/* this should be left zero in the struct */
+		WARN_ON(family->id);
+
+		/*
+		 * Sadly, a few cases need to be special-cased
+		 * due to them having previously abused the API
+		 * and having used their family ID also as their
+		 * multicast group ID, so we use reserved IDs
+		 * for both to be sure we can do that mapping.
+		 */
+		if (strcmp(family->name, "pmcraid") == 0)
+			newid = GENL_ID_PMCRAID;
+		else if (strcmp(family->name, "VFS_DQUOT") == 0)
+			newid = GENL_ID_VFS_DQUOT;
+		else
+			newid = genl_generate_id();
 
 		if (!newid) {
 			err = -ENOMEM;
@@ -387,9 +398,6 @@ int __genl_register_family(struct genl_family *family)
 		}
 
 		family->id = newid;
-	} else if (genl_family_find_byid(family->id)) {
-		err = -EEXIST;
-		goto errout_locked;
 	}
 
 	if (family->maxattr && !family->parallel_ops) {
@@ -419,7 +427,6 @@ int __genl_register_family(struct genl_family *family)
 
 errout_locked:
 	genl_unlock_all();
-errout:
 	return err;
 }
 EXPORT_SYMBOL(__genl_register_family);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 79786bf62b88..c230403e066c 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -39,7 +39,6 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
 };
 
 static struct genl_family nfc_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = NFC_GENL_NAME,
 	.version = NFC_GENL_VERSION,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 194435aa1165..f9fef7dfba15 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -671,7 +671,6 @@ static const struct genl_ops dp_packet_genl_ops[] = {
 };
 
 static struct genl_family dp_packet_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_PACKET_FAMILY,
 	.version = OVS_PACKET_VERSION,
@@ -1436,7 +1435,6 @@ static const struct genl_ops dp_flow_genl_ops[] = {
 };
 
 static struct genl_family dp_flow_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_FLOW_FAMILY,
 	.version = OVS_FLOW_VERSION,
@@ -1822,7 +1820,6 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
 };
 
 static struct genl_family dp_datapath_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_DATAPATH_FAMILY,
 	.version = OVS_DATAPATH_VERSION,
@@ -2244,7 +2241,6 @@ static const struct genl_ops dp_vport_genl_ops[] = {
 };
 
 struct genl_family dp_vport_genl_family = {
-	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_VPORT_FAMILY,
 	.version = OVS_VPORT_VERSION,
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 4b94f3cfe3af..383b8fedabc7 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -136,7 +136,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
  * so we have a separate genl handling for the new API.
  */
 struct genl_family tipc_genl_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= TIPC_GENL_V2_NAME,
 	.version	= TIPC_GENL_V2_VERSION,
 	.hdrsize	= 0,
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1fd464764765..f04428e4c8e5 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1216,7 +1216,6 @@ send:
 }
 
 static struct genl_family tipc_genl_compat_family = {
-	.id		= GENL_ID_GENERATE,
 	.name		= TIPC_GENL_NAME,
 	.version	= TIPC_GENL_VERSION,
 	.hdrsize	= TIPC_GENL_HDRLEN,
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3f816e2971ee..8ac83a41585f 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -573,7 +573,6 @@ size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
 
 
 struct genl_family wimax_gnl_family = {
-	.id = GENL_ID_GENERATE,
 	.name = "WiMAX",
 	.version = WIMAX_GNL_VERSION,
 	.hdrsize = 0,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7d8cb3330c86..714beafe05e0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -39,7 +39,6 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 
 /* the netlink family */
 static struct genl_family nl80211_fam = {
-	.id = GENL_ID_GENERATE,		/* don't bother with a hardcoded ID */
 	.name = NL80211_GENL_NAME,	/* have users key off the name instead */
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
-- 
cgit v1.2.3


From 489111e5c25b93be80340c3113d71903d7c82136 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 24 Oct 2016 14:40:03 +0200
Subject: genetlink: statically initialize families

Instead of providing macros/inline functions to initialize
the families, make all users initialize them statically and
get rid of the macros.

This reduces the kernel code size by about 1.6k on x86-64
(with allyesconfig).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/event.c                  |  1 +
 drivers/net/gtp.c                     | 21 +++++++----
 drivers/net/macsec.c                  | 21 +++++++----
 drivers/net/team/team.c               | 22 +++++++----
 drivers/net/wireless/mac80211_hwsim.c | 26 +++++++------
 drivers/scsi/pmcraid.c                |  1 +
 drivers/target/target_core_user.c     |  1 +
 drivers/thermal/thermal_core.c        |  1 +
 fs/dlm/netlink.c                      | 15 +++++---
 fs/quota/netlink.c                    |  1 +
 include/linux/drbd_genl.h             |  2 +-
 include/linux/genl_magic_func.h       | 28 ++++++++------
 include/net/genetlink.h               | 71 ++++++-----------------------------
 kernel/taskstats.c                    | 17 ++++++---
 net/batman-adv/netlink.c              | 25 +++++++-----
 net/core/devlink.c                    | 27 +++++++------
 net/core/drop_monitor.c               | 20 ++++++----
 net/hsr/hsr_netlink.c                 | 22 +++++++----
 net/ieee802154/netlink.c              | 23 +++++++-----
 net/ieee802154/nl802154.c             | 34 ++++++++---------
 net/ipv4/fou.c                        | 22 ++++++-----
 net/ipv4/tcp_metrics.c                | 22 ++++++-----
 net/ipv6/ila/ila_xlat.c               | 24 +++++++-----
 net/irda/irnetlink.c                  | 19 ++++++----
 net/l2tp/l2tp_netlink.c               | 25 +++++++-----
 net/netfilter/ipvs/ip_vs_ctl.c        | 22 ++++++-----
 net/netlabel/netlabel_calipso.c       | 20 ++++++----
 net/netlabel/netlabel_cipso_v4.c      | 21 ++++++-----
 net/netlabel/netlabel_mgmt.c          | 20 ++++++----
 net/netlabel/netlabel_unlabeled.c     | 20 ++++++----
 net/netlink/genetlink.c               | 35 +++++++++--------
 net/nfc/netlink.c                     | 24 +++++++-----
 net/openvswitch/datapath.c            |  4 ++
 net/tipc/netlink.c                    | 22 ++++++-----
 net/tipc/netlink_compat.c             | 20 +++++-----
 net/wimax/stack.c                     | 19 +++++-----
 net/wireless/nl80211.c                | 33 ++++++++--------
 37 files changed, 414 insertions(+), 337 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 8dfca3d53131..1ab12ad7d5ba 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -83,6 +83,7 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = {
 };
 
 static struct genl_family acpi_event_genl_family = {
+	.module = THIS_MODULE,
 	.name = ACPI_GENL_FAMILY_NAME,
 	.version = ACPI_GENL_VERSION,
 	.maxattr = ACPI_GENL_ATTR_MAX,
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f66737ba1299..0604fd78f826 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1094,13 +1094,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-static struct genl_family gtp_genl_family = {
-	.name		= "gtp",
-	.version	= 0,
-	.hdrsize	= 0,
-	.maxattr	= GTPA_MAX,
-	.netnsok	= true,
-};
+static struct genl_family gtp_genl_family;
 
 static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
 			      u32 type, struct pdp_ctx *pctx)
@@ -1296,6 +1290,17 @@ static const struct genl_ops gtp_genl_ops[] = {
 	},
 };
 
+static struct genl_family gtp_genl_family = {
+	.name		= "gtp",
+	.version	= 0,
+	.hdrsize	= 0,
+	.maxattr	= GTPA_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= gtp_genl_ops,
+	.n_ops		= ARRAY_SIZE(gtp_genl_ops),
+};
+
 static int __net_init gtp_net_init(struct net *net)
 {
 	struct gtp_net *gn = net_generic(net, gtp_net_id);
@@ -1335,7 +1340,7 @@ static int __init gtp_init(void)
 	if (err < 0)
 		goto error_out;
 
-	err = genl_register_family_with_ops(&gtp_genl_family, gtp_genl_ops);
+	err = genl_register_family(&gtp_genl_family);
 	if (err < 0)
 		goto unreg_rtnl_link;
 
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index a5309b81a786..63ca7a3c77cf 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1421,13 +1421,7 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa)
 	macsec_txsa_put(tx_sa);
 }
 
-static struct genl_family macsec_fam = {
-	.name		= MACSEC_GENL_NAME,
-	.hdrsize	= 0,
-	.version	= MACSEC_GENL_VERSION,
-	.maxattr	= MACSEC_ATTR_MAX,
-	.netnsok	= true,
-};
+static struct genl_family macsec_fam;
 
 static struct net_device *get_dev_from_nl(struct net *net,
 					  struct nlattr **attrs)
@@ -2654,6 +2648,17 @@ static const struct genl_ops macsec_genl_ops[] = {
 	},
 };
 
+static struct genl_family macsec_fam = {
+	.name		= MACSEC_GENL_NAME,
+	.hdrsize	= 0,
+	.version	= MACSEC_GENL_VERSION,
+	.maxattr	= MACSEC_ATTR_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= macsec_genl_ops,
+	.n_ops		= ARRAY_SIZE(macsec_genl_ops),
+};
+
 static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
@@ -3461,7 +3466,7 @@ static int __init macsec_init(void)
 	if (err)
 		goto notifier;
 
-	err = genl_register_family_with_ops(&macsec_fam, macsec_genl_ops);
+	err = genl_register_family(&macsec_fam);
 	if (err)
 		goto rtnl;
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 0b50205764ff..46bf7c1216c0 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2150,12 +2150,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = {
  * Generic netlink custom interface
  ***********************************/
 
-static struct genl_family team_nl_family = {
-	.name		= TEAM_GENL_NAME,
-	.version	= TEAM_GENL_VERSION,
-	.maxattr	= TEAM_ATTR_MAX,
-	.netnsok	= true,
-};
+static struct genl_family team_nl_family;
 
 static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = {
 	[TEAM_ATTR_UNSPEC]			= { .type = NLA_UNSPEC, },
@@ -2745,6 +2740,18 @@ static const struct genl_multicast_group team_nl_mcgrps[] = {
 	{ .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, },
 };
 
+static struct genl_family team_nl_family = {
+	.name		= TEAM_GENL_NAME,
+	.version	= TEAM_GENL_VERSION,
+	.maxattr	= TEAM_ATTR_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= team_nl_ops,
+	.n_ops		= ARRAY_SIZE(team_nl_ops),
+	.mcgrps		= team_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(team_nl_mcgrps),
+};
+
 static int team_nl_send_multicast(struct sk_buff *skb,
 				  struct team *team, u32 portid)
 {
@@ -2768,8 +2775,7 @@ static int team_nl_send_event_port_get(struct team *team,
 
 static int team_nl_init(void)
 {
-	return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops,
-						    team_nl_mcgrps);
+	return genl_register_family(&team_nl_family);
 }
 
 static void team_nl_fini(void)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 54b6cd62676e..5d4637e586e8 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -587,14 +587,8 @@ struct hwsim_radiotap_ack_hdr {
 	__le16 rt_chbitmask;
 } __packed;
 
-/* MAC80211_HWSIM netlinf family */
-static struct genl_family hwsim_genl_family = {
-	.hdrsize = 0,
-	.name = "MAC80211_HWSIM",
-	.version = 1,
-	.maxattr = HWSIM_ATTR_MAX,
-	.netnsok = true,
-};
+/* MAC80211_HWSIM netlink family */
+static struct genl_family hwsim_genl_family;
 
 enum hwsim_multicast_groups {
 	HWSIM_MCGRP_CONFIG,
@@ -3234,6 +3228,18 @@ static const struct genl_ops hwsim_ops[] = {
 	},
 };
 
+static struct genl_family hwsim_genl_family = {
+	.name = "MAC80211_HWSIM",
+	.version = 1,
+	.maxattr = HWSIM_ATTR_MAX,
+	.netnsok = true,
+	.module = THIS_MODULE,
+	.ops = hwsim_ops,
+	.n_ops = ARRAY_SIZE(hwsim_ops),
+	.mcgrps = hwsim_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(hwsim_mcgrps),
+};
+
 static void destroy_radio(struct work_struct *work)
 {
 	struct mac80211_hwsim_data *data =
@@ -3287,9 +3293,7 @@ static int hwsim_init_netlink(void)
 
 	printk(KERN_INFO "mac80211_hwsim: initializing netlink\n");
 
-	rc = genl_register_family_with_ops_groups(&hwsim_genl_family,
-						  hwsim_ops,
-						  hwsim_mcgrps);
+	rc = genl_register_family(&hwsim_genl_family);
 	if (rc)
 		goto failure;
 
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index cc50eb87b28a..c0ab7bb8c3ce 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -1369,6 +1369,7 @@ static struct genl_multicast_group pmcraid_mcgrps[] = {
 };
 
 static struct genl_family pmcraid_event_family = {
+	.module = THIS_MODULE,
 	.name = "pmcraid",
 	.version = 1,
 	.maxattr = PMCRAID_AEN_ATTR_MAX,
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 313a0ef3cda7..3483372f5562 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -148,6 +148,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 
 /* Our generic netlink family */
 static struct genl_family tcmu_genl_family = {
+	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "TCM-USER",
 	.version = 1,
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 68d7503f6417..93b6caab2d9f 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -2164,6 +2164,7 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = {
 };
 
 static struct genl_family thermal_event_genl_family = {
+	.module = THIS_MODULE,
 	.name = THERMAL_GENL_FAMILY_NAME,
 	.version = THERMAL_GENL_VERSION,
 	.maxattr = THERMAL_GENL_ATTR_MAX,
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 00d226956264..04042d69573c 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -16,10 +16,7 @@
 static uint32_t dlm_nl_seqnum;
 static uint32_t listener_nlportid;
 
-static struct genl_family family = {
-	.name		= DLM_GENL_NAME,
-	.version	= DLM_GENL_VERSION,
-};
+static struct genl_family family;
 
 static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
 {
@@ -75,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = {
 	},
 };
 
+static struct genl_family family = {
+	.name		= DLM_GENL_NAME,
+	.version	= DLM_GENL_VERSION,
+	.ops		= dlm_nl_ops,
+	.n_ops		= ARRAY_SIZE(dlm_nl_ops),
+	.module		= THIS_MODULE,
+};
+
 int __init dlm_netlink_init(void)
 {
-	return genl_register_family_with_ops(&family, dlm_nl_ops);
+	return genl_register_family(&family);
 }
 
 void dlm_netlink_exit(void)
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 3965a5cdfaa2..9457c7b0dfa2 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -13,6 +13,7 @@ static const struct genl_multicast_group quota_mcgrps[] = {
 
 /* Netlink family structure for quota */
 static struct genl_family quota_genl_family = {
+	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "VFS_DQUOT",
 	.version = 1,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index c934d3a96b5e..2896f93808ae 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -67,7 +67,7 @@
  *	genl_magic_func.h
  *		generates an entry in the static genl_ops array,
  *		and static register/unregister functions to
- *		genl_register_family_with_ops().
+ *		genl_register_family().
  *
  *	flags and handler:
  *		GENL_op_init( .doit = x, .dumpit = y, .flags = something)
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 7c070c1fe457..40c2e39362c8 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -259,15 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
  *									{{{2
  */
 #define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
-static struct genl_family ZZZ_genl_family __read_mostly = {
-	.name = __stringify(GENL_MAGIC_FAMILY),
-	.version = GENL_MAGIC_VERSION,
-#ifdef GENL_MAGIC_FAMILY_HDRSZ
-	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
-#endif
-	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
-};
-
+static struct genl_family ZZZ_genl_family;
 /*
  * Magic: define multicast groups
  * Magic: define multicast group registration helper
@@ -301,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
 #undef GENL_mc_group
 #define GENL_mc_group(group)
 
+static struct genl_family ZZZ_genl_family __read_mostly = {
+	.name = __stringify(GENL_MAGIC_FAMILY),
+	.version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+	.ops = ZZZ_genl_ops,
+	.n_ops = ARRAY_SIZE(ZZZ_genl_ops),
+	.mcgrps = ZZZ_genl_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps),
+	.module = THIS_MODULE,
+};
+
 int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
 {
-	return genl_register_family_with_ops_groups(&ZZZ_genl_family,	\
-						    ZZZ_genl_ops,	\
-						    ZZZ_genl_mcgrps);
+	return genl_register_family(&ZZZ_genl_family);
 }
 
 void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 43a5c3975a2f..2298b50cee34 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -39,13 +39,14 @@ struct genl_info;
  *	Note that unbind() will not be called symmetrically if the
  *	generic netlink family is removed while there are still open
  *	sockets.
- * @attrbuf: buffer to store parsed attributes
- * @family_list: family list
- * @mcgrps: multicast groups used by this family (private)
- * @n_mcgrps: number of multicast groups (private)
+ * @attrbuf: buffer to store parsed attributes (private)
+ * @family_list: family list (private)
+ * @mcgrps: multicast groups used by this family
+ * @n_mcgrps: number of multicast groups
  * @mcgrp_offset: starting number of multicast group IDs in this family
- * @ops: the operations supported by this family (private)
- * @n_ops: number of operations supported by this family (private)
+ *	(private)
+ * @ops: the operations supported by this family
+ * @n_ops: number of operations supported by this family
  */
 struct genl_family {
 	unsigned int		id;		/* private */
@@ -64,10 +65,10 @@ struct genl_family {
 	int			(*mcast_bind)(struct net *net, int group);
 	void			(*mcast_unbind)(struct net *net, int group);
 	struct nlattr **	attrbuf;	/* private */
-	const struct genl_ops *	ops;		/* private */
-	const struct genl_multicast_group *mcgrps; /* private */
-	unsigned int		n_ops;		/* private */
-	unsigned int		n_mcgrps;	/* private */
+	const struct genl_ops *	ops;
+	const struct genl_multicast_group *mcgrps;
+	unsigned int		n_ops;
+	unsigned int		n_mcgrps;
 	unsigned int		mcgrp_offset;	/* private */
 	struct list_head	family_list;	/* private */
 	struct module		*module;
@@ -132,55 +133,7 @@ struct genl_ops {
 	u8			flags;
 };
 
-int __genl_register_family(struct genl_family *family);
-
-static inline int genl_register_family(struct genl_family *family)
-{
-	family->module = THIS_MODULE;
-	return __genl_register_family(family);
-}
-
-/**
- * genl_register_family_with_ops - register a generic netlink family with ops
- * @family: generic netlink family
- * @ops: operations to be registered
- * @n_ops: number of elements to register
- *
- * Registers the specified family and operations from the specified table.
- * Only one family may be registered with the same family name or identifier.
- *
- * Either a doit or dumpit callback must be specified for every registered
- * operation or the function will fail. Only one operation structure per
- * command identifier may be registered.
- *
- * See include/net/genetlink.h for more documenation on the operations
- * structure.
- *
- * Return 0 on success or a negative error code.
- */
-static inline int
-_genl_register_family_with_ops_grps(struct genl_family *family,
-				    const struct genl_ops *ops, size_t n_ops,
-				    const struct genl_multicast_group *mcgrps,
-				    size_t n_mcgrps)
-{
-	family->module = THIS_MODULE;
-	family->ops = ops;
-	family->n_ops = n_ops;
-	family->mcgrps = mcgrps;
-	family->n_mcgrps = n_mcgrps;
-	return __genl_register_family(family);
-}
-
-#define genl_register_family_with_ops(family, ops)			\
-	_genl_register_family_with_ops_grps((family),			\
-					    (ops), ARRAY_SIZE(ops),	\
-					    NULL, 0)
-#define genl_register_family_with_ops_groups(family, ops, grps)	\
-	_genl_register_family_with_ops_grps((family),			\
-					    (ops), ARRAY_SIZE(ops),	\
-					    (grps), ARRAY_SIZE(grps))
-
+int genl_register_family(struct genl_family *family);
 int genl_unregister_family(struct genl_family *family);
 void genl_notify(struct genl_family *family, struct sk_buff *skb,
 		 struct genl_info *info, u32 group, gfp_t flags);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d7a1a9461a10..4075ece592f2 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -41,11 +41,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum);
 static int family_registered;
 struct kmem_cache *taskstats_cache;
 
-static struct genl_family family = {
-	.name		= TASKSTATS_GENL_NAME,
-	.version	= TASKSTATS_GENL_VERSION,
-	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
-};
+static struct genl_family family;
 
 static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
 	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
@@ -650,6 +646,15 @@ static const struct genl_ops taskstats_ops[] = {
 	},
 };
 
+static struct genl_family family = {
+	.name		= TASKSTATS_GENL_NAME,
+	.version	= TASKSTATS_GENL_VERSION,
+	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
+	.module		= THIS_MODULE,
+	.ops		= taskstats_ops,
+	.n_ops		= ARRAY_SIZE(taskstats_ops),
+};
+
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
@@ -666,7 +671,7 @@ static int __init taskstats_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops(&family, taskstats_ops);
+	rc = genl_register_family(&family);
 	if (rc)
 		return rc;
 
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a03b0ed7e8dd..e28cec34a016 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -48,13 +48,7 @@
 #include "tp_meter.h"
 #include "translation-table.h"
 
-struct genl_family batadv_netlink_family = {
-	.hdrsize = 0,
-	.name = BATADV_NL_NAME,
-	.version = 1,
-	.maxattr = BATADV_ATTR_MAX,
-	.netnsok = true,
-};
+struct genl_family batadv_netlink_family;
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
@@ -609,6 +603,19 @@ static struct genl_ops batadv_netlink_ops[] = {
 
 };
 
+struct genl_family batadv_netlink_family = {
+	.hdrsize = 0,
+	.name = BATADV_NL_NAME,
+	.version = 1,
+	.maxattr = BATADV_ATTR_MAX,
+	.netnsok = true,
+	.module = THIS_MODULE,
+	.ops = batadv_netlink_ops,
+	.n_ops = ARRAY_SIZE(batadv_netlink_ops),
+	.mcgrps = batadv_netlink_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
+};
+
 /**
  * batadv_netlink_register - register batadv genl netlink family
  */
@@ -616,9 +623,7 @@ void __init batadv_netlink_register(void)
 {
 	int ret;
 
-	ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
-						   batadv_netlink_ops,
-						   batadv_netlink_mcgrps);
+	ret = genl_register_family(&batadv_netlink_family);
 	if (ret)
 		pr_warn("unable to register netlink family");
 }
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3008d9c33875..063da8091aef 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -341,14 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 	mutex_unlock(&devlink_mutex);
 }
 
-static struct genl_family devlink_nl_family = {
-	.name		= DEVLINK_GENL_NAME,
-	.version	= DEVLINK_GENL_VERSION,
-	.maxattr	= DEVLINK_ATTR_MAX,
-	.netnsok	= true,
-	.pre_doit	= devlink_nl_pre_doit,
-	.post_doit	= devlink_nl_post_doit,
-};
+static struct genl_family devlink_nl_family;
 
 enum devlink_multicast_groups {
 	DEVLINK_MCGRP_CONFIG,
@@ -1619,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = {
 	},
 };
 
+static struct genl_family devlink_nl_family = {
+	.name		= DEVLINK_GENL_NAME,
+	.version	= DEVLINK_GENL_VERSION,
+	.maxattr	= DEVLINK_ATTR_MAX,
+	.netnsok	= true,
+	.pre_doit	= devlink_nl_pre_doit,
+	.post_doit	= devlink_nl_post_doit,
+	.module		= THIS_MODULE,
+	.ops		= devlink_nl_ops,
+	.n_ops		= ARRAY_SIZE(devlink_nl_ops),
+	.mcgrps		= devlink_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(devlink_nl_mcgrps),
+};
+
 /**
  *	devlink_alloc - Allocate new devlink instance resources
  *
@@ -1841,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
 
 static int __init devlink_module_init(void)
 {
-	return genl_register_family_with_ops_groups(&devlink_nl_family,
-						    devlink_nl_ops,
-						    devlink_nl_mcgrps);
+	return genl_register_family(&devlink_nl_family);
 }
 
 static void __exit devlink_module_exit(void)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index a5320dfcd978..80c002794ff6 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -59,11 +59,7 @@ struct dm_hw_stat_delta {
 	unsigned long last_drop_val;
 };
 
-static struct genl_family net_drop_monitor_family = {
-	.hdrsize        = 0,
-	.name           = "NET_DM",
-	.version        = 2,
-};
+static struct genl_family net_drop_monitor_family;
 
 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
 
@@ -350,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = {
 	},
 };
 
+static struct genl_family net_drop_monitor_family = {
+	.hdrsize        = 0,
+	.name           = "NET_DM",
+	.version        = 2,
+	.module		= THIS_MODULE,
+	.ops		= dropmon_ops,
+	.n_ops		= ARRAY_SIZE(dropmon_ops),
+	.mcgrps		= dropmon_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(dropmon_mcgrps),
+};
+
 static struct notifier_block dropmon_net_notifier = {
 	.notifier_call = dropmon_net_event
 };
@@ -366,8 +373,7 @@ static int __init init_net_drop_monitor(void)
 		return -ENOSPC;
 	}
 
-	rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
-						  dropmon_ops, dropmon_mcgrps);
+	rc = genl_register_family(&net_drop_monitor_family);
 	if (rc) {
 		pr_err("Could not create drop monitor netlink family\n");
 		return rc;
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 2ad039492bee..aab34c7f6f89 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -131,12 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
 	[HSR_A_IF2_SEQ] = { .type = NLA_U16 },
 };
 
-static struct genl_family hsr_genl_family = {
-	.hdrsize = 0,
-	.name = "HSR",
-	.version = 1,
-	.maxattr = HSR_A_MAX,
-};
+static struct genl_family hsr_genl_family;
 
 static const struct genl_multicast_group hsr_mcgrps[] = {
 	{ .name = "hsr-network", },
@@ -466,6 +461,18 @@ static const struct genl_ops hsr_ops[] = {
 	},
 };
 
+static struct genl_family hsr_genl_family = {
+	.hdrsize = 0,
+	.name = "HSR",
+	.version = 1,
+	.maxattr = HSR_A_MAX,
+	.module = THIS_MODULE,
+	.ops = hsr_ops,
+	.n_ops = ARRAY_SIZE(hsr_ops),
+	.mcgrps = hsr_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
+};
+
 int __init hsr_netlink_init(void)
 {
 	int rc;
@@ -474,8 +481,7 @@ int __init hsr_netlink_init(void)
 	if (rc)
 		goto fail_rtnl_link_register;
 
-	rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops,
-						  hsr_mcgrps);
+	rc = genl_register_family(&hsr_genl_family);
 	if (rc)
 		goto fail_genl_register_family;
 
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 19144158b696..08e62470bac2 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -28,13 +28,6 @@
 static unsigned int ieee802154_seq_num;
 static DEFINE_SPINLOCK(ieee802154_seq_lock);
 
-struct genl_family nl802154_family = {
-	.hdrsize	= 0,
-	.name		= IEEE802154_NL_NAME,
-	.version	= 1,
-	.maxattr	= IEEE802154_ATTR_MAX,
-};
-
 /* Requests to userspace */
 struct sk_buff *ieee802154_nl_create(int flags, u8 req)
 {
@@ -138,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
 	[IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
 };
 
+struct genl_family nl802154_family = {
+	.hdrsize	= 0,
+	.name		= IEEE802154_NL_NAME,
+	.version	= 1,
+	.maxattr	= IEEE802154_ATTR_MAX,
+	.module		= THIS_MODULE,
+	.ops		= ieee8021154_ops,
+	.n_ops		= ARRAY_SIZE(ieee8021154_ops),
+	.mcgrps		= ieee802154_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(ieee802154_mcgrps),
+};
+
 int __init ieee802154_nl_init(void)
 {
-	return genl_register_family_with_ops_groups(&nl802154_family,
-						    ieee8021154_ops,
-						    ieee802154_mcgrps);
+	return genl_register_family(&nl802154_family);
 }
 
 void ieee802154_nl_exit(void)
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 182299858f1d..f7e75578aedd 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,22 +26,8 @@
 #include "rdev-ops.h"
 #include "core.h"
 
-static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
-			     struct genl_info *info);
-
-static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
-			       struct genl_info *info);
-
 /* the netlink family */
-static struct genl_family nl802154_fam = {
-	.name = NL802154_GENL_NAME,	/* have users key off the name instead */
-	.hdrsize = 0,			/* no private header */
-	.version = 1,			/* no particular meaning now */
-	.maxattr = NL802154_ATTR_MAX,
-	.netnsok = true,
-	.pre_doit = nl802154_pre_doit,
-	.post_doit = nl802154_post_doit,
-};
+static struct genl_family nl802154_fam;
 
 /* multicast groups */
 enum nl802154_multicast_groups {
@@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = {
 #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
 };
 
+static struct genl_family nl802154_fam = {
+	.name = NL802154_GENL_NAME,	/* have users key off the name instead */
+	.hdrsize = 0,			/* no private header */
+	.version = 1,			/* no particular meaning now */
+	.maxattr = NL802154_ATTR_MAX,
+	.netnsok = true,
+	.pre_doit = nl802154_pre_doit,
+	.post_doit = nl802154_post_doit,
+	.module = THIS_MODULE,
+	.ops = nl802154_ops,
+	.n_ops = ARRAY_SIZE(nl802154_ops),
+	.mcgrps = nl802154_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
+};
+
 /* initialisation/exit functions */
 int nl802154_init(void)
 {
-	return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops,
-						    nl802154_mcgrps);
+	return genl_register_family(&nl802154_fam);
 }
 
 void nl802154_exit(void)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e3fc527c5d37..5b5226a2434f 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -622,13 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 	return err;
 }
 
-static struct genl_family fou_nl_family = {
-	.hdrsize	= 0,
-	.name		= FOU_GENL_NAME,
-	.version	= FOU_GENL_VERSION,
-	.maxattr	= FOU_ATTR_MAX,
-	.netnsok	= true,
-};
+static struct genl_family fou_nl_family;
 
 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 	[FOU_ATTR_PORT] = { .type = NLA_U16, },
@@ -830,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = {
 	},
 };
 
+static struct genl_family fou_nl_family = {
+	.hdrsize	= 0,
+	.name		= FOU_GENL_NAME,
+	.version	= FOU_GENL_VERSION,
+	.maxattr	= FOU_ATTR_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= fou_nl_ops,
+	.n_ops		= ARRAY_SIZE(fou_nl_ops),
+};
+
 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
 {
 	return sizeof(struct udphdr);
@@ -1085,8 +1090,7 @@ static int __init fou_init(void)
 	if (ret)
 		goto exit;
 
-	ret = genl_register_family_with_ops(&fou_nl_family,
-					    fou_nl_ops);
+	ret = genl_register_family(&fou_nl_family);
 	if (ret < 0)
 		goto unregister;
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 3da305127b32..bba3c72c4a39 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -742,13 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
 	rcu_read_unlock();
 }
 
-static struct genl_family tcp_metrics_nl_family = {
-	.hdrsize	= 0,
-	.name		= TCP_METRICS_GENL_NAME,
-	.version	= TCP_METRICS_GENL_VERSION,
-	.maxattr	= TCP_METRICS_ATTR_MAX,
-	.netnsok	= true,
-};
+static struct genl_family tcp_metrics_nl_family;
 
 static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
 	[TCP_METRICS_ATTR_ADDR_IPV4]	= { .type = NLA_U32, },
@@ -1115,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
 	},
 };
 
+static struct genl_family tcp_metrics_nl_family = {
+	.hdrsize	= 0,
+	.name		= TCP_METRICS_GENL_NAME,
+	.version	= TCP_METRICS_GENL_VERSION,
+	.maxattr	= TCP_METRICS_ATTR_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= tcp_metrics_nl_ops,
+	.n_ops		= ARRAY_SIZE(tcp_metrics_nl_ops),
+};
+
 static unsigned int tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {
@@ -1178,8 +1183,7 @@ void __init tcp_metrics_init(void)
 	if (ret < 0)
 		panic("Could not allocate the tcp_metrics hash table\n");
 
-	ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
-					    tcp_metrics_nl_ops);
+	ret = genl_register_family(&tcp_metrics_nl_family);
 	if (ret < 0)
 		panic("Could not register tcp_metrics generic netlink\n");
 }
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 0d57e27d1cdd..97f7b0cc4675 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -118,14 +118,7 @@ static const struct rhashtable_params rht_params = {
 	.obj_cmpfn = ila_cmpfn,
 };
 
-static struct genl_family ila_nl_family = {
-	.hdrsize	= 0,
-	.name		= ILA_GENL_NAME,
-	.version	= ILA_GENL_VERSION,
-	.maxattr	= ILA_ATTR_MAX,
-	.netnsok	= true,
-	.parallel_ops	= true,
-};
+static struct genl_family ila_nl_family;
 
 static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
@@ -560,6 +553,18 @@ static const struct genl_ops ila_nl_ops[] = {
 	},
 };
 
+static struct genl_family ila_nl_family = {
+	.hdrsize	= 0,
+	.name		= ILA_GENL_NAME,
+	.version	= ILA_GENL_VERSION,
+	.maxattr	= ILA_ATTR_MAX,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.module		= THIS_MODULE,
+	.ops		= ila_nl_ops,
+	.n_ops		= ARRAY_SIZE(ila_nl_ops),
+};
+
 #define ILA_HASH_TABLE_SIZE 1024
 
 static __net_init int ila_init_net(struct net *net)
@@ -630,8 +635,7 @@ int ila_xlat_init(void)
 	if (ret)
 		goto exit;
 
-	ret = genl_register_family_with_ops(&ila_nl_family,
-					    ila_nl_ops);
+	ret = genl_register_family(&ila_nl_family);
 	if (ret < 0)
 		goto unregister;
 
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index f23b81aa91fe..07877347c2f7 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -24,12 +24,7 @@
 
 
-static struct genl_family irda_nl_family = {
-	.name = IRDA_NL_NAME,
-	.hdrsize = 0,
-	.version = IRDA_NL_VERSION,
-	.maxattr = IRDA_NL_CMD_MAX,
-};
+static struct genl_family irda_nl_family;
 
 static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
 {
@@ -146,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = {
 
 };
 
+static struct genl_family irda_nl_family = {
+	.name = IRDA_NL_NAME,
+	.hdrsize = 0,
+	.version = IRDA_NL_VERSION,
+	.maxattr = IRDA_NL_CMD_MAX,
+	.module = THIS_MODULE,
+	.ops = irda_nl_ops,
+	.n_ops = ARRAY_SIZE(irda_nl_ops),
+};
+
 int irda_nl_register(void)
 {
-	return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops);
+	return genl_register_family(&irda_nl_family);
 }
 
 void irda_nl_unregister(void)
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 4fbf1f41ac52..e4e8c0769a6b 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -31,13 +31,7 @@
 #include "l2tp_core.h"
 
 
-static struct genl_family l2tp_nl_family = {
-	.name		= L2TP_GENL_NAME,
-	.version	= L2TP_GENL_VERSION,
-	.hdrsize	= 0,
-	.maxattr	= L2TP_ATTR_MAX,
-	.netnsok	= true,
-};
+static struct genl_family l2tp_nl_family;
 
 static const struct genl_multicast_group l2tp_multicast_group[] = {
 	{
@@ -976,6 +970,19 @@ static const struct genl_ops l2tp_nl_ops[] = {
 	},
 };
 
+static struct genl_family l2tp_nl_family = {
+	.name		= L2TP_GENL_NAME,
+	.version	= L2TP_GENL_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= L2TP_ATTR_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= l2tp_nl_ops,
+	.n_ops		= ARRAY_SIZE(l2tp_nl_ops),
+	.mcgrps		= l2tp_multicast_group,
+	.n_mcgrps	= ARRAY_SIZE(l2tp_multicast_group),
+};
+
 int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
 {
 	int ret;
@@ -1012,9 +1019,7 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
 static int l2tp_nl_init(void)
 {
 	pr_info("L2TP netlink interface\n");
-	return genl_register_family_with_ops_groups(&l2tp_nl_family,
-						    l2tp_nl_ops,
-						    l2tp_multicast_group);
+	return genl_register_family(&l2tp_nl_family);
 }
 
 static void l2tp_nl_cleanup(void)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ceed66cdd03e..ea3e8aed063f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2840,13 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
  */
 
 /* IPVS genetlink family */
-static struct genl_family ip_vs_genl_family = {
-	.hdrsize	= 0,
-	.name		= IPVS_GENL_NAME,
-	.version	= IPVS_GENL_VERSION,
-	.maxattr	= IPVS_CMD_MAX,
-	.netnsok        = true,         /* Make ipvsadm to work on netns */
-};
+static struct genl_family ip_vs_genl_family;
 
 /* Policy used for first-level command attributes */
 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
@@ -3871,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = {
 	},
 };
 
+static struct genl_family ip_vs_genl_family = {
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+	.netnsok        = true,         /* Make ipvsadm to work on netns */
+	.module		= THIS_MODULE,
+	.ops		= ip_vs_genl_ops,
+	.n_ops		= ARRAY_SIZE(ip_vs_genl_ops),
+};
+
 static int __init ip_vs_genl_register(void)
 {
-	return genl_register_family_with_ops(&ip_vs_genl_family,
-					     ip_vs_genl_ops);
+	return genl_register_family(&ip_vs_genl_family);
 }
 
 static void ip_vs_genl_unregister(void)
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 152e503b8c5d..ca7c9c411a5c 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CALIPSO family */
-static struct genl_family netlbl_calipso_gnl_family = {
-	.hdrsize = 0,
-	.name = NETLBL_NLTYPE_CALIPSO_NAME,
-	.version = NETLBL_PROTO_VERSION,
-	.maxattr = NLBL_CALIPSO_A_MAX,
-};
+static struct genl_family netlbl_calipso_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
@@ -354,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = {
 	},
 };
 
+static struct genl_family netlbl_calipso_gnl_family = {
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_CALIPSO_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = NLBL_CALIPSO_A_MAX,
+	.module = THIS_MODULE,
+	.ops = netlbl_calipso_ops,
+	.n_ops = ARRAY_SIZE(netlbl_calipso_ops),
+};
+
 /* NetLabel Generic NETLINK Protocol Functions
  */
 
@@ -367,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
  */
 int __init netlbl_calipso_genl_init(void)
 {
-	return genl_register_family_with_ops(&netlbl_calipso_gnl_family,
-					     netlbl_calipso_ops);
+	return genl_register_family(&netlbl_calipso_gnl_family);
 }
 
 static const struct netlbl_calipso_ops *calipso_ops;
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 755b284e7ad4..a665eae91245 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -59,13 +59,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_cipsov4_gnl_family = {
-	.hdrsize = 0,
-	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
-	.version = NETLBL_PROTO_VERSION,
-	.maxattr = NLBL_CIPSOV4_A_MAX,
-};
-
+static struct genl_family netlbl_cipsov4_gnl_family;
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
 	[NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
@@ -766,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
 	},
 };
 
+static struct genl_family netlbl_cipsov4_gnl_family = {
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = NLBL_CIPSOV4_A_MAX,
+	.module = THIS_MODULE,
+	.ops = netlbl_cipsov4_ops,
+	.n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -780,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
  */
 int __init netlbl_cipsov4_genl_init(void)
 {
-	return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family,
-					     netlbl_cipsov4_ops);
+	return genl_register_family(&netlbl_cipsov4_gnl_family);
 }
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 3b00f2368fcd..ecfe8eb149db 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_mgmt_gnl_family = {
-	.hdrsize = 0,
-	.name = NETLBL_NLTYPE_MGMT_NAME,
-	.version = NETLBL_PROTO_VERSION,
-	.maxattr = NLBL_MGMT_A_MAX,
-};
+static struct genl_family netlbl_mgmt_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
@@ -833,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
 	},
 };
 
+static struct genl_family netlbl_mgmt_gnl_family = {
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_MGMT_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = NLBL_MGMT_A_MAX,
+	.module = THIS_MODULE,
+	.ops = netlbl_mgmt_genl_ops,
+	.n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -847,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
  */
 int __init netlbl_mgmt_genl_init(void)
 {
-	return genl_register_family_with_ops(&netlbl_mgmt_gnl_family,
-					     netlbl_mgmt_genl_ops);
+	return genl_register_family(&netlbl_mgmt_gnl_family);
 }
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index c2ea8d1f653a..5dbbad41114f 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -123,12 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def;
 static u8 netlabel_unlabel_acceptflg;
 
 /* NetLabel Generic NETLINK unlabeled family */
-static struct genl_family netlbl_unlabel_gnl_family = {
-	.hdrsize = 0,
-	.name = NETLBL_NLTYPE_UNLABELED_NAME,
-	.version = NETLBL_PROTO_VERSION,
-	.maxattr = NLBL_UNLABEL_A_MAX,
-};
+static struct genl_family netlbl_unlabel_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
@@ -1377,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
 	},
 };
 
+static struct genl_family netlbl_unlabel_gnl_family = {
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_UNLABELED_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = NLBL_UNLABEL_A_MAX,
+	.module = THIS_MODULE,
+	.ops = netlbl_unlabel_genl_ops,
+	.n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -1391,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
  */
 int __init netlbl_unlabel_genl_init(void)
 {
-	return genl_register_family_with_ops(&netlbl_unlabel_gnl_family,
-					     netlbl_unlabel_genl_ops);
+	return genl_register_family(&netlbl_unlabel_gnl_family);
 }
 
 /*
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f19ec969edee..ca582ee4ae05 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -344,18 +344,18 @@ static int genl_validate_ops(const struct genl_family *family)
 }
 
 /**
- * __genl_register_family - register a generic netlink family
+ * genl_register_family - register a generic netlink family
  * @family: generic netlink family
  *
  * Registers the specified family after validating it first. Only one
  * family may be registered with the same family name or identifier.
  *
- * The family's ops array must already be assigned, you can use the
- * genl_register_family_with_ops() helper function.
+ * The family's ops, multicast groups and module pointer must already
+ * be assigned.
  *
  * Return 0 on success or a negative error code.
  */
-int __genl_register_family(struct genl_family *family)
+int genl_register_family(struct genl_family *family)
 {
 	int err, i;
 
@@ -429,7 +429,7 @@ errout_locked:
 	genl_unlock_all();
 	return err;
 }
-EXPORT_SYMBOL(__genl_register_family);
+EXPORT_SYMBOL(genl_register_family);
 
 /**
  * genl_unregister_family - unregister generic netlink family
@@ -452,7 +452,6 @@ int genl_unregister_family(struct genl_family *family)
 		genl_unregister_mc_groups(family);
 
 		list_del(&rc->family_list);
-		family->n_ops = 0;
 		up_write(&cb_lock);
 		wait_event(genl_sk_destructing_waitq,
 			   atomic_read(&genl_sk_destructing_cnt) == 0);
@@ -681,13 +680,7 @@ static void genl_rcv(struct sk_buff *skb)
  * Controller
  **************************************************************************/
 
-static struct genl_family genl_ctrl = {
-	.id = GENL_ID_CTRL,
-	.name = "nlctrl",
-	.version = 0x2,
-	.maxattr = CTRL_ATTR_MAX,
-	.netnsok = true,
-};
+static struct genl_family genl_ctrl;
 
 static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
 			  u32 flags, struct sk_buff *skb, u8 cmd)
@@ -997,6 +990,19 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
 	{ .name = "notify", },
 };
 
+static struct genl_family genl_ctrl = {
+	.module = THIS_MODULE,
+	.ops = genl_ctrl_ops,
+	.n_ops = ARRAY_SIZE(genl_ctrl_ops),
+	.mcgrps = genl_ctrl_groups,
+	.n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
+	.id = GENL_ID_CTRL,
+	.name = "nlctrl",
+	.version = 0x2,
+	.maxattr = CTRL_ATTR_MAX,
+	.netnsok = true,
+};
+
 static int genl_bind(struct net *net, int group)
 {
 	int i, err = -ENOENT;
@@ -1086,8 +1092,7 @@ static int __init genl_init(void)
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
 		INIT_LIST_HEAD(&family_ht[i]);
 
-	err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops,
-						   genl_ctrl_groups);
+	err = genl_register_family(&genl_ctrl);
 	if (err < 0)
 		goto problem;
 
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index c230403e066c..450b1e5144cc 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -38,13 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
 	{ .name = NFC_GENL_MCAST_EVENT_NAME, },
 };
 
-static struct genl_family nfc_genl_family = {
-	.hdrsize = 0,
-	.name = NFC_GENL_NAME,
-	.version = NFC_GENL_VERSION,
-	.maxattr = NFC_ATTR_MAX,
-};
-
+static struct genl_family nfc_genl_family;
 static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
 	[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
@@ -1752,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = {
 	},
 };
 
+static struct genl_family nfc_genl_family = {
+	.hdrsize = 0,
+	.name = NFC_GENL_NAME,
+	.version = NFC_GENL_VERSION,
+	.maxattr = NFC_ATTR_MAX,
+	.module = THIS_MODULE,
+	.ops = nfc_genl_ops,
+	.n_ops = ARRAY_SIZE(nfc_genl_ops),
+	.mcgrps = nfc_genl_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
+};
+
 
 struct urelease_work {
 	struct	work_struct w;
@@ -1837,9 +1843,7 @@ int __init nfc_genl_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops_groups(&nfc_genl_family,
-						  nfc_genl_ops,
-						  nfc_genl_mcgrps);
+	rc = genl_register_family(&nfc_genl_family);
 	if (rc)
 		return rc;
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f9fef7dfba15..ad6a111a0014 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -679,6 +679,7 @@ static struct genl_family dp_packet_genl_family = {
 	.parallel_ops = true,
 	.ops = dp_packet_genl_ops,
 	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+	.module = THIS_MODULE,
 };
 
 static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
@@ -1445,6 +1446,7 @@ static struct genl_family dp_flow_genl_family = {
 	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
 	.mcgrps = &ovs_dp_flow_multicast_group,
 	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
 static size_t ovs_dp_cmd_msg_size(void)
@@ -1830,6 +1832,7 @@ static struct genl_family dp_datapath_genl_family = {
 	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
 	.mcgrps = &ovs_dp_datapath_multicast_group,
 	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -2251,6 +2254,7 @@ struct genl_family dp_vport_genl_family = {
 	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
 	.mcgrps = &ovs_dp_vport_multicast_group,
 	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
 static struct genl_family * const dp_genl_families[] = {
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 383b8fedabc7..74a405bf107b 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -135,14 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
 /* Users of the legacy API (tipc-config) can't handle that we add operations,
  * so we have a separate genl handling for the new API.
  */
-struct genl_family tipc_genl_family = {
-	.name		= TIPC_GENL_V2_NAME,
-	.version	= TIPC_GENL_V2_VERSION,
-	.hdrsize	= 0,
-	.maxattr	= TIPC_NLA_MAX,
-	.netnsok	= true,
-};
-
 static const struct genl_ops tipc_genl_v2_ops[] = {
 	{
 		.cmd	= TIPC_NL_BEARER_DISABLE,
@@ -257,6 +249,17 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 #endif
 };
 
+struct genl_family tipc_genl_family = {
+	.name		= TIPC_GENL_V2_NAME,
+	.version	= TIPC_GENL_V2_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= TIPC_NLA_MAX,
+	.netnsok	= true,
+	.module		= THIS_MODULE,
+	.ops		= tipc_genl_v2_ops,
+	.n_ops		= ARRAY_SIZE(tipc_genl_v2_ops),
+};
+
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
 {
 	u32 maxattr = tipc_genl_family.maxattr;
@@ -272,8 +275,7 @@ int tipc_netlink_start(void)
 {
 	int res;
 
-	res = genl_register_family_with_ops(&tipc_genl_family,
-					    tipc_genl_v2_ops);
+	res = genl_register_family(&tipc_genl_family);
 	if (res) {
 		pr_err("Failed to register netlink interface\n");
 		return res;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index f04428e4c8e5..07b19931e458 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1215,27 +1215,29 @@ send:
 	return err;
 }
 
+static struct genl_ops tipc_genl_compat_ops[] = {
+	{
+		.cmd		= TIPC_GENL_CMD,
+		.doit		= tipc_nl_compat_recv,
+	},
+};
+
 static struct genl_family tipc_genl_compat_family = {
 	.name		= TIPC_GENL_NAME,
 	.version	= TIPC_GENL_VERSION,
 	.hdrsize	= TIPC_GENL_HDRLEN,
 	.maxattr	= 0,
 	.netnsok	= true,
-};
-
-static struct genl_ops tipc_genl_compat_ops[] = {
-	{
-		.cmd		= TIPC_GENL_CMD,
-		.doit		= tipc_nl_compat_recv,
-	},
+	.module		= THIS_MODULE,
+	.ops		= tipc_genl_compat_ops,
+	.n_ops		= ARRAY_SIZE(tipc_genl_compat_ops),
 };
 
 int tipc_netlink_compat_start(void)
 {
 	int res;
 
-	res = genl_register_family_with_ops(&tipc_genl_compat_family,
-					    tipc_genl_compat_ops);
+	res = genl_register_family(&tipc_genl_compat_family);
 	if (res) {
 		pr_err("Failed to register legacy compat interface\n");
 		return res;
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 8ac83a41585f..587e1627681f 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -572,15 +572,20 @@ struct d_level D_LEVEL[] = {
 size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
 
 
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+	{ .name = "msg", },
+};
+
 struct genl_family wimax_gnl_family = {
 	.name = "WiMAX",
 	.version = WIMAX_GNL_VERSION,
 	.hdrsize = 0,
 	.maxattr = WIMAX_GNL_ATTR_MAX,
-};
-
-static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
-	{ .name = "msg", },
+	.module = THIS_MODULE,
+	.ops = wimax_gnl_ops,
+	.n_ops = ARRAY_SIZE(wimax_gnl_ops),
+	.mcgrps = wimax_gnl_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
 };
 
 
@@ -595,11 +600,7 @@ int __init wimax_subsys_init(void)
 	d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
 		       "wimax.debug");
 
-	snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
-		 "WiMAX");
-	result = genl_register_family_with_ops_groups(&wimax_gnl_family,
-						      wimax_gnl_ops,
-						      wimax_gnl_mcgrps);
+	result = genl_register_family(&wimax_gnl_family);
 	if (unlikely(result < 0)) {
 		pr_err("cannot register generic netlink family: %d\n", result);
 		goto error_register_family;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 714beafe05e0..8e5ca3c47593 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -32,21 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 				   struct cfg80211_crypto_settings *settings,
 				   int cipher_limit);
 
-static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
-			    struct genl_info *info);
-static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
-			      struct genl_info *info);
-
 /* the netlink family */
-static struct genl_family nl80211_fam = {
-	.name = NL80211_GENL_NAME,	/* have users key off the name instead */
-	.hdrsize = 0,			/* no private header */
-	.version = 1,			/* no particular meaning now */
-	.maxattr = NL80211_ATTR_MAX,
-	.netnsok = true,
-	.pre_doit = nl80211_pre_doit,
-	.post_doit = nl80211_post_doit,
-};
+static struct genl_family nl80211_fam;
 
 /* multicast groups */
 enum nl80211_multicast_groups {
@@ -12599,6 +12586,21 @@ static const struct genl_ops nl80211_ops[] = {
 	},
 };
 
+static struct genl_family nl80211_fam = {
+	.name = NL80211_GENL_NAME,	/* have users key off the name instead */
+	.hdrsize = 0,			/* no private header */
+	.version = 1,			/* no particular meaning now */
+	.maxattr = NL80211_ATTR_MAX,
+	.netnsok = true,
+	.pre_doit = nl80211_pre_doit,
+	.post_doit = nl80211_post_doit,
+	.module = THIS_MODULE,
+	.ops = nl80211_ops,
+	.n_ops = ARRAY_SIZE(nl80211_ops),
+	.mcgrps = nl80211_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
+};
+
 /* notification functions */
 
 void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
@@ -14565,8 +14567,7 @@ int nl80211_init(void)
 {
 	int err;
 
-	err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops,
-						   nl80211_mcgrps);
+	err = genl_register_family(&nl80211_fam);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 56989f6d8568c21257dcec0f5e644d5570ba3281 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 24 Oct 2016 14:40:05 +0200
Subject: genetlink: mark families as __ro_after_init

Now genl_register_family() is the only thing (other than the
users themselves, perhaps, but I didn't find any doing that)
writing to the family struct.

In all families that I found, genl_register_family() is only
called from __init functions (some indirectly, in which case
I've add __init annotations to clarifly things), so all can
actually be marked __ro_after_init.

This protects the data structure from accidental corruption.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/event.c                  |  4 ++--
 drivers/net/gtp.c                     |  2 +-
 drivers/net/macsec.c                  |  2 +-
 drivers/net/team/team.c               |  4 ++--
 drivers/net/wireless/mac80211_hwsim.c |  4 ++--
 drivers/scsi/pmcraid.c                |  4 ++--
 drivers/target/target_core_user.c     |  2 +-
 drivers/thermal/thermal_core.c        |  4 ++--
 fs/dlm/netlink.c                      |  2 +-
 fs/quota/netlink.c                    |  2 +-
 include/linux/genl_magic_func.h       |  2 +-
 kernel/taskstats.c                    |  2 +-
 net/batman-adv/netlink.c              |  2 +-
 net/core/devlink.c                    |  2 +-
 net/core/drop_monitor.c               |  2 +-
 net/hsr/hsr_netlink.c                 |  2 +-
 net/ieee802154/netlink.c              |  2 +-
 net/ieee802154/nl802154.c             |  4 ++--
 net/ipv4/fou.c                        |  2 +-
 net/ipv4/tcp_metrics.c                |  2 +-
 net/ipv6/ila/ila_xlat.c               |  4 ++--
 net/irda/irnetlink.c                  |  4 ++--
 net/l2tp/l2tp_netlink.c               |  4 ++--
 net/netfilter/ipvs/ip_vs_ctl.c        |  2 +-
 net/netlabel/netlabel_calipso.c       |  2 +-
 net/netlabel/netlabel_cipso_v4.c      |  2 +-
 net/netlabel/netlabel_mgmt.c          |  2 +-
 net/netlabel/netlabel_unlabeled.c     |  2 +-
 net/netlink/genetlink.c               |  2 +-
 net/nfc/netlink.c                     |  2 +-
 net/openvswitch/datapath.c            | 10 +++++-----
 net/tipc/netlink.c                    |  4 ++--
 net/tipc/netlink_compat.c             |  4 ++--
 net/wimax/stack.c                     |  2 +-
 net/wireless/nl80211.c                |  4 ++--
 35 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 1ab12ad7d5ba..7fceb3b4691b 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -82,7 +82,7 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = {
 	{ .name = ACPI_GENL_MCAST_GROUP_NAME, },
 };
 
-static struct genl_family acpi_event_genl_family = {
+static struct genl_family acpi_event_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.name = ACPI_GENL_FAMILY_NAME,
 	.version = ACPI_GENL_VERSION,
@@ -144,7 +144,7 @@ int acpi_bus_generate_netlink_event(const char *device_class,
 
 EXPORT_SYMBOL(acpi_bus_generate_netlink_event);
 
-static int acpi_event_genetlink_init(void)
+static int __init acpi_event_genetlink_init(void)
 {
 	return genl_register_family(&acpi_event_genl_family);
 }
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 0604fd78f826..719d19f35673 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1290,7 +1290,7 @@ static const struct genl_ops gtp_genl_ops[] = {
 	},
 };
 
-static struct genl_family gtp_genl_family = {
+static struct genl_family gtp_genl_family __ro_after_init = {
 	.name		= "gtp",
 	.version	= 0,
 	.hdrsize	= 0,
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 63ca7a3c77cf..0a715ab9d9cc 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2648,7 +2648,7 @@ static const struct genl_ops macsec_genl_ops[] = {
 	},
 };
 
-static struct genl_family macsec_fam = {
+static struct genl_family macsec_fam __ro_after_init = {
 	.name		= MACSEC_GENL_NAME,
 	.hdrsize	= 0,
 	.version	= MACSEC_GENL_VERSION,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 46bf7c1216c0..bdc58567d10e 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2740,7 +2740,7 @@ static const struct genl_multicast_group team_nl_mcgrps[] = {
 	{ .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, },
 };
 
-static struct genl_family team_nl_family = {
+static struct genl_family team_nl_family __ro_after_init = {
 	.name		= TEAM_GENL_NAME,
 	.version	= TEAM_GENL_VERSION,
 	.maxattr	= TEAM_ATTR_MAX,
@@ -2773,7 +2773,7 @@ static int team_nl_send_event_port_get(struct team *team,
 					  port);
 }
 
-static int team_nl_init(void)
+static int __init team_nl_init(void)
 {
 	return genl_register_family(&team_nl_family);
 }
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 5d4637e586e8..220e9dc8ccf8 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3228,7 +3228,7 @@ static const struct genl_ops hwsim_ops[] = {
 	},
 };
 
-static struct genl_family hwsim_genl_family = {
+static struct genl_family hwsim_genl_family __ro_after_init = {
 	.name = "MAC80211_HWSIM",
 	.version = 1,
 	.maxattr = HWSIM_ATTR_MAX,
@@ -3287,7 +3287,7 @@ static struct notifier_block hwsim_netlink_notifier = {
 	.notifier_call = mac80211_hwsim_netlink_notify,
 };
 
-static int hwsim_init_netlink(void)
+static int __init hwsim_init_netlink(void)
 {
 	int rc;
 
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index c0ab7bb8c3ce..845affa112f7 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -1368,7 +1368,7 @@ static struct genl_multicast_group pmcraid_mcgrps[] = {
 	{ .name = "events", /* not really used - see ID discussion below */ },
 };
 
-static struct genl_family pmcraid_event_family = {
+static struct genl_family pmcraid_event_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.name = "pmcraid",
 	.version = 1,
@@ -1384,7 +1384,7 @@ static struct genl_family pmcraid_event_family = {
  *	0 if the pmcraid_event_family is successfully registered
  *	with netlink generic, non-zero otherwise
  */
-static int pmcraid_netlink_init(void)
+static int __init pmcraid_netlink_init(void)
 {
 	int result;
 
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 3483372f5562..0f173bf7dbac 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -147,7 +147,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 };
 
 /* Our generic netlink family */
-static struct genl_family tcmu_genl_family = {
+static struct genl_family tcmu_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "TCM-USER",
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 93b6caab2d9f..911fd964c742 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -2163,7 +2163,7 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = {
 	{ .name = THERMAL_GENL_MCAST_GROUP_NAME, },
 };
 
-static struct genl_family thermal_event_genl_family = {
+static struct genl_family thermal_event_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.name = THERMAL_GENL_FAMILY_NAME,
 	.version = THERMAL_GENL_VERSION,
@@ -2235,7 +2235,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_generate_netlink_event);
 
-static int genetlink_init(void)
+static int __init genetlink_init(void)
 {
 	return genl_register_family(&thermal_event_genl_family);
 }
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 04042d69573c..0643ae44f342 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -72,7 +72,7 @@ static struct genl_ops dlm_nl_ops[] = {
 	},
 };
 
-static struct genl_family family = {
+static struct genl_family family __ro_after_init = {
 	.name		= DLM_GENL_NAME,
 	.version	= DLM_GENL_VERSION,
 	.ops		= dlm_nl_ops,
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 9457c7b0dfa2..e99b1a72d9a7 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -12,7 +12,7 @@ static const struct genl_multicast_group quota_mcgrps[] = {
 };
 
 /* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
+static struct genl_family quota_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "VFS_DQUOT",
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 40c2e39362c8..377257d8f7e3 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -293,7 +293,7 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
 #undef GENL_mc_group
 #define GENL_mc_group(group)
 
-static struct genl_family ZZZ_genl_family __read_mostly = {
+static struct genl_family ZZZ_genl_family __ro_after_init = {
 	.name = __stringify(GENL_MAGIC_FAMILY),
 	.version = GENL_MAGIC_VERSION,
 #ifdef GENL_MAGIC_FAMILY_HDRSZ
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4075ece592f2..9b7f838511ce 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -646,7 +646,7 @@ static const struct genl_ops taskstats_ops[] = {
 	},
 };
 
-static struct genl_family family = {
+static struct genl_family family __ro_after_init = {
 	.name		= TASKSTATS_GENL_NAME,
 	.version	= TASKSTATS_GENL_VERSION,
 	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index e28cec34a016..005012ba9b48 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -603,7 +603,7 @@ static struct genl_ops batadv_netlink_ops[] = {
 
 };
 
-struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = BATADV_NL_NAME,
 	.version = 1,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 063da8091aef..c14f8b661db9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1612,7 +1612,7 @@ static const struct genl_ops devlink_nl_ops[] = {
 	},
 };
 
-static struct genl_family devlink_nl_family = {
+static struct genl_family devlink_nl_family __ro_after_init = {
 	.name		= DEVLINK_GENL_NAME,
 	.version	= DEVLINK_GENL_VERSION,
 	.maxattr	= DEVLINK_ATTR_MAX,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 80c002794ff6..8e0c0635ee97 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -346,7 +346,7 @@ static const struct genl_ops dropmon_ops[] = {
 	},
 };
 
-static struct genl_family net_drop_monitor_family = {
+static struct genl_family net_drop_monitor_family __ro_after_init = {
 	.hdrsize        = 0,
 	.name           = "NET_DM",
 	.version        = 2,
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index aab34c7f6f89..1ab30e7d3f99 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -461,7 +461,7 @@ static const struct genl_ops hsr_ops[] = {
 	},
 };
 
-static struct genl_family hsr_genl_family = {
+static struct genl_family hsr_genl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = "HSR",
 	.version = 1,
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 08e62470bac2..6bde9e5a5503 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -131,7 +131,7 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
 	[IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
 };
 
-struct genl_family nl802154_family = {
+struct genl_family nl802154_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= IEEE802154_NL_NAME,
 	.version	= 1,
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index f7e75578aedd..fc60cd061f39 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -2462,7 +2462,7 @@ static const struct genl_ops nl802154_ops[] = {
 #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
 };
 
-static struct genl_family nl802154_fam = {
+static struct genl_family nl802154_fam __ro_after_init = {
 	.name = NL802154_GENL_NAME,	/* have users key off the name instead */
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
@@ -2478,7 +2478,7 @@ static struct genl_family nl802154_fam = {
 };
 
 /* initialisation/exit functions */
-int nl802154_init(void)
+int __init nl802154_init(void)
 {
 	return genl_register_family(&nl802154_fam);
 }
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 5b5226a2434f..6cb57bb8692d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -824,7 +824,7 @@ static const struct genl_ops fou_nl_ops[] = {
 	},
 };
 
-static struct genl_family fou_nl_family = {
+static struct genl_family fou_nl_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= FOU_GENL_NAME,
 	.version	= FOU_GENL_VERSION,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index bba3c72c4a39..d46f4d5b1c62 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1109,7 +1109,7 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
 	},
 };
 
-static struct genl_family tcp_metrics_nl_family = {
+static struct genl_family tcp_metrics_nl_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= TCP_METRICS_GENL_NAME,
 	.version	= TCP_METRICS_GENL_VERSION,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 97f7b0cc4675..628ae6d85b59 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -553,7 +553,7 @@ static const struct genl_ops ila_nl_ops[] = {
 	},
 };
 
-static struct genl_family ila_nl_family = {
+static struct genl_family ila_nl_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= ILA_GENL_NAME,
 	.version	= ILA_GENL_VERSION,
@@ -627,7 +627,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
 	return 0;
 }
 
-int ila_xlat_init(void)
+int __init ila_xlat_init(void)
 {
 	int ret;
 
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index 07877347c2f7..7fc340e574cf 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -141,7 +141,7 @@ static const struct genl_ops irda_nl_ops[] = {
 
 };
 
-static struct genl_family irda_nl_family = {
+static struct genl_family irda_nl_family __ro_after_init = {
 	.name = IRDA_NL_NAME,
 	.hdrsize = 0,
 	.version = IRDA_NL_VERSION,
@@ -151,7 +151,7 @@ static struct genl_family irda_nl_family = {
 	.n_ops = ARRAY_SIZE(irda_nl_ops),
 };
 
-int irda_nl_register(void)
+int __init irda_nl_register(void)
 {
 	return genl_register_family(&irda_nl_family);
 }
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index e4e8c0769a6b..59aa2d204e4a 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -970,7 +970,7 @@ static const struct genl_ops l2tp_nl_ops[] = {
 	},
 };
 
-static struct genl_family l2tp_nl_family = {
+static struct genl_family l2tp_nl_family __ro_after_init = {
 	.name		= L2TP_GENL_NAME,
 	.version	= L2TP_GENL_VERSION,
 	.hdrsize	= 0,
@@ -1016,7 +1016,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
 }
 EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
 
-static int l2tp_nl_init(void)
+static int __init l2tp_nl_init(void)
 {
 	pr_info("L2TP netlink interface\n");
 	return genl_register_family(&l2tp_nl_family);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ea3e8aed063f..6b85ded4f91d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3865,7 +3865,7 @@ static const struct genl_ops ip_vs_genl_ops[] = {
 	},
 };
 
-static struct genl_family ip_vs_genl_family = {
+static struct genl_family ip_vs_genl_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index ca7c9c411a5c..d177dd066504 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -349,7 +349,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
 	},
 };
 
-static struct genl_family netlbl_calipso_gnl_family = {
+static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_CALIPSO_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index a665eae91245..4149d3e63589 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -760,7 +760,7 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
 	},
 };
 
-static struct genl_family netlbl_cipsov4_gnl_family = {
+static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index ecfe8eb149db..21e0095b1d14 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -828,7 +828,7 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
 	},
 };
 
-static struct genl_family netlbl_mgmt_gnl_family = {
+static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_MGMT_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 5dbbad41114f..22dc1b9d6362 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1372,7 +1372,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
 	},
 };
 
-static struct genl_family netlbl_unlabel_gnl_family = {
+static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = NETLBL_NLTYPE_UNLABELED_NAME,
 	.version = NETLBL_PROTO_VERSION,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 85659921e7b2..df0cbcddda2c 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -936,7 +936,7 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
 	{ .name = "notify", },
 };
 
-static struct genl_family genl_ctrl = {
+static struct genl_family genl_ctrl __ro_after_init = {
 	.module = THIS_MODULE,
 	.ops = genl_ctrl_ops,
 	.n_ops = ARRAY_SIZE(genl_ctrl_ops),
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 450b1e5144cc..03f3d5c7beb8 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1746,7 +1746,7 @@ static const struct genl_ops nfc_genl_ops[] = {
 	},
 };
 
-static struct genl_family nfc_genl_family = {
+static struct genl_family nfc_genl_family __ro_after_init = {
 	.hdrsize = 0,
 	.name = NFC_GENL_NAME,
 	.version = NFC_GENL_VERSION,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ad6a111a0014..fa8760176b7d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -670,7 +670,7 @@ static const struct genl_ops dp_packet_genl_ops[] = {
 	}
 };
 
-static struct genl_family dp_packet_genl_family = {
+static struct genl_family dp_packet_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_PACKET_FAMILY,
 	.version = OVS_PACKET_VERSION,
@@ -1435,7 +1435,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
 	},
 };
 
-static struct genl_family dp_flow_genl_family = {
+static struct genl_family dp_flow_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_FLOW_FAMILY,
 	.version = OVS_FLOW_VERSION,
@@ -1821,7 +1821,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
 	},
 };
 
-static struct genl_family dp_datapath_genl_family = {
+static struct genl_family dp_datapath_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_DATAPATH_FAMILY,
 	.version = OVS_DATAPATH_VERSION,
@@ -2243,7 +2243,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
 	},
 };
 
-struct genl_family dp_vport_genl_family = {
+struct genl_family dp_vport_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_VPORT_FAMILY,
 	.version = OVS_VPORT_VERSION,
@@ -2272,7 +2272,7 @@ static void dp_unregister_genl(int n_families)
 		genl_unregister_family(dp_genl_families[i]);
 }
 
-static int dp_register_genl(void)
+static int __init dp_register_genl(void)
 {
 	int err;
 	int i;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 74a405bf107b..26ca8dd64ded 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -249,7 +249,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 #endif
 };
 
-struct genl_family tipc_genl_family = {
+struct genl_family tipc_genl_family __ro_after_init = {
 	.name		= TIPC_GENL_V2_NAME,
 	.version	= TIPC_GENL_V2_VERSION,
 	.hdrsize	= 0,
@@ -271,7 +271,7 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
 	return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
 }
 
-int tipc_netlink_start(void)
+int __init tipc_netlink_start(void)
 {
 	int res;
 
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 07b19931e458..e1ae8a8a2b8e 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1222,7 +1222,7 @@ static struct genl_ops tipc_genl_compat_ops[] = {
 	},
 };
 
-static struct genl_family tipc_genl_compat_family = {
+static struct genl_family tipc_genl_compat_family __ro_after_init = {
 	.name		= TIPC_GENL_NAME,
 	.version	= TIPC_GENL_VERSION,
 	.hdrsize	= TIPC_GENL_HDRLEN,
@@ -1233,7 +1233,7 @@ static struct genl_family tipc_genl_compat_family = {
 	.n_ops		= ARRAY_SIZE(tipc_genl_compat_ops),
 };
 
-int tipc_netlink_compat_start(void)
+int __init tipc_netlink_compat_start(void)
 {
 	int res;
 
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 587e1627681f..5db731512014 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -576,7 +576,7 @@ static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
 	{ .name = "msg", },
 };
 
-struct genl_family wimax_gnl_family = {
+struct genl_family wimax_gnl_family __ro_after_init = {
 	.name = "WiMAX",
 	.version = WIMAX_GNL_VERSION,
 	.hdrsize = 0,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8e5ca3c47593..271707dacfea 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12586,7 +12586,7 @@ static const struct genl_ops nl80211_ops[] = {
 	},
 };
 
-static struct genl_family nl80211_fam = {
+static struct genl_family nl80211_fam __ro_after_init = {
 	.name = NL80211_GENL_NAME,	/* have users key off the name instead */
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
@@ -14563,7 +14563,7 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
 
 /* initialisation/exit functions */
 
-int nl80211_init(void)
+int __init nl80211_init(void)
 {
 	int err;
 
-- 
cgit v1.2.3


From 7f847dd31736f1284538e54f46cf10e63929eb7f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 20 Oct 2016 22:07:53 +0200
Subject: debugfs: improve DEFINE_DEBUGFS_ATTRIBUTE for !CONFIG_DEBUG_FS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The slp_s0_residency_usec debugfs file currently uses
DEFINE_DEBUGFS_ATTRIBUTE(), but that macro cannot really be used to
define files outside of the debugfs code, as it has no reference to
the get/set functions if CONFIG_DEBUG_FS is not defined:

drivers/platform/x86/intel_pmc_core.c:80:12: error: ‘pmc_core_dev_state_get’ defined but not used [-Werror=unused-function]

This fixes the macro to always contain the reference, and instead rely
on the stubbed-out debugfs_create_file to not actually refer to
its arguments so the compiler can still drop the reference.
This works because the attribute definition is always 'static',
and the dead-code removal silently drops all static symbols
that are not used.

Fixes: c64688081490 ("debugfs: add support for self-protecting attribute file fops")
Fixes: df2294fb6428 ("intel_pmc_core: Convert to DEFINE_DEBUGFS_ATTRIBUTE")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[nicstange@gmail.com: Add dummy implementations of debugfs_attr_read() and
  debugfs_attr_write() in order to protect against possibly broken dead
  code elimination and to improve readability.
  Correct CONFIG_DEBUGFS_FS -> CONFIG_DEBUG_FS typo in changelog.]
Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 4d3f0d1aec73..1b413a9aab81 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -62,6 +62,21 @@ static inline const struct file_operations *debugfs_real_fops(struct file *filp)
 	return filp->f_path.dentry->d_fsdata;
 }
 
+#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt)		\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return simple_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static const struct file_operations __fops = {				\
+	.owner	 = THIS_MODULE,						\
+	.open	 = __fops ## _open,					\
+	.release = simple_attr_release,					\
+	.read	 = debugfs_attr_read,					\
+	.write	 = debugfs_attr_write,					\
+	.llseek  = generic_file_llseek,					\
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
@@ -99,21 +114,6 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
 ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
 			size_t len, loff_t *ppos);
 
-#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt)		\
-static int __fops ## _open(struct inode *inode, struct file *file)	\
-{									\
-	__simple_attr_check_format(__fmt, 0ull);			\
-	return simple_attr_open(inode, file, __get, __set, __fmt);	\
-}									\
-static const struct file_operations __fops = {				\
-	.owner	 = THIS_MODULE,					\
-	.open	 = __fops ## _open,					\
-	.release = simple_attr_release,				\
-	.read	 = debugfs_attr_read,					\
-	.write	 = debugfs_attr_write,					\
-	.llseek  = generic_file_llseek,				\
-}
-
 struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
                 struct dentry *new_dir, const char *new_name);
 
@@ -233,8 +233,18 @@ static inline void debugfs_use_file_finish(int srcu_idx)
 	__releases(&debugfs_srcu)
 { }
 
-#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt)	\
-	static const struct file_operations __fops = { 0 }
+static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+					size_t len, loff_t *ppos)
+{
+	return -ENODEV;
+}
+
+static inline ssize_t debugfs_attr_write(struct file *file,
+					const char __user *buf,
+					size_t len, loff_t *ppos)
+{
+	return -ENODEV;
+}
 
 static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
                 struct dentry *new_dir, char *new_name)
-- 
cgit v1.2.3


From 5be149bdd36c8765c9e785f70b888d028ada40af Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 19 Oct 2016 16:33:27 +0300
Subject: mei: bus: add module_mei_cl_driver helper macro

Add module_mei_cl_driver helper macro for eliminating the boilerplate
code from mei_cl drivers registration. The macro is intended for
drivers which in their init/exit sections does only register/unregister
of a mei_cl driver.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mei_cl_bus.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index e746919530f5..e6fbd98ea90e 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -74,6 +74,19 @@ int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
 
 void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
 
+/**
+ * module_mei_cl_driver - Helper macro for registering mei cl driver
+ *
+ * @__mei_cldrv mei_cl_driver structure
+ *
+ *  Helper macro for mei cl drivers which do not do anything special in module
+ *  init/exit, for eliminating a boilerplate code.
+ */
+#define module_mei_cl_driver(__mei_cldrv) \
+	module_driver(__mei_cldrv, \
+		      mei_cldev_driver_register,\
+		      mei_cldev_driver_unregister)
+
 ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length);
 ssize_t  mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
 
-- 
cgit v1.2.3


From 1e4edb3fe93ff9f7b678a8f1b8d9df717edf6ad9 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 19 Oct 2016 16:33:31 +0300
Subject: mei: bus: remove rx callback context

The callback context is redunant as all the information can be
retrived from the device struture of its private data.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c     | 6 ++----
 drivers/nfc/mei_phy.c      | 5 ++---
 drivers/watchdog/mei_wdt.c | 6 ++----
 include/linux/mei_cl_bus.h | 6 ++----
 4 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 8cac7ef9ad0d..89a694ca624c 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -228,7 +228,7 @@ static void mei_cl_bus_event_work(struct work_struct *work)
 	bus = cldev->bus;
 
 	if (cldev->event_cb)
-		cldev->event_cb(cldev, cldev->events, cldev->event_context);
+		cldev->event_cb(cldev, cldev->events);
 
 	cldev->events = 0;
 
@@ -301,7 +301,6 @@ bool mei_cl_bus_rx_event(struct mei_cl *cl)
  * @cldev: me client devices
  * @event_cb: callback function
  * @events_mask: requested events bitmask
- * @context: driver context data
  *
  * Return: 0 on success
  *         -EALREADY if an callback is already registered
@@ -309,7 +308,7 @@ bool mei_cl_bus_rx_event(struct mei_cl *cl)
  */
 int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
 				unsigned long events_mask,
-				mei_cldev_event_cb_t event_cb, void *context)
+				mei_cldev_event_cb_t event_cb)
 {
 	struct mei_device *bus = cldev->bus;
 	int ret;
@@ -320,7 +319,6 @@ int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
 	cldev->events = 0;
 	cldev->events_mask = events_mask;
 	cldev->event_cb = event_cb;
-	cldev->event_context = context;
 	INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
 
 	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c
index 66dfd81ffb18..07b4239585fa 100644
--- a/drivers/nfc/mei_phy.c
+++ b/drivers/nfc/mei_phy.c
@@ -297,8 +297,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length)
 }
 
 
-static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events,
-			     void *context)
+static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events)
 {
 	struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
 
@@ -360,7 +359,7 @@ static int nfc_mei_phy_enable(void *phy_id)
 	}
 
 	r = mei_cldev_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX),
-				     nfc_mei_event_cb, phy);
+					nfc_mei_event_cb);
 	if (r) {
 		pr_err("Event cb registration failed %d\n", r);
 		goto err;
diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c
index 116be477c8fd..e0af52265511 100644
--- a/drivers/watchdog/mei_wdt.c
+++ b/drivers/watchdog/mei_wdt.c
@@ -501,10 +501,8 @@ static void mei_wdt_notify_event(struct mei_cl_device *cldev)
  *
  * @cldev: bus device
  * @events: event mask
- * @context: callback context
  */
-static void mei_wdt_event(struct mei_cl_device *cldev,
-			  u32 events, void *context)
+static void mei_wdt_event(struct mei_cl_device *cldev, u32 events)
 {
 	if (events & BIT(MEI_CL_EVENT_RX))
 		mei_wdt_event_rx(cldev);
@@ -626,7 +624,7 @@ static int mei_wdt_probe(struct mei_cl_device *cldev,
 	ret = mei_cldev_register_event_cb(wdt->cldev,
 					  BIT(MEI_CL_EVENT_RX) |
 					  BIT(MEI_CL_EVENT_NOTIF),
-					  mei_wdt_event, NULL);
+					  mei_wdt_event);
 
 	/* on legacy devices notification is not supported
 	 * this doesn't fail the registration for RX event
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index e6fbd98ea90e..4adb2e7c9f84 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -9,7 +9,7 @@ struct mei_cl_device;
 struct mei_device;
 
 typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev,
-				     u32 events, void *context);
+				     u32 events);
 
 /**
  * struct mei_cl_device - MEI device handle
@@ -27,7 +27,6 @@ typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev,
  * @event_work: async work to execute event callback
  * @event_cb: Drivers register this callback to get asynchronous ME
  *	events (e.g. Rx buffer pending) notifications.
- * @event_context: event callback run context
  * @events_mask: Events bit mask requested by driver.
  * @events: Events bitmask sent to the driver.
  *
@@ -46,7 +45,6 @@ struct mei_cl_device {
 
 	struct work_struct event_work;
 	mei_cldev_event_cb_t event_cb;
-	void *event_context;
 	unsigned long events_mask;
 	unsigned long events;
 
@@ -92,7 +90,7 @@ ssize_t  mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
 
 int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
 				unsigned long event_mask,
-				mei_cldev_event_cb_t read_cb, void *context);
+				mei_cldev_event_cb_t read_cb);
 
 #define MEI_CL_EVENT_RX 0
 #define MEI_CL_EVENT_TX 1
-- 
cgit v1.2.3


From f71082f815bf0b9599b97546cf1dd057c789c3c1 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sat, 22 Oct 2016 18:36:05 -0400
Subject: miscdevice: introduce builtin_misc_device

This is basically the same as module_misc_device but without the
presence of an exit/remove function.   Similar in nature to the
commit f309d4443130bf814e991f836e919dca22df37ae ("platform_device:
better support builtin boilerplate avoidance").

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/miscdevice.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 722698a43d79..a426cb55dc43 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -71,6 +71,13 @@ struct miscdevice  {
 extern int misc_register(struct miscdevice *misc);
 extern void misc_deregister(struct miscdevice *misc);
 
+/*
+ * Helper macro for drivers that don't do anything special in the initcall.
+ * This helps in eleminating of boilerplate code.
+ */
+#define builtin_misc_device(__misc_device) \
+	builtin_driver(__misc_device, misc_register)
+
 /*
  * Helper macro for drivers that don't do anything special in module init / exit
  * call. This helps in eleminating of boilerplate code.
-- 
cgit v1.2.3


From c4aebd0332da831a3403faf2035af45059ab6b7c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:09 +0200
Subject: block: remove bio_is_rw

With the addition of the zoned operations the tests in this function
became incorrect.  But I think it's much better to just open code the
allow operations in the only caller anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c |  2 +-
 include/linux/bio.h   | 11 -----------
 2 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 63f72f00c72e..5384713d48bc 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -172,7 +172,7 @@ bool bio_integrity_enabled(struct bio *bio)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	if (!bio_is_rw(bio))
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
 		return false;
 
 	/* Already protected? */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97cb48f03dc7..87ce64dafb93 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -83,17 +83,6 @@ static inline bool bio_no_advance_iter(struct bio *bio)
 	       bio_op(bio) == REQ_OP_WRITE_SAME;
 }
 
-static inline bool bio_is_rw(struct bio *bio)
-{
-	if (!bio_has_data(bio))
-		return false;
-
-	if (bio_no_advance_iter(bio))
-		return false;
-
-	return true;
-}
-
 static inline bool bio_mergeable(struct bio *bio)
 {
 	if (bio->bi_opf & REQ_NOMERGE_FLAGS)
-- 
cgit v1.2.3


From bd1c1c21741cbd6e894960bcbc8b36f719590064 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:10 +0200
Subject: block: REQ_NOMERGE is common to the bio and request

So move it into the common setion of the request flags.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dd50dce89a80..b54142534793 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -158,6 +158,7 @@ enum rq_flag_bits {
 	__REQ_META,		/* metadata io request */
 	__REQ_PRIO,		/* boost priority in cfq */
 
+	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
@@ -171,7 +172,6 @@ enum rq_flag_bits {
 	/* request only flags */
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_STARTED,		/* drive already may have started this one */
 	__REQ_DONTPREP,		/* don't call prep for this one */
 	__REQ_QUEUED,		/* uses queueing */
-- 
cgit v1.2.3


From 188bd2b16b3c6ea87a90df20f33db0adcdb75f0c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:11 +0200
Subject: block: move REQ_RAHEAD to common flags

The information that am I/O is a read-ahead can be useful for drivers.
In fact the NVMe driver already checks it, even if it won't ever be set
at the moment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index b54142534793..44f9bca332e5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -163,9 +163,9 @@ enum rq_flag_bits {
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
+	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 
 	/* bio only flags */
-	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_THROTTLED,	/* This bio has already been subjected to
 				 * throttling rules. Don't do it again. */
 
@@ -205,7 +205,7 @@ enum rq_flag_bits {
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
-	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
+	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 /* This mask is used for both bio and request merge checking */
-- 
cgit v1.2.3


From 8d2bbd4c8236e9e38e6b36ac9e2c54fdcfe5b335 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:12 +0200
Subject: block: replace REQ_THROTTLED with a bio flag

It's the last bio-only REQ_* flag, and we have space for it in the bio
bi_flags field.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-throttle.c      | 10 +++++-----
 include/linux/blk_types.h |  8 ++------
 2 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a3ea8260c94c..a6bb4fe326c3 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -818,13 +818,13 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 	tg->io_disp[rw]++;
 
 	/*
-	 * REQ_THROTTLED is used to prevent the same bio to be throttled
+	 * BIO_THROTTLED is used to prevent the same bio to be throttled
 	 * more than once as a throttled bio will go through blk-throtl the
 	 * second time when it eventually gets issued.  Set it when a bio
 	 * is being charged to a tg.
 	 */
-	if (!(bio->bi_opf & REQ_THROTTLED))
-		bio->bi_opf |= REQ_THROTTLED;
+	if (!bio_flagged(bio, BIO_THROTTLED))
+		bio_set_flag(bio, BIO_THROTTLED);
 }
 
 /**
@@ -1401,7 +1401,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* see throtl_charge_bio() */
-	if ((bio->bi_opf & REQ_THROTTLED) || !tg->has_rules[rw])
+	if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
 		goto out;
 
 	spin_lock_irq(q->queue_lock);
@@ -1480,7 +1480,7 @@ out:
 	 * being issued.
 	 */
 	if (!throttled)
-		bio->bi_opf &= ~REQ_THROTTLED;
+		bio_clear_flag(bio, BIO_THROTTLED);
 	return throttled;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 44f9bca332e5..6df722de2e22 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -119,6 +119,8 @@ struct bio {
 #define BIO_QUIET	6	/* Make BIO Quiet */
 #define BIO_CHAIN	7	/* chained bio, ->bi_remaining in effect */
 #define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
+#define BIO_THROTTLED	9	/* This bio has already been subjected to
+				 * throttling rules. Don't do it again. */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
@@ -165,10 +167,6 @@ enum rq_flag_bits {
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 
-	/* bio only flags */
-	__REQ_THROTTLED,	/* This bio has already been subjected to
-				 * throttling rules. Don't do it again. */
-
 	/* request only flags */
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
@@ -213,8 +211,6 @@ enum rq_flag_bits {
 	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ)
 
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
-#define REQ_THROTTLED		(1ULL << __REQ_THROTTLED)
-
 #define REQ_SORTED		(1ULL << __REQ_SORTED)
 #define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
 #define REQ_FUA			(1ULL << __REQ_FUA)
-- 
cgit v1.2.3


From e806402130c9c494e22c73ae9ead4e79d2a5811c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:13 +0200
Subject: block: split out request-only flags into a new namespace

A lot of the REQ_* flags are only used on struct requests, and only of
use to the block layer and a few drivers that dig into struct request
internals.

This patch adds a new req_flags_t rq_flags field to struct request for
them, and thus dramatically shrinks the number of common requests.  It
also removes the unfortunate situation where we have to fit the fields
from the same enum into 32 bits for struct bio and 64 bits for
struct request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/biodoc.txt              |  2 +-
 block/blk-core.c                            | 71 ++++++++++++++-------------
 block/blk-exec.c                            |  2 +-
 block/blk-flush.c                           |  9 ++--
 block/blk-map.c                             |  4 +-
 block/blk-merge.c                           |  8 +--
 block/blk-mq.c                              | 19 ++++----
 block/blk-tag.c                             |  6 +--
 block/blk.h                                 |  4 +-
 block/elevator.c                            | 32 ++++++------
 drivers/block/pktcdvd.c                     |  2 +-
 drivers/ide/ide-atapi.c                     |  6 +--
 drivers/ide/ide-cd.c                        | 46 +++++++++---------
 drivers/ide/ide-cd.h                        |  2 +-
 drivers/ide/ide-cd_ioctl.c                  |  6 +--
 drivers/ide/ide-io.c                        |  6 +--
 drivers/ide/ide-pm.c                        |  4 +-
 drivers/md/dm-rq.c                          | 12 ++---
 drivers/memstick/core/ms_block.c            |  2 +-
 drivers/memstick/core/mspro_block.c         |  2 +-
 drivers/mmc/card/block.c                    |  4 +-
 drivers/mmc/card/queue.c                    |  4 +-
 drivers/nvme/host/pci.c                     |  4 +-
 drivers/scsi/device_handler/scsi_dh_alua.c  |  8 +--
 drivers/scsi/device_handler/scsi_dh_emc.c   |  2 +-
 drivers/scsi/device_handler/scsi_dh_hp_sw.c |  2 +-
 drivers/scsi/device_handler/scsi_dh_rdac.c  |  2 +-
 drivers/scsi/osd/osd_initiator.c            |  2 +-
 drivers/scsi/osst.c                         |  2 +-
 drivers/scsi/scsi_error.c                   |  2 +-
 drivers/scsi/scsi_lib.c                     | 75 +++++++++++++++++------------
 drivers/scsi/sd.c                           |  6 +--
 drivers/scsi/sd_zbc.c                       |  2 +-
 drivers/scsi/st.c                           |  2 +-
 drivers/scsi/ufs/ufshcd.c                   |  6 +--
 include/linux/blk_types.h                   | 39 +--------------
 include/linux/blkdev.h                      | 49 ++++++++++++++++++-
 include/scsi/scsi_device.h                  |  4 +-
 38 files changed, 242 insertions(+), 218 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 918e1e0d0e78..6acea160298c 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -348,7 +348,7 @@ Drivers can now specify a request prepare function (q->prep_rq_fn) that the
 block layer would invoke to pre-build device commands for a given request,
 or perform other preparatory processing for the request. This is routine is
 called by elv_next_request(), i.e. typically just before servicing a request.
-(The prepare function would not be called for requests that have REQ_DONTPREP
+(The prepare function would not be called for requests that have RQF_DONTPREP
 enabled)
 
 Aside:
diff --git a/block/blk-core.c b/block/blk-core.c
index e4eda5d2aa56..fd416651a676 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -145,13 +145,13 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	if (error)
 		bio->bi_error = error;
 
-	if (unlikely(rq->cmd_flags & REQ_QUIET))
+	if (unlikely(rq->rq_flags & RQF_QUIET))
 		bio_set_flag(bio, BIO_QUIET);
 
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
-	if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
 		bio_endio(bio);
 }
 
@@ -899,7 +899,7 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_list *rl, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV) {
+	if (rq->rq_flags & RQF_ELVPRIV) {
 		elv_put_request(rl->q, rq);
 		if (rq->elv.icq)
 			put_io_context(rq->elv.icq->ioc);
@@ -961,14 +961,14 @@ static void __freed_request(struct request_list *rl, int sync)
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(struct request_list *rl, int op, unsigned int flags)
+static void freed_request(struct request_list *rl, bool sync,
+		req_flags_t rq_flags)
 {
 	struct request_queue *q = rl->q;
-	int sync = rw_is_sync(op, flags);
 
 	q->nr_rqs[sync]--;
 	rl->count[sync]--;
-	if (flags & REQ_ELVPRIV)
+	if (rq_flags & RQF_ELVPRIV)
 		q->nr_rqs_elvpriv--;
 
 	__freed_request(rl, sync);
@@ -1079,6 +1079,7 @@ static struct request *__get_request(struct request_list *rl, int op,
 	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(op, op_flags) != 0;
 	int may_queue;
+	req_flags_t rq_flags = RQF_ALLOCED;
 
 	if (unlikely(blk_queue_dying(q)))
 		return ERR_PTR(-ENODEV);
@@ -1127,7 +1128,7 @@ static struct request *__get_request(struct request_list *rl, int op,
 
 	/*
 	 * Decide whether the new request will be managed by elevator.  If
-	 * so, mark @op_flags and increment elvpriv.  Non-zero elvpriv will
+	 * so, mark @rq_flags and increment elvpriv.  Non-zero elvpriv will
 	 * prevent the current elevator from being destroyed until the new
 	 * request is freed.  This guarantees icq's won't be destroyed and
 	 * makes creating new ones safe.
@@ -1136,14 +1137,14 @@ static struct request *__get_request(struct request_list *rl, int op,
 	 * it will be created after releasing queue_lock.
 	 */
 	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
-		op_flags |= REQ_ELVPRIV;
+		rq_flags |= RQF_ELVPRIV;
 		q->nr_rqs_elvpriv++;
 		if (et->icq_cache && ioc)
 			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
-		op_flags |= REQ_IO_STAT;
+		rq_flags |= RQF_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
 	/* allocate and init request */
@@ -1153,10 +1154,11 @@ static struct request *__get_request(struct request_list *rl, int op,
 
 	blk_rq_init(q, rq);
 	blk_rq_set_rl(rq, rl);
-	req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
+	req_set_op_attrs(rq, op, op_flags);
+	rq->rq_flags = rq_flags;
 
 	/* init elvpriv */
-	if (op_flags & REQ_ELVPRIV) {
+	if (rq_flags & RQF_ELVPRIV) {
 		if (unlikely(et->icq_cache && !icq)) {
 			if (ioc)
 				icq = ioc_create_icq(ioc, q, gfp_mask);
@@ -1195,7 +1197,7 @@ fail_elvpriv:
 	printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
 			   __func__, dev_name(q->backing_dev_info.dev));
 
-	rq->cmd_flags &= ~REQ_ELVPRIV;
+	rq->rq_flags &= ~RQF_ELVPRIV;
 	rq->elv.icq = NULL;
 
 	spin_lock_irq(q->queue_lock);
@@ -1212,7 +1214,7 @@ fail_alloc:
 	 * queue, but this is pretty rare.
 	 */
 	spin_lock_irq(q->queue_lock);
-	freed_request(rl, op, op_flags);
+	freed_request(rl, is_sync, rq_flags);
 
 	/*
 	 * in the very unlikely event that allocation failed and no
@@ -1347,7 +1349,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
 
-	if (rq->cmd_flags & REQ_QUEUED)
+	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
 
 	BUG_ON(blk_queued_rq(rq));
@@ -1409,7 +1411,7 @@ EXPORT_SYMBOL_GPL(part_round_stats);
 #ifdef CONFIG_PM
 static void blk_pm_put_request(struct request *rq)
 {
-	if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
+	if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
 		pm_runtime_mark_last_busy(rq->q->dev);
 }
 #else
@@ -1421,6 +1423,8 @@ static inline void blk_pm_put_request(struct request *rq) {}
  */
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
+	req_flags_t rq_flags = req->rq_flags;
+
 	if (unlikely(!q))
 		return;
 
@@ -1440,16 +1444,15 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
 	 */
-	if (req->cmd_flags & REQ_ALLOCED) {
-		unsigned int flags = req->cmd_flags;
-		int op = req_op(req);
+	if (rq_flags & RQF_ALLOCED) {
 		struct request_list *rl = blk_rq_rl(req);
+		bool sync = rw_is_sync(req_op(req), req->cmd_flags);
 
 		BUG_ON(!list_empty(&req->queuelist));
 		BUG_ON(ELV_ON_HASH(req));
 
 		blk_free_request(rl, req);
-		freed_request(rl, op, flags);
+		freed_request(rl, sync, rq_flags);
 		blk_put_rl(rl);
 	}
 }
@@ -2214,7 +2217,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 	unsigned int bytes = 0;
 	struct bio *bio;
 
-	if (!(rq->cmd_flags & REQ_MIXED_MERGE))
+	if (!(rq->rq_flags & RQF_MIXED_MERGE))
 		return blk_rq_bytes(rq);
 
 	/*
@@ -2257,7 +2260,7 @@ void blk_account_io_done(struct request *req)
 	 * normal IO on queueing nor completion.  Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
+	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
@@ -2285,7 +2288,7 @@ static struct request *blk_pm_peek_request(struct request_queue *q,
 					   struct request *rq)
 {
 	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
+	    (q->rpm_status != RPM_ACTIVE && !(rq->rq_flags & RQF_PM))))
 		return NULL;
 	else
 		return rq;
@@ -2361,13 +2364,13 @@ struct request *blk_peek_request(struct request_queue *q)
 		if (!rq)
 			break;
 
-		if (!(rq->cmd_flags & REQ_STARTED)) {
+		if (!(rq->rq_flags & RQF_STARTED)) {
 			/*
 			 * This is the first time the device driver
 			 * sees this request (possibly after
 			 * requeueing).  Notify IO scheduler.
 			 */
-			if (rq->cmd_flags & REQ_SORTED)
+			if (rq->rq_flags & RQF_SORTED)
 				elv_activate_rq(q, rq);
 
 			/*
@@ -2375,7 +2378,7 @@ struct request *blk_peek_request(struct request_queue *q)
 			 * it, a request that has been delayed should
 			 * not be passed by new incoming requests
 			 */
-			rq->cmd_flags |= REQ_STARTED;
+			rq->rq_flags |= RQF_STARTED;
 			trace_block_rq_issue(q, rq);
 		}
 
@@ -2384,7 +2387,7 @@ struct request *blk_peek_request(struct request_queue *q)
 			q->boundary_rq = NULL;
 		}
 
-		if (rq->cmd_flags & REQ_DONTPREP)
+		if (rq->rq_flags & RQF_DONTPREP)
 			break;
 
 		if (q->dma_drain_size && blk_rq_bytes(rq)) {
@@ -2407,11 +2410,11 @@ struct request *blk_peek_request(struct request_queue *q)
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  REQ_STARTED will
+			 * avoid resource deadlock.  RQF_STARTED will
 			 * prevent other fs requests from passing this one.
 			 */
 			if (q->dma_drain_size && blk_rq_bytes(rq) &&
-			    !(rq->cmd_flags & REQ_DONTPREP)) {
+			    !(rq->rq_flags & RQF_DONTPREP)) {
 				/*
 				 * remove the space for the drain we added
 				 * so that we don't add it again
@@ -2424,7 +2427,7 @@ struct request *blk_peek_request(struct request_queue *q)
 		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
 			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
 
-			rq->cmd_flags |= REQ_QUIET;
+			rq->rq_flags |= RQF_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
@@ -2561,7 +2564,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		req->errors = 0;
 
 	if (error && req->cmd_type == REQ_TYPE_FS &&
-	    !(req->cmd_flags & REQ_QUIET)) {
+	    !(req->rq_flags & RQF_QUIET)) {
 		char *error_type;
 
 		switch (error) {
@@ -2634,7 +2637,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		req->__sector += total_bytes >> 9;
 
 	/* mixed attributes always follow the first bio */
-	if (req->cmd_flags & REQ_MIXED_MERGE) {
+	if (req->rq_flags & RQF_MIXED_MERGE) {
 		req->cmd_flags &= ~REQ_FAILFAST_MASK;
 		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
 	}
@@ -2687,7 +2690,7 @@ void blk_unprep_request(struct request *req)
 {
 	struct request_queue *q = req->q;
 
-	req->cmd_flags &= ~REQ_DONTPREP;
+	req->rq_flags &= ~RQF_DONTPREP;
 	if (q->unprep_rq_fn)
 		q->unprep_rq_fn(q, req);
 }
@@ -2698,7 +2701,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
  */
 void blk_finish_request(struct request *req, int error)
 {
-	if (req->cmd_flags & REQ_QUEUED)
+	if (req->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(req->q, req);
 
 	BUG_ON(blk_queued_rq(req));
@@ -2708,7 +2711,7 @@ void blk_finish_request(struct request *req, int error)
 
 	blk_delete_timer(req);
 
-	if (req->cmd_flags & REQ_DONTPREP)
+	if (req->rq_flags & RQF_DONTPREP)
 		blk_unprep_request(req);
 
 	blk_account_io_done(req);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 7ea04325d02f..3ecb00a6cf45 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	spin_lock_irq(q->queue_lock);
 
 	if (unlikely(blk_queue_dying(q))) {
-		rq->cmd_flags |= REQ_QUIET; 
+		rq->rq_flags |= RQF_QUIET;
 		rq->errors = -ENXIO;
 		__blk_end_request_all(rq, rq->errors);
 		spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6a14b68b9135..3990b9cfbda5 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -56,7 +56,7 @@
  * Once while executing DATA and again after the whole sequence is
  * complete.  The first completion updates the contained bio but doesn't
  * finish it so that the bio submitter is notified only after the whole
- * sequence is complete.  This is implemented by testing REQ_FLUSH_SEQ in
+ * sequence is complete.  This is implemented by testing RQF_FLUSH_SEQ in
  * req_bio_endio().
  *
  * The above peculiarity requires that each FLUSH/FUA request has only one
@@ -127,7 +127,7 @@ static void blk_flush_restore_request(struct request *rq)
 	rq->bio = rq->biotail;
 
 	/* make @rq a normal request */
-	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
+	rq->rq_flags &= ~RQF_FLUSH_SEQ;
 	rq->end_io = rq->flush.saved_end_io;
 }
 
@@ -330,7 +330,8 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	}
 
 	flush_rq->cmd_type = REQ_TYPE_FS;
-	req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ);
+	req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH);
+	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->rq_disk = first_rq->rq_disk;
 	flush_rq->end_io = flush_end_io;
 
@@ -433,7 +434,7 @@ void blk_insert_flush(struct request *rq)
 	 */
 	memset(&rq->flush, 0, sizeof(rq->flush));
 	INIT_LIST_HEAD(&rq->flush.list);
-	rq->cmd_flags |= REQ_FLUSH_SEQ;
+	rq->rq_flags |= RQF_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
 	if (q->mq_ops) {
 		rq->end_io = mq_flush_data_end_io;
diff --git a/block/blk-map.c b/block/blk-map.c
index b8657fa8dc9a..2c5ae5fef473 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -135,7 +135,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	} while (iov_iter_count(&i));
 
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
-		rq->cmd_flags |= REQ_COPY_USER;
+		rq->rq_flags |= RQF_COPY_USER;
 	return 0;
 
 unmap_rq:
@@ -232,7 +232,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 	if (do_copy)
-		rq->cmd_flags |= REQ_COPY_USER;
+		rq->rq_flags |= RQF_COPY_USER;
 
 	ret = blk_rq_append_bio(rq, bio);
 	if (unlikely(ret)) {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2642e5fc8b69..fda6a12fc776 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -456,7 +456,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	if (rq->bio)
 		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
-	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
+	if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
 	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
 		unsigned int pad_len =
 			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
@@ -634,7 +634,7 @@ void blk_rq_set_mixed_merge(struct request *rq)
 	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
 	struct bio *bio;
 
-	if (rq->cmd_flags & REQ_MIXED_MERGE)
+	if (rq->rq_flags & RQF_MIXED_MERGE)
 		return;
 
 	/*
@@ -647,7 +647,7 @@ void blk_rq_set_mixed_merge(struct request *rq)
 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
 		bio->bi_opf |= ff;
 	}
-	rq->cmd_flags |= REQ_MIXED_MERGE;
+	rq->rq_flags |= RQF_MIXED_MERGE;
 }
 
 static void blk_account_io_merge(struct request *req)
@@ -709,7 +709,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	 * makes sure that all involved bios have mixable attributes
 	 * set properly.
 	 */
-	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
+	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
 		blk_rq_set_mixed_merge(req);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d74a74a9f9ef..b49c6658eb05 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -142,14 +142,13 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 			       struct request *rq, int op,
 			       unsigned int op_flags)
 {
-	if (blk_queue_io_stat(q))
-		op_flags |= REQ_IO_STAT;
-
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* csd/requeue_work/fifo_time is initialized before use */
 	rq->q = q;
 	rq->mq_ctx = ctx;
 	req_set_op_attrs(rq, op, op_flags);
+	if (blk_queue_io_stat(q))
+		rq->rq_flags |= RQF_IO_STAT;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -198,7 +197,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
 		rq = data->hctx->tags->rqs[tag];
 
 		if (blk_mq_tag_busy(data->hctx)) {
-			rq->cmd_flags = REQ_MQ_INFLIGHT;
+			rq->rq_flags = RQF_MQ_INFLIGHT;
 			atomic_inc(&data->hctx->nr_active);
 		}
 
@@ -298,9 +297,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	const int tag = rq->tag;
 	struct request_queue *q = rq->q;
 
-	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
+	if (rq->rq_flags & RQF_MQ_INFLIGHT)
 		atomic_dec(&hctx->nr_active);
-	rq->cmd_flags = 0;
+	rq->rq_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	blk_mq_put_tag(hctx, ctx, tag);
@@ -489,10 +488,10 @@ static void blk_mq_requeue_work(struct work_struct *work)
 	spin_unlock_irqrestore(&q->requeue_lock, flags);
 
 	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-		if (!(rq->cmd_flags & REQ_SOFTBARRIER))
+		if (!(rq->rq_flags & RQF_SOFTBARRIER))
 			continue;
 
-		rq->cmd_flags &= ~REQ_SOFTBARRIER;
+		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
 		blk_mq_insert_request(rq, true, false, false);
 	}
@@ -519,11 +518,11 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 	 * We abuse this flag that is otherwise used by the I/O scheduler to
 	 * request head insertation from the workqueue.
 	 */
-	BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
+	BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
 
 	spin_lock_irqsave(&q->requeue_lock, flags);
 	if (at_head) {
-		rq->cmd_flags |= REQ_SOFTBARRIER;
+		rq->rq_flags |= RQF_SOFTBARRIER;
 		list_add(&rq->queuelist, &q->requeue_list);
 	} else {
 		list_add_tail(&rq->queuelist, &q->requeue_list);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index f0344e6939d5..bae1decb6ec3 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -270,7 +270,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 	BUG_ON(tag >= bqt->real_max_depth);
 
 	list_del_init(&rq->queuelist);
-	rq->cmd_flags &= ~REQ_QUEUED;
+	rq->rq_flags &= ~RQF_QUEUED;
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
@@ -316,7 +316,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	unsigned max_depth;
 	int tag;
 
-	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
+	if (unlikely((rq->rq_flags & RQF_QUEUED))) {
 		printk(KERN_ERR
 		       "%s: request %p for device [%s] already tagged %d",
 		       __func__, rq,
@@ -371,7 +371,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 */
 
 	bqt->next_tag = (tag + 1) % bqt->max_depth;
-	rq->cmd_flags |= REQ_QUEUED;
+	rq->rq_flags |= RQF_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blk_start_request(rq);
diff --git a/block/blk.h b/block/blk.h
index 74444c49078f..aa132dea598c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -130,7 +130,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
 /*
  * Internal elevator interface
  */
-#define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
+#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
 
 void blk_insert_flush(struct request *rq);
 
@@ -247,7 +247,7 @@ extern int blk_update_nr_requests(struct request_queue *, unsigned int);
 static inline int blk_do_io_stat(struct request *rq)
 {
 	return rq->rq_disk &&
-	       (rq->cmd_flags & REQ_IO_STAT) &&
+	       (rq->rq_flags & RQF_IO_STAT) &&
 		(rq->cmd_type == REQ_TYPE_FS);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index f7d973a56fd7..ac80f89a0842 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -245,7 +245,7 @@ EXPORT_SYMBOL(elevator_exit);
 static inline void __elv_rqhash_del(struct request *rq)
 {
 	hash_del(&rq->hash);
-	rq->cmd_flags &= ~REQ_HASHED;
+	rq->rq_flags &= ~RQF_HASHED;
 }
 
 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
@@ -260,7 +260,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 
 	BUG_ON(ELV_ON_HASH(rq));
 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
-	rq->cmd_flags |= REQ_HASHED;
+	rq->rq_flags |= RQF_HASHED;
 }
 
 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
@@ -352,7 +352,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 {
 	sector_t boundary;
 	struct list_head *entry;
-	int stop_flags;
 
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
@@ -362,7 +361,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	q->nr_sorted--;
 
 	boundary = q->end_sector;
-	stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
@@ -370,7 +368,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
-		if (pos->cmd_flags & stop_flags)
+		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
 			break;
 		if (blk_rq_pos(rq) >= boundary) {
 			if (blk_rq_pos(pos) < boundary)
@@ -510,7 +508,7 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
 	struct elevator_queue *e = q->elevator;
-	const int next_sorted = next->cmd_flags & REQ_SORTED;
+	const int next_sorted = next->rq_flags & RQF_SORTED;
 
 	if (next_sorted && e->type->ops.elevator_merge_req_fn)
 		e->type->ops.elevator_merge_req_fn(q, rq, next);
@@ -537,13 +535,13 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 #ifdef CONFIG_PM
 static void blk_pm_requeue_request(struct request *rq)
 {
-	if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
+	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
 		rq->q->nr_pending--;
 }
 
 static void blk_pm_add_request(struct request_queue *q, struct request *rq)
 {
-	if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
+	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
 	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
 		pm_request_resume(q->dev);
 }
@@ -563,11 +561,11 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 	 */
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
-		if (rq->cmd_flags & REQ_SORTED)
+		if (rq->rq_flags & RQF_SORTED)
 			elv_deactivate_rq(q, rq);
 	}
 
-	rq->cmd_flags &= ~REQ_STARTED;
+	rq->rq_flags &= ~RQF_STARTED;
 
 	blk_pm_requeue_request(rq);
 
@@ -597,13 +595,13 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 
 	rq->q = q;
 
-	if (rq->cmd_flags & REQ_SOFTBARRIER) {
+	if (rq->rq_flags & RQF_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
-	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
+	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
 		    (where == ELEVATOR_INSERT_SORT ||
 		     where == ELEVATOR_INSERT_SORT_MERGE))
 		where = ELEVATOR_INSERT_BACK;
@@ -611,12 +609,12 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 	switch (where) {
 	case ELEVATOR_INSERT_REQUEUE:
 	case ELEVATOR_INSERT_FRONT:
-		rq->cmd_flags |= REQ_SOFTBARRIER;
+		rq->rq_flags |= RQF_SOFTBARRIER;
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
 	case ELEVATOR_INSERT_BACK:
-		rq->cmd_flags |= REQ_SOFTBARRIER;
+		rq->rq_flags |= RQF_SOFTBARRIER;
 		elv_drain_elevator(q);
 		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
@@ -642,7 +640,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 			break;
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(rq->cmd_type != REQ_TYPE_FS);
-		rq->cmd_flags |= REQ_SORTED;
+		rq->rq_flags |= RQF_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
 			elv_rqhash_add(q, rq);
@@ -659,7 +657,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		break;
 
 	case ELEVATOR_INSERT_FLUSH:
-		rq->cmd_flags |= REQ_SOFTBARRIER;
+		rq->rq_flags |= RQF_SOFTBARRIER;
 		blk_insert_flush(rq);
 		break;
 	default:
@@ -735,7 +733,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 */
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
-		if ((rq->cmd_flags & REQ_SORTED) &&
+		if ((rq->rq_flags & RQF_SORTED) &&
 		    e->type->ops.elevator_completed_req_fn)
 			e->type->ops.elevator_completed_req_fn(q, rq);
 	}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 90fa4ac149db..7cf795e0fc8d 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -721,7 +721,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 
 	rq->timeout = 60*HZ;
 	if (cgc->quiet)
-		rq->cmd_flags |= REQ_QUIET;
+		rq->rq_flags |= RQF_QUIET;
 
 	blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
 	if (rq->errors)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 05352f490d60..f90ea221f7f2 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -211,7 +211,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	sense_rq->cmd[4] = cmd_len;
 	sense_rq->cmd_type = REQ_TYPE_ATA_SENSE;
-	sense_rq->cmd_flags |= REQ_PREEMPT;
+	sense_rq->rq_flags |= RQF_PREEMPT;
 
 	if (drive->media == ide_tape)
 		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
@@ -295,7 +295,7 @@ int ide_cd_expiry(ide_drive_t *drive)
 		wait = ATAPI_WAIT_PC;
 		break;
 	default:
-		if (!(rq->cmd_flags & REQ_QUIET))
+		if (!(rq->rq_flags & RQF_QUIET))
 			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
 					 rq->cmd[0]);
 		wait = 0;
@@ -375,7 +375,7 @@ int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len,
 	}
 
 	if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC)
-		rq->cmd_flags |= REQ_FAILED;
+		rq->rq_flags |= RQF_FAILED;
 
 	return 1;
 }
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index bf9a2ad296ed..9cbd217bc0c9 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -98,7 +98,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq)
 	struct request_sense *sense = &drive->sense_data;
 	int log = 0;
 
-	if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
+	if (!sense || !rq || (rq->rq_flags & RQF_QUIET))
 		return 0;
 
 	ide_debug_log(IDE_DBG_SENSE, "sense_key: 0x%x", sense->sense_key);
@@ -291,7 +291,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 		 * (probably while trying to recover from a former error).
 		 * Just give up.
 		 */
-		rq->cmd_flags |= REQ_FAILED;
+		rq->rq_flags |= RQF_FAILED;
 		return 2;
 	}
 
@@ -311,7 +311,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 			cdrom_saw_media_change(drive);
 
 			if (rq->cmd_type == REQ_TYPE_FS &&
-			    !(rq->cmd_flags & REQ_QUIET))
+			    !(rq->rq_flags & RQF_QUIET))
 				printk(KERN_ERR PFX "%s: tray open\n",
 					drive->name);
 		}
@@ -346,7 +346,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 		 * No point in retrying after an illegal request or data
 		 * protect error.
 		 */
-		if (!(rq->cmd_flags & REQ_QUIET))
+		if (!(rq->rq_flags & RQF_QUIET))
 			ide_dump_status(drive, "command error", stat);
 		do_end_request = 1;
 		break;
@@ -355,14 +355,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 		 * No point in re-trying a zillion times on a bad sector.
 		 * If we got here the error is not correctable.
 		 */
-		if (!(rq->cmd_flags & REQ_QUIET))
+		if (!(rq->rq_flags & RQF_QUIET))
 			ide_dump_status(drive, "media error "
 					"(bad sector)", stat);
 		do_end_request = 1;
 		break;
 	case BLANK_CHECK:
 		/* disk appears blank? */
-		if (!(rq->cmd_flags & REQ_QUIET))
+		if (!(rq->rq_flags & RQF_QUIET))
 			ide_dump_status(drive, "media error (blank)",
 					stat);
 		do_end_request = 1;
@@ -380,7 +380,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 	}
 
 	if (rq->cmd_type != REQ_TYPE_FS) {
-		rq->cmd_flags |= REQ_FAILED;
+		rq->rq_flags |= RQF_FAILED;
 		do_end_request = 1;
 	}
 
@@ -422,19 +422,19 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		    int write, void *buffer, unsigned *bufflen,
 		    struct request_sense *sense, int timeout,
-		    unsigned int cmd_flags)
+		    req_flags_t rq_flags)
 {
 	struct cdrom_info *info = drive->driver_data;
 	struct request_sense local_sense;
 	int retries = 10;
-	unsigned int flags = 0;
+	req_flags_t flags = 0;
 
 	if (!sense)
 		sense = &local_sense;
 
 	ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, "
-				  "cmd_flags: 0x%x",
-				  cmd[0], write, timeout, cmd_flags);
+				  "rq_flags: 0x%x",
+				  cmd[0], write, timeout, rq_flags);
 
 	/* start of retry loop */
 	do {
@@ -446,7 +446,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		memcpy(rq->cmd, cmd, BLK_MAX_CDB);
 		rq->cmd_type = REQ_TYPE_ATA_PC;
 		rq->sense = sense;
-		rq->cmd_flags |= cmd_flags;
+		rq->rq_flags |= rq_flags;
 		rq->timeout = timeout;
 		if (buffer) {
 			error = blk_rq_map_kern(drive->queue, rq, buffer,
@@ -462,14 +462,14 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		if (buffer)
 			*bufflen = rq->resid_len;
 
-		flags = rq->cmd_flags;
+		flags = rq->rq_flags;
 		blk_put_request(rq);
 
 		/*
 		 * FIXME: we should probably abort/retry or something in case of
 		 * failure.
 		 */
-		if (flags & REQ_FAILED) {
+		if (flags & RQF_FAILED) {
 			/*
 			 * The request failed.  Retry if it was due to a unit
 			 * attention status (usually means media was changed).
@@ -494,10 +494,10 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		}
 
 		/* end of retry loop */
-	} while ((flags & REQ_FAILED) && retries >= 0);
+	} while ((flags & RQF_FAILED) && retries >= 0);
 
 	/* return an error if the command failed */
-	return (flags & REQ_FAILED) ? -EIO : 0;
+	return (flags & RQF_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -589,7 +589,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 					"(%u bytes)\n", drive->name, __func__,
 					cmd->nleft);
 				if (!write)
-					rq->cmd_flags |= REQ_FAILED;
+					rq->rq_flags |= RQF_FAILED;
 				uptodate = 0;
 			}
 		} else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
@@ -607,7 +607,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 			}
 
 			if (!uptodate)
-				rq->cmd_flags |= REQ_FAILED;
+				rq->rq_flags |= RQF_FAILED;
 		}
 		goto out_end;
 	}
@@ -745,9 +745,9 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 				  rq->cmd[0], rq->cmd_type);
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
-		rq->cmd_flags |= REQ_QUIET;
+		rq->rq_flags |= RQF_QUIET;
 	else
-		rq->cmd_flags &= ~REQ_FAILED;
+		rq->rq_flags &= ~RQF_FAILED;
 
 	drive->dma = 0;
 
@@ -867,7 +867,7 @@ int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 	 */
 	cmd[7] = cdi->sanyo_slot % 3;
 
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, REQ_QUIET);
+	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, RQF_QUIET);
 }
 
 static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
@@ -890,7 +890,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 	cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 
 	stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, sense, 0,
-			       REQ_QUIET);
+			       RQF_QUIET);
 	if (stat)
 		return stat;
 
@@ -943,7 +943,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
 	if (msf_flag)
 		cmd[1] = 2;
 
-	return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, REQ_QUIET);
+	return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, RQF_QUIET);
 }
 
 /* Try to read the entire TOC for the disk into our internal buffer. */
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1efc936f5b66..eea60c986c4f 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -101,7 +101,7 @@ void ide_cd_log_error(const char *, struct request *, struct request_sense *);
 
 /* ide-cd.c functions used by ide-cd_ioctl.c */
 int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
-		    unsigned *, struct request_sense *, int, unsigned int);
+		    unsigned *, struct request_sense *, int, req_flags_t);
 int ide_cd_read_toc(ide_drive_t *, struct request_sense *);
 int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
 void ide_cdrom_update_speed(ide_drive_t *, u8 *);
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 5887a7a09e37..f085e3a2e1d6 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -305,7 +305,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
-	rq->cmd_flags = REQ_QUIET;
+	rq->rq_flags = RQF_QUIET;
 	ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
 	blk_put_request(rq);
 	/*
@@ -449,7 +449,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 			    struct packet_command *cgc)
 {
 	ide_drive_t *drive = cdi->handle;
-	unsigned int flags = 0;
+	req_flags_t flags = 0;
 	unsigned len = cgc->buflen;
 
 	if (cgc->timeout <= 0)
@@ -463,7 +463,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 		memset(cgc->sense, 0, sizeof(struct request_sense));
 
 	if (cgc->quiet)
-		flags |= REQ_QUIET;
+		flags |= RQF_QUIET;
 
 	cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
 				    cgc->data_direction == CGC_DATA_WRITE,
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 669ea1e45795..6360bbd37efe 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -307,7 +307,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 {
 	ide_startstop_t startstop;
 
-	BUG_ON(!(rq->cmd_flags & REQ_STARTED));
+	BUG_ON(!(rq->rq_flags & RQF_STARTED));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -316,7 +316,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 
 	/* bail early if we've exceeded max_failures */
 	if (drive->max_failures && (drive->failures > drive->max_failures)) {
-		rq->cmd_flags |= REQ_FAILED;
+		rq->rq_flags |= RQF_FAILED;
 		goto kill_rq;
 	}
 
@@ -539,7 +539,7 @@ repeat:
 		 */
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    ata_pm_request(rq) == 0 &&
-		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
+		    (rq->rq_flags & RQF_PREEMPT) == 0) {
 			/* there should be no pending command at this point */
 			ide_unlock_port(hwif);
 			goto plug_device;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index e34af488693a..a015acdffb39 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -53,7 +53,7 @@ static int ide_pm_execute_rq(struct request *rq)
 
 	spin_lock_irq(q->queue_lock);
 	if (unlikely(blk_queue_dying(q))) {
-		rq->cmd_flags |= REQ_QUIET;
+		rq->rq_flags |= RQF_QUIET;
 		rq->errors = -ENXIO;
 		__blk_end_request_all(rq, rq->errors);
 		spin_unlock_irq(q->queue_lock);
@@ -90,7 +90,7 @@ int generic_ide_resume(struct device *dev)
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
-	rq->cmd_flags |= REQ_PREEMPT;
+	rq->rq_flags |= RQF_PREEMPT;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
 	rqpm.pm_state = PM_EVENT_ON;
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index dc75bea0d541..f76cc36b8546 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -313,7 +313,7 @@ static void dm_unprep_request(struct request *rq)
 
 	if (!rq->q->mq_ops) {
 		rq->special = NULL;
-		rq->cmd_flags &= ~REQ_DONTPREP;
+		rq->rq_flags &= ~RQF_DONTPREP;
 	}
 
 	if (clone)
@@ -431,7 +431,7 @@ static void dm_softirq_done(struct request *rq)
 		return;
 	}
 
-	if (rq->cmd_flags & REQ_FAILED)
+	if (rq->rq_flags & RQF_FAILED)
 		mapped = false;
 
 	dm_done(clone, tio->error, mapped);
@@ -460,7 +460,7 @@ static void dm_complete_request(struct request *rq, int error)
  */
 static void dm_kill_unmapped_request(struct request *rq, int error)
 {
-	rq->cmd_flags |= REQ_FAILED;
+	rq->rq_flags |= RQF_FAILED;
 	dm_complete_request(rq, error);
 }
 
@@ -476,7 +476,7 @@ static void end_clone_request(struct request *clone, int error)
 		 * For just cleaning up the information of the queue in which
 		 * the clone was dispatched.
 		 * The clone is *NOT* freed actually here because it is alloced
-		 * from dm own mempool (REQ_ALLOCED isn't set).
+		 * from dm own mempool (RQF_ALLOCED isn't set).
 		 */
 		__blk_put_request(clone->q, clone);
 	}
@@ -497,7 +497,7 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 	int r;
 
 	if (blk_queue_io_stat(clone->q))
-		clone->cmd_flags |= REQ_IO_STAT;
+		clone->rq_flags |= RQF_IO_STAT;
 
 	clone->start_time = jiffies;
 	r = blk_insert_cloned_request(clone->q, clone);
@@ -633,7 +633,7 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
 		return BLKPREP_DEFER;
 
 	rq->special = tio;
-	rq->cmd_flags |= REQ_DONTPREP;
+	rq->rq_flags |= RQF_DONTPREP;
 
 	return BLKPREP_OK;
 }
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index aacf584f2a42..f3512404bc52 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2006,7 +2006,7 @@ static int msb_prepare_req(struct request_queue *q, struct request *req)
 		blk_dump_rq_flags(req, "MS unsupported request");
 		return BLKPREP_KILL;
 	}
-	req->cmd_flags |= REQ_DONTPREP;
+	req->rq_flags |= RQF_DONTPREP;
 	return BLKPREP_OK;
 }
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c1472275fe57..fa0746d182ff 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -834,7 +834,7 @@ static int mspro_block_prepare_req(struct request_queue *q, struct request *req)
 		return BLKPREP_KILL;
 	}
 
-	req->cmd_flags |= REQ_DONTPREP;
+	req->rq_flags |= RQF_DONTPREP;
 
 	return BLKPREP_OK;
 }
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index c3335112e68c..f8190dd4a35c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2117,7 +2117,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		mmc_blk_abort_packed_req(mq_rq);
 	} else {
 		if (mmc_card_removed(card))
-			req->cmd_flags |= REQ_QUIET;
+			req->rq_flags |= RQF_QUIET;
 		while (ret)
 			ret = blk_end_request(req, -EIO,
 					blk_rq_cur_bytes(req));
@@ -2126,7 +2126,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
  start_new_req:
 	if (rqc) {
 		if (mmc_card_removed(card)) {
-			rqc->cmd_flags |= REQ_QUIET;
+			rqc->rq_flags |= RQF_QUIET;
 			blk_end_request_all(rqc, -EIO);
 		} else {
 			/*
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 8037f73a109a..8a67f1c2ce21 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -44,7 +44,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq)))
 		return BLKPREP_KILL;
 
-	req->cmd_flags |= REQ_DONTPREP;
+	req->rq_flags |= RQF_DONTPREP;
 
 	return BLKPREP_OK;
 }
@@ -120,7 +120,7 @@ static void mmc_request_fn(struct request_queue *q)
 
 	if (!mq) {
 		while ((req = blk_fetch_request(q)) != NULL) {
-			req->cmd_flags |= REQ_QUIET;
+			req->rq_flags |= RQF_QUIET;
 			__blk_end_request_all(req, -EIO);
 		}
 		return;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0fc99f0f2571..0955e9d22020 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -323,9 +323,9 @@ static int nvme_init_iod(struct request *rq, unsigned size,
 	iod->nents = 0;
 	iod->length = size;
 
-	if (!(rq->cmd_flags & REQ_DONTPREP)) {
+	if (!(rq->rq_flags & RQF_DONTPREP)) {
 		rq->retries = 0;
-		rq->cmd_flags |= REQ_DONTPREP;
+		rq->rq_flags |= RQF_DONTPREP;
 	}
 	return 0;
 }
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 241829e59668..05813a420188 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -154,7 +154,8 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
 	return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE,
 				      buff, bufflen, sshdr,
 				      ALUA_FAILOVER_TIMEOUT * HZ,
-				      ALUA_FAILOVER_RETRIES, NULL, req_flags);
+				      ALUA_FAILOVER_RETRIES, NULL,
+				      req_flags, 0);
 }
 
 /*
@@ -187,7 +188,8 @@ static int submit_stpg(struct scsi_device *sdev, int group_id,
 	return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
 				      stpg_data, stpg_len,
 				      sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
-				      ALUA_FAILOVER_RETRIES, NULL, req_flags);
+				      ALUA_FAILOVER_RETRIES, NULL,
+				      req_flags, 0);
 }
 
 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
@@ -1063,7 +1065,7 @@ static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
 		 state != SCSI_ACCESS_STATE_ACTIVE &&
 		 state != SCSI_ACCESS_STATE_LBA) {
 		ret = BLKPREP_KILL;
-		req->cmd_flags |= REQ_QUIET;
+		req->rq_flags |= RQF_QUIET;
 	}
 	return ret;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 375d81850f15..5b80746980b8 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -452,7 +452,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req)
 
 	if (h->lun_state != CLARIION_LUN_OWNED) {
 		ret = BLKPREP_KILL;
-		req->cmd_flags |= REQ_QUIET;
+		req->rq_flags |= RQF_QUIET;
 	}
 	return ret;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 9406d5f4a3d3..308e87195dc1 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -266,7 +266,7 @@ static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
 
 	if (h->path_state != HP_SW_PATH_ACTIVE) {
 		ret = BLKPREP_KILL;
-		req->cmd_flags |= REQ_QUIET;
+		req->rq_flags |= RQF_QUIET;
 	}
 	return ret;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 06fbd0b0c68a..00d9c326158e 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -724,7 +724,7 @@ static int rdac_prep_fn(struct scsi_device *sdev, struct request *req)
 
 	if (h->state != RDAC_STATE_ACTIVE) {
 		ret = BLKPREP_KILL;
-		req->cmd_flags |= REQ_QUIET;
+		req->rq_flags |= RQF_QUIET;
 	}
 	return ret;
 
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 2f2a9910e30e..ef99f62831fb 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1595,7 +1595,7 @@ static int _init_blk_request(struct osd_request *or,
 	}
 
 	or->request = req;
-	req->cmd_flags |= REQ_QUIET;
+	req->rq_flags |= RQF_QUIET;
 
 	req->timeout = or->timeout;
 	req->retries = or->retries;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 5033223f6287..a2960f5d98ec 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -368,7 +368,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 		return DRIVER_ERROR << 24;
 
 	blk_rq_set_block_pc(req);
-	req->cmd_flags |= REQ_QUIET;
+	req->rq_flags |= RQF_QUIET;
 
 	SRpnt->bio = NULL;
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 106a6adbd6f1..996e134d79fa 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
 
-	req->cmd_flags |= REQ_QUIET;
+	req->rq_flags |= RQF_QUIET;
 	req->timeout = 10 * HZ;
 	req->retries = 5;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2cca9cffc63f..8c52622ac257 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -163,26 +163,11 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 {
 	__scsi_queue_insert(cmd, reason, 1);
 }
-/**
- * scsi_execute - insert request and wait for the result
- * @sdev:	scsi device
- * @cmd:	scsi command
- * @data_direction: data direction
- * @buffer:	data buffer
- * @bufflen:	len of buffer
- * @sense:	optional sense buffer
- * @timeout:	request timeout in seconds
- * @retries:	number of times to retry request
- * @flags:	or into request flags;
- * @resid:	optional residual length
- *
- * returns the req->errors value which is the scsi_cmnd result
- * field.
- */
-int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+
+static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		 int data_direction, void *buffer, unsigned bufflen,
 		 unsigned char *sense, int timeout, int retries, u64 flags,
-		 int *resid)
+		 req_flags_t rq_flags, int *resid)
 {
 	struct request *req;
 	int write = (data_direction == DMA_TO_DEVICE);
@@ -203,7 +188,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
+	req->cmd_flags |= flags;
+	req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
@@ -227,12 +213,37 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 
 	return ret;
 }
+
+/**
+ * scsi_execute - insert request and wait for the result
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @data_direction: data direction
+ * @buffer:	data buffer
+ * @bufflen:	len of buffer
+ * @sense:	optional sense buffer
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ * @flags:	or into request flags;
+ * @resid:	optional residual length
+ *
+ * returns the req->errors value which is the scsi_cmnd result
+ * field.
+ */
+int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+		 int data_direction, void *buffer, unsigned bufflen,
+		 unsigned char *sense, int timeout, int retries, u64 flags,
+		 int *resid)
+{
+	return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,
+			timeout, retries, flags, 0, resid);
+}
 EXPORT_SYMBOL(scsi_execute);
 
 int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd,
 		     int data_direction, void *buffer, unsigned bufflen,
 		     struct scsi_sense_hdr *sshdr, int timeout, int retries,
-		     int *resid, u64 flags)
+		     int *resid, u64 flags, req_flags_t rq_flags)
 {
 	char *sense = NULL;
 	int result;
@@ -242,8 +253,8 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd,
 		if (!sense)
 			return DRIVER_ERROR << 24;
 	}
-	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
-			      sense, timeout, retries, flags, resid);
+	result = __scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
+			      sense, timeout, retries, flags, rq_flags, resid);
 	if (sshdr)
 		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
 
@@ -813,7 +824,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		 */
 		if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d))
 			;
-		else if (!(req->cmd_flags & REQ_QUIET))
+		else if (!(req->rq_flags & RQF_QUIET))
 			scsi_print_sense(cmd);
 		result = 0;
 		/* BLOCK_PC may have set error */
@@ -943,7 +954,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	switch (action) {
 	case ACTION_FAIL:
 		/* Give up and fail the remainder of the request */
-		if (!(req->cmd_flags & REQ_QUIET)) {
+		if (!(req->rq_flags & RQF_QUIET)) {
 			static DEFINE_RATELIMIT_STATE(_rs,
 					DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
@@ -972,7 +983,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		 * A new command will be prepared and issued.
 		 */
 		if (q->mq_ops) {
-			cmd->request->cmd_flags &= ~REQ_DONTPREP;
+			cmd->request->rq_flags &= ~RQF_DONTPREP;
 			scsi_mq_uninit_cmd(cmd);
 			scsi_mq_requeue_cmd(cmd);
 		} else {
@@ -1234,7 +1245,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 			/*
 			 * If the devices is blocked we defer normal commands.
 			 */
-			if (!(req->cmd_flags & REQ_PREEMPT))
+			if (!(req->rq_flags & RQF_PREEMPT))
 				ret = BLKPREP_DEFER;
 			break;
 		default:
@@ -1243,7 +1254,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 			 * special commands.  In particular any user initiated
 			 * command is not allowed.
 			 */
-			if (!(req->cmd_flags & REQ_PREEMPT))
+			if (!(req->rq_flags & RQF_PREEMPT))
 				ret = BLKPREP_KILL;
 			break;
 		}
@@ -1279,7 +1290,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 		break;
 	default:
-		req->cmd_flags |= REQ_DONTPREP;
+		req->rq_flags |= RQF_DONTPREP;
 	}
 
 	return ret;
@@ -1736,7 +1747,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * we add the dev to the starved list so it eventually gets
 		 * a run when a tag is freed.
 		 */
-		if (blk_queue_tagged(q) && !(req->cmd_flags & REQ_QUEUED)) {
+		if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) {
 			spin_lock_irq(shost->host_lock);
 			if (list_empty(&sdev->starved_entry))
 				list_add_tail(&sdev->starved_entry,
@@ -1903,11 +1914,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		goto out_dec_target_busy;
 
 
-	if (!(req->cmd_flags & REQ_DONTPREP)) {
+	if (!(req->rq_flags & RQF_DONTPREP)) {
 		ret = prep_to_mq(scsi_mq_prep_fn(req));
 		if (ret)
 			goto out_dec_host_busy;
-		req->cmd_flags |= REQ_DONTPREP;
+		req->rq_flags |= RQF_DONTPREP;
 	} else {
 		blk_mq_start_request(req);
 	}
@@ -1952,7 +1963,7 @@ out:
 		 * we hit an error, as we will never see this command
 		 * again.
 		 */
-		if (req->cmd_flags & REQ_DONTPREP)
+		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
 		break;
 	default:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b9618ffca829..cef1f78031d4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1520,7 +1520,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 */
 		res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0,
 					     &sshdr, timeout, SD_MAX_RETRIES,
-					     NULL, REQ_PM);
+					     NULL, 0, RQF_PM);
 		if (res == 0)
 			break;
 	}
@@ -1879,7 +1879,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 
 					good_bytes = 0;
 					req->__data_len = blk_rq_bytes(req);
-					req->cmd_flags |= REQ_QUIET;
+					req->rq_flags |= RQF_QUIET;
 				}
 			}
 		}
@@ -3278,7 +3278,7 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 		return -ENODEV;
 
 	res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-			       SD_TIMEOUT, SD_MAX_RETRIES, NULL, REQ_PM);
+			       SD_TIMEOUT, SD_MAX_RETRIES, NULL, 0, RQF_PM);
 	if (res) {
 		sd_print_result(sdkp, "Start/Stop Unit failed", res);
 		if (driver_byte(res) & DRIVER_SENSE)
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index d5b3bd915d9e..394ab490919c 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -348,7 +348,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd,
 			 * this case, so be quiet about the error.
 			 */
 			if (req_op(rq) == REQ_OP_ZONE_RESET)
-				rq->cmd_flags |= REQ_QUIET;
+				rq->rq_flags |= RQF_QUIET;
 			break;
 		case 0x21:
 			/*
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 7af5226aa55b..3bc46a4abd43 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -546,7 +546,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 		return DRIVER_ERROR << 24;
 
 	blk_rq_set_block_pc(req);
-	req->cmd_flags |= REQ_QUIET;
+	req->rq_flags |= RQF_QUIET;
 
 	mdata->null_mapped = 1;
 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 05c745663c10..cf549871c1ee 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5590,7 +5590,7 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp)
 
 	ret = scsi_execute_req_flags(sdp, cmd, DMA_FROM_DEVICE, buffer,
 				SCSI_SENSE_BUFFERSIZE, NULL,
-				msecs_to_jiffies(1000), 3, NULL, REQ_PM);
+				msecs_to_jiffies(1000), 3, NULL, 0, RQF_PM);
 	if (ret)
 		pr_err("%s: failed with err %d\n", __func__, ret);
 
@@ -5652,11 +5652,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 
 	/*
 	 * Current function would be generally called from the power management
-	 * callbacks hence set the REQ_PM flag so that it doesn't resume the
+	 * callbacks hence set the RQF_PM flag so that it doesn't resume the
 	 * already suspended childs.
 	 */
 	ret = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-				     START_STOP_TIMEOUT, 0, NULL, REQ_PM);
+				     START_STOP_TIMEOUT, 0, NULL, 0, RQF_PM);
 	if (ret) {
 		sdev_printk(KERN_WARNING, sdp,
 			    "START_STOP failed for power mode: %d, result %x\n",
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6df722de2e22..ec69a8fe3b29 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -167,26 +167,6 @@ enum rq_flag_bits {
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 
-	/* request only flags */
-	__REQ_SORTED,		/* elevator knows about this request */
-	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_STARTED,		/* drive already may have started this one */
-	__REQ_DONTPREP,		/* don't call prep for this one */
-	__REQ_QUEUED,		/* uses queueing */
-	__REQ_ELVPRIV,		/* elevator private data attached */
-	__REQ_FAILED,		/* set if the request failed */
-	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests and also
-				   for requests for which the SCSI "quiesce"
-				   state must be ignored. */
-	__REQ_ALLOCED,		/* request came from our alloc pool */
-	__REQ_COPY_USER,	/* contains copies of user pages */
-	__REQ_FLUSH_SEQ,	/* request for flush sequence */
-	__REQ_IO_STAT,		/* account I/O stat */
-	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
-	__REQ_PM,		/* runtime pm request */
-	__REQ_HASHED,		/* on IO scheduler merge hash */
-	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -208,29 +188,12 @@ enum rq_flag_bits {
 
 /* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ)
+	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
-#define REQ_SORTED		(1ULL << __REQ_SORTED)
-#define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
 #define REQ_FUA			(1ULL << __REQ_FUA)
 #define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
-#define REQ_STARTED		(1ULL << __REQ_STARTED)
-#define REQ_DONTPREP		(1ULL << __REQ_DONTPREP)
-#define REQ_QUEUED		(1ULL << __REQ_QUEUED)
-#define REQ_ELVPRIV		(1ULL << __REQ_ELVPRIV)
-#define REQ_FAILED		(1ULL << __REQ_FAILED)
-#define REQ_QUIET		(1ULL << __REQ_QUIET)
-#define REQ_PREEMPT		(1ULL << __REQ_PREEMPT)
-#define REQ_ALLOCED		(1ULL << __REQ_ALLOCED)
-#define REQ_COPY_USER		(1ULL << __REQ_COPY_USER)
 #define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
-#define REQ_FLUSH_SEQ		(1ULL << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
-#define REQ_PM			(1ULL << __REQ_PM)
-#define REQ_HASHED		(1ULL << __REQ_HASHED)
-#define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 
 enum req_op {
 	REQ_OP_READ,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 90097dd8b8ed..b4415feac679 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -78,6 +78,50 @@ enum rq_cmd_type_bits {
 	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
 };
 
+/*
+ * request flags */
+typedef __u32 __bitwise req_flags_t;
+
+/* elevator knows about this request */
+#define RQF_SORTED		((__force req_flags_t)(1 << 0))
+/* drive already may have started this one */
+#define RQF_STARTED		((__force req_flags_t)(1 << 1))
+/* uses tagged queueing */
+#define RQF_QUEUED		((__force req_flags_t)(1 << 2))
+/* may not be passed by ioscheduler */
+#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
+/* request for flush sequence */
+#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
+/* merge of different types, fail separately */
+#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
+/* track inflight for MQ */
+#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
+/* don't call prep for this one */
+#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
+/* set for "ide_preempt" requests and also for requests for which the SCSI
+   "quiesce" state must be ignored. */
+#define RQF_PREEMPT		((__force req_flags_t)(1 << 8))
+/* contains copies of user pages */
+#define RQF_COPY_USER		((__force req_flags_t)(1 << 9))
+/* vaguely specified driver internal error.  Ignored by the block layer */
+#define RQF_FAILED		((__force req_flags_t)(1 << 10))
+/* don't warn about errors */
+#define RQF_QUIET		((__force req_flags_t)(1 << 11))
+/* elevator private data attached */
+#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
+/* account I/O stat */
+#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
+/* request came from our alloc pool */
+#define RQF_ALLOCED		((__force req_flags_t)(1 << 14))
+/* runtime pm request */
+#define RQF_PM			((__force req_flags_t)(1 << 15))
+/* on IO scheduler merge hash */
+#define RQF_HASHED		((__force req_flags_t)(1 << 16))
+
+/* flags that prevent us from merging requests: */
+#define RQF_NOMERGE_FLAGS \
+	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ)
+
 #define BLK_MAX_CDB	16
 
 /*
@@ -99,6 +143,7 @@ struct request {
 	int cpu;
 	unsigned cmd_type;
 	u64 cmd_flags;
+	req_flags_t rq_flags;
 	unsigned long atomic_flags;
 
 	/* the following two fields are internal, NEVER access directly */
@@ -648,7 +693,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 			     REQ_FAILFAST_DRIVER))
 
 #define blk_account_rq(rq) \
-	(((rq)->cmd_flags & REQ_STARTED) && \
+	(((rq)->rq_flags & RQF_STARTED) && \
 	 ((rq)->cmd_type == REQ_TYPE_FS))
 
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
@@ -740,6 +785,8 @@ static inline bool rq_mergeable(struct request *rq)
 
 	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
 		return false;
+	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
+		return false;
 
 	return true;
 }
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 8a9563144890..8990e580b278 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -414,14 +414,14 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 extern int scsi_execute_req_flags(struct scsi_device *sdev,
 	const unsigned char *cmd, int data_direction, void *buffer,
 	unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
-	int retries, int *resid, u64 flags);
+	int retries, int *resid, u64 flags, req_flags_t rq_flags);
 static inline int scsi_execute_req(struct scsi_device *sdev,
 	const unsigned char *cmd, int data_direction, void *buffer,
 	unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
 	int retries, int *resid)
 {
 	return scsi_execute_req_flags(sdev, cmd, data_direction, buffer,
-		bufflen, sshdr, timeout, retries, resid, 0);
+		bufflen, sshdr, timeout, retries, resid, 0, 0);
 }
 extern void sdev_disable_disk_events(struct scsi_device *sdev);
 extern void sdev_enable_disk_events(struct scsi_device *sdev);
-- 
cgit v1.2.3


From ef295ecf090d3e86e5b742fc6ab34f1122a43773 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 28 Oct 2016 08:48:16 -0600
Subject: block: better op and flags encoding

Now that we don't need the common flags to overflow outside the range
of a 32-bit type we can encode them the same way for both the bio and
request fields.  This in addition allows us to place the operation
first (and make some room for more ops while we're at it) and to
stop having to shift around the operation values.

In addition this allows passing around only one value in the block layer
instead of two (and eventuall also in the file systems, but we can do
that later) and thus clean up a lot of code.

Last but not least this allows decreasing the size of the cmd_flags
field in struct request to 32-bits.  Various functions passing this
value could also be updated, but I'd like to avoid the churn for now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/biodoc.txt |  4 +-
 block/blk-core.c               | 60 ++++++++++--------------------
 block/blk-flush.c              |  2 +-
 block/blk-lib.c                |  2 +-
 block/blk-map.c                |  2 +
 block/blk-mq.c                 | 28 ++++++--------
 block/cfq-iosched.c            | 66 ++++++++++++++++-----------------
 block/elevator.c               |  4 +-
 drivers/md/dm-crypt.c          |  2 +-
 drivers/scsi/sd.c              |  3 +-
 fs/btrfs/inode.c               |  5 +--
 fs/buffer.c                    |  2 +-
 fs/f2fs/f2fs.h                 |  2 +-
 fs/gfs2/lops.c                 |  2 +-
 include/linux/blk-cgroup.h     | 11 +++---
 include/linux/blk_types.h      | 83 +++++++++++++++++++-----------------------
 include/linux/blkdev.h         | 26 +------------
 include/linux/blktrace_api.h   |  2 +-
 include/linux/dm-io.h          |  2 +-
 include/linux/elevator.h       |  4 +-
 include/trace/events/bcache.h  | 12 ++----
 include/trace/events/block.h   | 31 ++++++----------
 kernel/trace/blktrace.c        | 14 +++----
 23 files changed, 148 insertions(+), 221 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6acea160298c..01ddeaf64b0f 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -553,8 +553,8 @@ struct request {
 	struct request_list *rl;
 }
 	
-See the rq_flag_bits definitions for an explanation of the various flags
-available. Some bits are used by the block layer or i/o scheduler.
+See the req_ops and req_flag_bits definitions for an explanation of the various
+flags available. Some bits are used by the block layer or i/o scheduler.
 	
 The behaviour of the various sector counts are almost the same as before,
 except that since we have multi-segment bios, current_nr_sectors refers
diff --git a/block/blk-core.c b/block/blk-core.c
index fd416651a676..0bfaa54d3e9f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1056,8 +1056,7 @@ static struct io_context *rq_ioc(struct bio *bio)
 /**
  * __get_request - get a free request
  * @rl: request list to allocate from
- * @op: REQ_OP_READ/REQ_OP_WRITE
- * @op_flags: rq_flag_bits
+ * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
@@ -1068,23 +1067,22 @@ static struct io_context *rq_ioc(struct bio *bio)
  * Returns ERR_PTR on failure, with @q->queue_lock held.
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
-static struct request *__get_request(struct request_list *rl, int op,
-				     int op_flags, struct bio *bio,
-				     gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, unsigned int op,
+		struct bio *bio, gfp_t gfp_mask)
 {
 	struct request_queue *q = rl->q;
 	struct request *rq;
 	struct elevator_type *et = q->elevator->type;
 	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq = NULL;
-	const bool is_sync = rw_is_sync(op, op_flags) != 0;
+	const bool is_sync = op_is_sync(op);
 	int may_queue;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
 	if (unlikely(blk_queue_dying(q)))
 		return ERR_PTR(-ENODEV);
 
-	may_queue = elv_may_queue(q, op, op_flags);
+	may_queue = elv_may_queue(q, op);
 	if (may_queue == ELV_MQUEUE_NO)
 		goto rq_starved;
 
@@ -1154,7 +1152,7 @@ static struct request *__get_request(struct request_list *rl, int op,
 
 	blk_rq_init(q, rq);
 	blk_rq_set_rl(rq, rl);
-	req_set_op_attrs(rq, op, op_flags);
+	rq->cmd_flags = op;
 	rq->rq_flags = rq_flags;
 
 	/* init elvpriv */
@@ -1232,8 +1230,7 @@ rq_starved:
 /**
  * get_request - get a free request
  * @q: request_queue to allocate request from
- * @op: REQ_OP_READ/REQ_OP_WRITE
- * @op_flags: rq_flag_bits
+ * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
@@ -1244,18 +1241,17 @@ rq_starved:
  * Returns ERR_PTR on failure, with @q->queue_lock held.
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
-static struct request *get_request(struct request_queue *q, int op,
-				   int op_flags, struct bio *bio,
-				   gfp_t gfp_mask)
+static struct request *get_request(struct request_queue *q, unsigned int op,
+		struct bio *bio, gfp_t gfp_mask)
 {
-	const bool is_sync = rw_is_sync(op, op_flags) != 0;
+	const bool is_sync = op_is_sync(op);
 	DEFINE_WAIT(wait);
 	struct request_list *rl;
 	struct request *rq;
 
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
 retry:
-	rq = __get_request(rl, op, op_flags, bio, gfp_mask);
+	rq = __get_request(rl, op, bio, gfp_mask);
 	if (!IS_ERR(rq))
 		return rq;
 
@@ -1297,7 +1293,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 	create_io_context(gfp_mask, q->node);
 
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, rw, 0, NULL, gfp_mask);
+	rq = get_request(q, rw, NULL, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		return rq;
@@ -1446,7 +1442,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	 */
 	if (rq_flags & RQF_ALLOCED) {
 		struct request_list *rl = blk_rq_rl(req);
-		bool sync = rw_is_sync(req_op(req), req->cmd_flags);
+		bool sync = op_is_sync(req->cmd_flags);
 
 		BUG_ON(!list_empty(&req->queuelist));
 		BUG_ON(ELV_ON_HASH(req));
@@ -1652,8 +1648,6 @@ out:
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
-
-	req->cmd_flags |= bio->bi_opf & REQ_COMMON_MASK;
 	if (bio->bi_opf & REQ_RAHEAD)
 		req->cmd_flags |= REQ_FAILFAST_MASK;
 
@@ -1665,9 +1659,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 
 static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
-	const bool sync = !!(bio->bi_opf & REQ_SYNC);
 	struct blk_plug *plug;
-	int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
+	int el_ret, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
 	unsigned int request_count = 0;
 
@@ -1722,24 +1715,11 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	/*
-	 * This sync check and mask will be re-done in init_request_from_bio(),
-	 * but we need to set it earlier to expose the sync flag to the
-	 * rq allocator and io schedulers.
-	 */
-	if (sync)
-		rw_flags |= REQ_SYNC;
-
-	/*
-	 * Add in META/PRIO flags, if set, before we get to the IO scheduler
-	 */
-	rw_flags |= (bio->bi_opf & (REQ_META | REQ_PRIO));
-
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
+	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
 	if (IS_ERR(req)) {
 		bio->bi_error = PTR_ERR(req);
 		bio_endio(bio);
@@ -2946,8 +2926,6 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
-	req_set_op(rq, bio_op(bio));
-
 	if (bio_has_data(bio))
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 
@@ -3031,8 +3009,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
 	dst->cpu = src->cpu;
-	req_set_op_attrs(dst, req_op(src),
-			 (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
+	dst->cmd_flags = src->cmd_flags | REQ_NOMERGE;
 	dst->cmd_type = src->cmd_type;
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);
@@ -3537,8 +3514,11 @@ EXPORT_SYMBOL(blk_set_runtime_active);
 
 int __init blk_dev_init(void)
 {
-	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
+	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
+	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
 			FIELD_SIZEOF(struct request, cmd_flags));
+	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
+			FIELD_SIZEOF(struct bio, bi_opf));
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3990b9cfbda5..95f1d4d357df 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	}
 
 	flush_rq->cmd_type = REQ_TYPE_FS;
-	req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH);
+	flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH;
 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->rq_disk = first_rq->rq_disk;
 	flush_rq->end_io = flush_end_io;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 46fe9248410d..18abda862915 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	struct bio *bio = *biop;
 	unsigned int granularity;
-	enum req_op op;
+	unsigned int op;
 	int alignment;
 	sector_t bs_mask;
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 2c5ae5fef473..0173a72a8aa9 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
 int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
 	if (!rq->bio) {
+		rq->cmd_flags &= REQ_OP_MASK;
+		rq->cmd_flags |= (bio->bi_opf & REQ_OP_MASK);
 		blk_rq_bio_prep(rq->q, rq, bio);
 	} else {
 		if (!ll_back_merge_fn(rq->q, rq, bio))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b49c6658eb05..2da1a0ee3318 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -139,14 +139,13 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 EXPORT_SYMBOL(blk_mq_can_queue);
 
 static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			       struct request *rq, int op,
-			       unsigned int op_flags)
+			       struct request *rq, unsigned int op)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* csd/requeue_work/fifo_time is initialized before use */
 	rq->q = q;
 	rq->mq_ctx = ctx;
-	req_set_op_attrs(rq, op, op_flags);
+	rq->cmd_flags = op;
 	if (blk_queue_io_stat(q))
 		rq->rq_flags |= RQF_IO_STAT;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
@@ -183,11 +182,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
 
-	ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
+	ctx->rq_dispatched[op_is_sync(op)]++;
 }
 
 static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op)
 {
 	struct request *rq;
 	unsigned int tag;
@@ -202,7 +201,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
 		}
 
 		rq->tag = tag;
-		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
+		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
 		return rq;
 	}
 
@@ -225,7 +224,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 	ctx = blk_mq_get_ctx(q);
 	hctx = blk_mq_map_queue(q, ctx->cpu);
 	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	blk_mq_put_ctx(ctx);
 
 	if (!rq) {
@@ -277,7 +276,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 	ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
 
 	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (!rq) {
 		ret = -EWOULDBLOCK;
 		goto out_queue_exit;
@@ -1196,19 +1195,14 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct request *rq;
-	int op = bio_data_dir(bio);
-	int op_flags = 0;
 
 	blk_queue_enter_live(q);
 	ctx = blk_mq_get_ctx(q);
 	hctx = blk_mq_map_queue(q, ctx->cpu);
 
-	if (rw_is_sync(bio_op(bio), bio->bi_opf))
-		op_flags |= REQ_SYNC;
-
-	trace_block_getrq(q, bio, op);
+	trace_block_getrq(q, bio, bio->bi_opf);
 	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
-	rq = __blk_mq_alloc_request(data, op, op_flags);
+	rq = __blk_mq_alloc_request(data, bio->bi_opf);
 
 	data->hctx->queued++;
 	return rq;
@@ -1256,7 +1250,7 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
  */
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
-	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_mq_alloc_data data;
 	struct request *rq;
@@ -1350,7 +1344,7 @@ done:
  */
 static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
-	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5e24d880306c..c96186adaa66 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -667,10 +667,10 @@ static inline void cfqg_put(struct cfq_group *cfqg)
 } while (0)
 
 static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
-					    struct cfq_group *curr_cfqg, int op,
-					    int op_flags)
+					    struct cfq_group *curr_cfqg,
+					    unsigned int op)
 {
-	blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1);
+	blkg_rwstat_add(&cfqg->stats.queued, op, 1);
 	cfqg_stats_end_empty_time(&cfqg->stats);
 	cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
 }
@@ -684,30 +684,29 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
 #endif
 }
 
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
-					       int op_flags)
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
+					       unsigned int op)
 {
-	blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1);
+	blkg_rwstat_add(&cfqg->stats.queued, op, -1);
 }
 
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
-					       int op_flags)
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
+					       unsigned int op)
 {
-	blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1);
+	blkg_rwstat_add(&cfqg->stats.merged, op, 1);
 }
 
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time, int op,
-			int op_flags)
+			uint64_t start_time, uint64_t io_start_time,
+			unsigned int op)
 {
 	struct cfqg_stats *stats = &cfqg->stats;
 	unsigned long long now = sched_clock();
 
 	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, op, op_flags,
-				now - io_start_time);
+		blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
 	if (time_after64(io_start_time, start_time))
-		blkg_rwstat_add(&stats->wait_time, op, op_flags,
+		blkg_rwstat_add(&stats->wait_time, op,
 				io_start_time - start_time);
 }
 
@@ -786,16 +785,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)		do {} while (0)
 
 static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
-			struct cfq_group *curr_cfqg, int op, int op_flags) { }
+			struct cfq_group *curr_cfqg, unsigned int op) { }
 static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
 			uint64_t time, unsigned long unaccounted_time) { }
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
-			int op_flags) { }
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
-			int op_flags) { }
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
+			unsigned int op) { }
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
+			unsigned int op) { }
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time, int op,
-			int op_flags) { }
+			uint64_t start_time, uint64_t io_start_time,
+			unsigned int op) { }
 
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -2474,10 +2473,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
 	elv_rb_del(&cfqq->sort_list, rq);
 	cfqq->queued[rq_is_sync(rq)]--;
-	cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
+	cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
 	cfq_add_rq_rb(rq);
 	cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
-				 req_op(rq), rq->cmd_flags);
+				 rq->cmd_flags);
 }
 
 static struct request *
@@ -2530,7 +2529,7 @@ static void cfq_remove_request(struct request *rq)
 	cfq_del_rq_rb(rq);
 
 	cfqq->cfqd->rq_queued--;
-	cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
+	cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
 	if (rq->cmd_flags & REQ_PRIO) {
 		WARN_ON(!cfqq->prio_pending);
 		cfqq->prio_pending--;
@@ -2565,7 +2564,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
 				struct bio *bio)
 {
-	cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_opf);
+	cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_opf);
 }
 
 static void
@@ -2588,7 +2587,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
 	if (cfqq->next_rq == next)
 		cfqq->next_rq = rq;
 	cfq_remove_request(next);
-	cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags);
+	cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);
 
 	cfqq = RQ_CFQQ(next);
 	/*
@@ -4142,7 +4141,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
 	rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 	cfq_add_rq_rb(rq);
-	cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
+	cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
 				 rq->cmd_flags);
 	cfq_rq_enqueued(cfqd, cfqq, rq);
 }
@@ -4240,8 +4239,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	cfqq->dispatched--;
 	(RQ_CFQG(rq))->dispatched--;
 	cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
-				     rq_io_start_time_ns(rq), req_op(rq),
-				     rq->cmd_flags);
+				     rq_io_start_time_ns(rq), rq->cmd_flags);
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -4319,14 +4317,14 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 		cfq_schedule_dispatch(cfqd);
 }
 
-static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags)
+static void cfqq_boost_on_prio(struct cfq_queue *cfqq, unsigned int op)
 {
 	/*
 	 * If REQ_PRIO is set, boost class and prio level, if it's below
 	 * BE/NORM. If prio is not set, restore the potentially boosted
 	 * class/prio level.
 	 */
-	if (!(op_flags & REQ_PRIO)) {
+	if (!(op & REQ_PRIO)) {
 		cfqq->ioprio_class = cfqq->org_ioprio_class;
 		cfqq->ioprio = cfqq->org_ioprio;
 	} else {
@@ -4347,7 +4345,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 	return ELV_MQUEUE_MAY;
 }
 
-static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
+static int cfq_may_queue(struct request_queue *q, unsigned int op)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -4364,10 +4362,10 @@ static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
 	if (!cic)
 		return ELV_MQUEUE_MAY;
 
-	cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags));
+	cfqq = cic_to_cfqq(cic, op_is_sync(op));
 	if (cfqq) {
 		cfq_init_prio_data(cfqq, cic);
-		cfqq_boost_on_prio(cfqq, op_flags);
+		cfqq_boost_on_prio(cfqq, op);
 
 		return __cfq_may_queue(cfqq);
 	}
diff --git a/block/elevator.c b/block/elevator.c
index ac80f89a0842..a18a5db274e4 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -714,12 +714,12 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 		e->type->ops.elevator_put_req_fn(rq);
 }
 
-int elv_may_queue(struct request_queue *q, int op, int op_flags)
+int elv_may_queue(struct request_queue *q, unsigned int op)
 {
 	struct elevator_queue *e = q->elevator;
 
 	if (e->type->ops.elevator_may_queue_fn)
-		return e->type->ops.elevator_may_queue_fn(q, op, op_flags);
+		return e->type->ops.elevator_may_queue_fn(q, op);
 
 	return ELV_MQUEUE_MAY;
 }
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a2768835d394..68a9eb4f3f36 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1135,7 +1135,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
 	clone->bi_private = io;
 	clone->bi_end_io  = crypt_endio;
 	clone->bi_bdev    = cc->dev->bdev;
-	bio_set_op_attrs(clone, bio_op(io->base_bio), bio_flags(io->base_bio));
+	clone->bi_opf	  = io->base_bio->bi_opf;
 }
 
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cef1f78031d4..65738b0aad36 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1031,8 +1031,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
 	} else {
-		scmd_printk(KERN_ERR, SCpnt, "Unknown command %llu,%llx\n",
-			    req_op(rq), (unsigned long long) rq->cmd_flags);
+		scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq));
 		goto out;
 	}
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2b790bda7998..9a377079af26 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8427,7 +8427,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
 	if (!bio)
 		return -ENOMEM;
 
-	bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
+	bio->bi_opf = orig_bio->bi_opf;
 	bio->bi_private = dip;
 	bio->bi_end_io = btrfs_end_dio_bio;
 	btrfs_io_bio(bio)->logical = file_offset;
@@ -8465,8 +8465,7 @@ next_block:
 						  start_sector, GFP_NOFS);
 			if (!bio)
 				goto out_err;
-			bio_set_op_attrs(bio, bio_op(orig_bio),
-					 bio_flags(orig_bio));
+			bio->bi_opf = orig_bio->bi_opf;
 			bio->bi_private = dip;
 			bio->bi_end_io = btrfs_end_dio_bio;
 			btrfs_io_bio(bio)->logical = file_offset;
diff --git a/fs/buffer.c b/fs/buffer.c
index b205a629001d..a29335867e30 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3118,7 +3118,7 @@ EXPORT_SYMBOL(submit_bh);
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
  * @op: whether to %READ or %WRITE
- * @op_flags: rq_flag_bits
+ * @op_flags: req_flag_bits
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9e8de18a168a..2cf4f7f09e32 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -688,7 +688,7 @@ struct f2fs_io_info {
 	struct f2fs_sb_info *sbi;	/* f2fs_sb_info pointer */
 	enum page_type type;	/* contains DATA/NODE/META/META_FLUSH */
 	int op;			/* contains REQ_OP_ */
-	int op_flags;		/* rq_flag_bits */
+	int op_flags;		/* req_flag_bits */
 	block_t new_blkaddr;	/* new block address to be written */
 	block_t old_blkaddr;	/* old block address before Cow */
 	struct page *page;	/* page to be written */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 49d5a1b61b06..b1f9144b42c7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -231,7 +231,7 @@ static void gfs2_end_log_write(struct bio *bio)
  * gfs2_log_flush_bio - Submit any pending log bio
  * @sdp: The superblock
  * @op: REQ_OP
- * @op_flags: rq_flag_bits
+ * @op_flags: req_flag_bits
  *
  * Submit any pending part-built or full bio to the block device. If
  * there is no pending bio, then this is a no-op.
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3bf5d33800ab..ddaf28d0988f 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -581,15 +581,14 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
 /**
  * blkg_rwstat_add - add a value to a blkg_rwstat
  * @rwstat: target blkg_rwstat
- * @op: REQ_OP
- * @op_flags: rq_flag_bits
+ * @op: REQ_OP and flags
  * @val: value to add
  *
  * Add @val to @rwstat.  The counters are chosen according to @rw.  The
  * caller is responsible for synchronizing calls to this function.
  */
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
-				   int op, int op_flags, uint64_t val)
+				   unsigned int op, uint64_t val)
 {
 	struct percpu_counter *cnt;
 
@@ -600,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 
 	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
 
-	if (op_flags & REQ_SYNC)
+	if (op & REQ_SYNC)
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
@@ -705,9 +704,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 
 	if (!throtl) {
 		blkg = blkg ?: q->root_blkg;
-		blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_opf,
+		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
 				bio->bi_iter.bi_size);
-		blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_opf, 1);
+		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
 	rcu_read_unlock();
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ec69a8fe3b29..dca972d67548 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -88,24 +88,6 @@ struct bio {
 	struct bio_vec		bi_inline_vecs[0];
 };
 
-#define BIO_OP_SHIFT	(8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS)
-#define bio_flags(bio)	((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1))
-#define bio_op(bio)	((bio)->bi_opf >> BIO_OP_SHIFT)
-
-#define bio_set_op_attrs(bio, op, op_flags) do {			\
-	if (__builtin_constant_p(op))					\
-		BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS));		\
-	else								\
-		WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS));		\
-	if (__builtin_constant_p(op_flags))				\
-		BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT));	\
-	else								\
-		WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT));	\
-	(bio)->bi_opf = bio_flags(bio);					\
-	(bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT);			\
-	(bio)->bi_opf |= (op_flags);					\
-} while (0)
-
 #define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
 
 /*
@@ -147,26 +129,40 @@ struct bio {
 #endif /* CONFIG_BLOCK */
 
 /*
- * Request flags.  For use in the cmd_flags field of struct request, and in
- * bi_opf of struct bio.  Note that some flags are only valid in either one.
+ * Operations and flags common to the bio and request structures.
+ * We use 8 bits for encoding the operation, and the remaining 24 for flags.
  */
-enum rq_flag_bits {
-	/* common flags */
-	__REQ_FAILFAST_DEV,	/* no driver retries of device errors */
+#define REQ_OP_BITS	8
+#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS	24
+
+enum req_opf {
+	REQ_OP_READ,
+	REQ_OP_WRITE,
+	REQ_OP_DISCARD,		/* request to discard sectors */
+	REQ_OP_SECURE_ERASE,	/* request to securely erase sectors */
+	REQ_OP_WRITE_SAME,	/* write same block many times */
+	REQ_OP_FLUSH,		/* request for cache flush */
+	REQ_OP_ZONE_REPORT,	/* Get zone information */
+	REQ_OP_ZONE_RESET,	/* Reset a zone write pointer */
+
+	REQ_OP_LAST,
+};
+
+enum req_flag_bits {
+	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
+		REQ_OP_BITS,
 	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
 	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
-
 	__REQ_SYNC,		/* request is sync (sync write or read) */
 	__REQ_META,		/* metadata io request */
 	__REQ_PRIO,		/* boost priority in cfq */
-
 	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
-
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -176,37 +172,32 @@ enum rq_flag_bits {
 #define REQ_SYNC		(1ULL << __REQ_SYNC)
 #define REQ_META		(1ULL << __REQ_META)
 #define REQ_PRIO		(1ULL << __REQ_PRIO)
+#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
 #define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
 #define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
+#define REQ_FUA			(1ULL << __REQ_FUA)
+#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
+#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-#define REQ_COMMON_MASK \
-	(REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
-	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD)
-#define REQ_CLONE_MASK		REQ_COMMON_MASK
 
-/* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
-#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
-#define REQ_FUA			(1ULL << __REQ_FUA)
-#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
-#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
+#define bio_op(bio) \
+	((bio)->bi_opf & REQ_OP_MASK)
+#define req_op(req) \
+	((req)->cmd_flags & REQ_OP_MASK)
 
-enum req_op {
-	REQ_OP_READ,
-	REQ_OP_WRITE,
-	REQ_OP_DISCARD,		/* request to discard sectors */
-	REQ_OP_SECURE_ERASE,	/* request to securely erase sectors */
-	REQ_OP_WRITE_SAME,	/* write same block many times */
-	REQ_OP_FLUSH,		/* request for cache flush */
-	REQ_OP_ZONE_REPORT,	/* Get zone information */
-	REQ_OP_ZONE_RESET,	/* Reset a zone write pointer */
-};
+/* obsolete, don't use in new code */
+#define bio_set_op_attrs(bio, op, op_flags) \
+	((bio)->bi_opf |= (op | op_flags))
 
-#define REQ_OP_BITS 3
+static inline bool op_is_sync(unsigned int op)
+{
+	return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC);
+}
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE	-1U
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4415feac679..8396da2bb698 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -142,7 +142,7 @@ struct request {
 
 	int cpu;
 	unsigned cmd_type;
-	u64 cmd_flags;
+	unsigned int cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
 	unsigned long atomic_flags;
 
@@ -244,20 +244,6 @@ struct request {
 	struct request *next_rq;
 };
 
-#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS)
-#define req_op(req)  ((req)->cmd_flags >> REQ_OP_SHIFT)
-
-#define req_set_op(req, op) do {				\
-	WARN_ON(op >= (1 << REQ_OP_BITS));			\
-	(req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1);	\
-	(req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT);	\
-} while (0)
-
-#define req_set_op_attrs(req, op, flags) do {	\
-	req_set_op(req, op);			\
-	(req)->cmd_flags |= flags;		\
-} while (0)
-
 static inline unsigned short req_get_ioprio(struct request *req)
 {
 	return req->ioprio;
@@ -741,17 +727,9 @@ static inline unsigned int blk_queue_zone_size(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
-/*
- * We regard a request as sync, if either a read or a sync write
- */
-static inline bool rw_is_sync(int op, unsigned int rw_flags)
-{
-	return op == REQ_OP_READ || (rw_flags & REQ_SYNC);
-}
-
 static inline bool rq_is_sync(struct request *rq)
 {
-	return rw_is_sync(req_op(rq), rq->cmd_flags);
+	return op_is_sync(rq->cmd_flags);
 }
 
 static inline bool blk_rl_full(struct request_list *rl, bool sync)
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cceb72f9e29f..e417f080219a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq)
 }
 
 extern void blk_dump_cmd(char *buf, struct request *rq);
-extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes);
+extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
 
 #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
 
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index b91b023deffb..a52c6580cc9a 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -58,7 +58,7 @@ struct dm_io_notify {
 struct dm_io_client;
 struct dm_io_request {
 	int bi_op;			/* REQ_OP */
-	int bi_op_flags;		/* rq_flag_bits */
+	int bi_op_flags;		/* req_flag_bits */
 	struct dm_io_memory mem;	/* Memory to use for io */
 	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
 	struct dm_io_client *client;	/* Client memory handler */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e7f358d2e5fc..f219c9aed360 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -30,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int);
 typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
 typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, int, int);
+typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int);
 
 typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
@@ -139,7 +139,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
-extern int elv_may_queue(struct request_queue *, int, int);
+extern int elv_may_queue(struct request_queue *, unsigned int);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
 			   struct bio *bio, gfp_t gfp_mask);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index d336b890e31f..df3e9ae5ad8d 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -27,8 +27,7 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->orig_sector	= bio->bi_iter.bi_sector - 16;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -102,8 +101,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d  %s %llu + %u",
@@ -138,8 +136,7 @@ TRACE_EVENT(bcache_read,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		__entry->cache_hit = hit;
 		__entry->bypass = bypass;
 	),
@@ -170,8 +167,7 @@ TRACE_EVENT(bcache_write,
 		__entry->inode		= inode;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		__entry->writeback = writeback;
 		__entry->bypass = bypass;
 	),
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 8f3a163b8166..3e02e3a25413 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -84,8 +84,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
 					0 : blk_rq_sectors(rq);
 		__entry->errors    = rq->errors;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 		blk_dump_cmd(__get_str(cmd), rq);
 	),
 
@@ -163,7 +162,7 @@ TRACE_EVENT(block_rq_complete,
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->errors    = rq->errors;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, nr_bytes);
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
 		blk_dump_cmd(__get_str(cmd), rq);
 	),
 
@@ -199,8 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->bytes     = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
 					blk_rq_bytes(rq) : 0;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 		blk_dump_cmd(__get_str(cmd), rq);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
@@ -274,8 +272,7 @@ TRACE_EVENT(block_bio_bounce,
 					  bio->bi_bdev->bd_dev : 0;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -313,8 +310,7 @@ TRACE_EVENT(block_bio_complete,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->error		= error;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
@@ -341,8 +337,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -409,8 +404,7 @@ TRACE_EVENT(block_bio_queue,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -438,7 +432,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
 		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
 		__entry->sector		= bio ? bio->bi_iter.bi_sector : 0;
 		__entry->nr_sector	= bio ? bio_sectors(bio) : 0;
-		blk_fill_rwbs(__entry->rwbs, bio ? bio_op(bio) : 0,
+		blk_fill_rwbs(__entry->rwbs,
 			      bio ? bio->bi_opf : 0, __entry->nr_sector);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
         ),
@@ -573,8 +567,7 @@ TRACE_EVENT(block_split,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->new_sector	= new_sector;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -617,8 +610,7 @@ TRACE_EVENT(block_bio_remap,
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
@@ -664,8 +656,7 @@ TRACE_EVENT(block_rq_remap,
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
 		__entry->nr_bios	= blk_rq_count_bios(rq);
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u",
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index dbafc5df03f3..95cecbf67f5c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1777,14 +1777,14 @@ void blk_dump_cmd(char *buf, struct request *rq)
 	}
 }
 
-void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes)
+void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
 {
 	int i = 0;
 
-	if (rw & REQ_PREFLUSH)
+	if (op & REQ_PREFLUSH)
 		rwbs[i++] = 'F';
 
-	switch (op) {
+	switch (op & REQ_OP_MASK) {
 	case REQ_OP_WRITE:
 	case REQ_OP_WRITE_SAME:
 		rwbs[i++] = 'W';
@@ -1806,13 +1806,13 @@ void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes)
 		rwbs[i++] = 'N';
 	}
 
-	if (rw & REQ_FUA)
+	if (op & REQ_FUA)
 		rwbs[i++] = 'F';
-	if (rw & REQ_RAHEAD)
+	if (op & REQ_RAHEAD)
 		rwbs[i++] = 'A';
-	if (rw & REQ_SYNC)
+	if (op & REQ_SYNC)
 		rwbs[i++] = 'S';
-	if (rw & REQ_META)
+	if (op & REQ_META)
 		rwbs[i++] = 'M';
 
 	rwbs[i] = '\0';
-- 
cgit v1.2.3


From 87374179c535a98337569904727aa02f960fe79e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Oct 2016 15:12:15 +0200
Subject: block: add a proper block layer data direction encoding

Currently the block layer op_is_write, bio_data_dir and rq_data_dir
helper treat every operation that is not a READ as a data out operation.
This worked surprisingly long, but the new REQ_OP_ZONE_REPORT operation
actually adds a second operation that reads data from the device.
Surprisingly nothing critical relied on this direction, but this might
be a good opportunity to properly fix this issue up.

We take a little inspiration and use the least significant bit of the
operation number to encode the data direction, which just requires us
to renumber the operations to fix this scheme.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 38 ++++++++++++++++++++++++++++++--------
 include/linux/fs.h        |  5 -----
 2 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dca972d67548..3fa62cabe8d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -131,20 +131,37 @@ struct bio {
 /*
  * Operations and flags common to the bio and request structures.
  * We use 8 bits for encoding the operation, and the remaining 24 for flags.
+ *
+ * The least significant bit of the operation number indicates the data
+ * transfer direction:
+ *
+ *   - if the least significant bit is set transfers are TO the device
+ *   - if the least significant bit is not set transfers are FROM the device
+ *
+ * If a operation does not transfer data the least significant bit has no
+ * meaning.
  */
 #define REQ_OP_BITS	8
 #define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
 #define REQ_FLAG_BITS	24
 
 enum req_opf {
-	REQ_OP_READ,
-	REQ_OP_WRITE,
-	REQ_OP_DISCARD,		/* request to discard sectors */
-	REQ_OP_SECURE_ERASE,	/* request to securely erase sectors */
-	REQ_OP_WRITE_SAME,	/* write same block many times */
-	REQ_OP_FLUSH,		/* request for cache flush */
-	REQ_OP_ZONE_REPORT,	/* Get zone information */
-	REQ_OP_ZONE_RESET,	/* Reset a zone write pointer */
+	/* read sectors from the device */
+	REQ_OP_READ		= 0,
+	/* write sectors to the device */
+	REQ_OP_WRITE		= 1,
+	/* flush the volatile write cache */
+	REQ_OP_FLUSH		= 2,
+	/* discard sectors */
+	REQ_OP_DISCARD		= 3,
+	/* get zone information */
+	REQ_OP_ZONE_REPORT	= 4,
+	/* securely erase sectors */
+	REQ_OP_SECURE_ERASE	= 5,
+	/* seset a zone write pointer */
+	REQ_OP_ZONE_RESET	= 6,
+	/* write the same sector many times */
+	REQ_OP_WRITE_SAME	= 7,
 
 	REQ_OP_LAST,
 };
@@ -194,6 +211,11 @@ enum req_flag_bits {
 #define bio_set_op_attrs(bio, op, op_flags) \
 	((bio)->bi_opf |= (op | op_flags))
 
+static inline bool op_is_write(unsigned int op)
+{
+	return (op & 1);
+}
+
 static inline bool op_is_sync(unsigned int op)
 {
 	return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16d2b6e874d6..e3e878f12b25 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2499,11 +2499,6 @@ extern void make_bad_inode(struct inode *);
 extern bool is_bad_inode(struct inode *);
 
 #ifdef CONFIG_BLOCK
-static inline bool op_is_write(unsigned int op)
-{
-	return op == REQ_OP_READ ? false : true;
-}
-
 /*
  * return data direction, READ or WRITE
  */
-- 
cgit v1.2.3


From 42412c3aae5d8ea57a46b8ff86bb67bc1a270d9c Mon Sep 17 00:00:00 2001
From: Silvio Fricke <silvio.fricke@gmail.com>
Date: Fri, 28 Oct 2016 10:14:09 +0200
Subject: workqueue: kerneldocify workqueue_attrs

Only formating changes.

Signed-off-by: Silvio Fricke <silvio.fricke@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/workqueue.h | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index fc6e22186405..d4f16cf6281c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -119,18 +119,30 @@ struct delayed_work {
 	int cpu;
 };
 
-/*
- * A struct for workqueue attributes.  This can be used to change
- * attributes of an unbound workqueue.
+/**
+ * struct workqueue_attrs - A struct for workqueue attributes.
  *
- * Unlike other fields, ->no_numa isn't a property of a worker_pool.  It
- * only modifies how apply_workqueue_attrs() select pools and thus doesn't
- * participate in pool hash calculations or equality comparisons.
+ * This can be used to change attributes of an unbound workqueue.
  */
 struct workqueue_attrs {
-	int			nice;		/* nice level */
-	cpumask_var_t		cpumask;	/* allowed CPUs */
-	bool			no_numa;	/* disable NUMA affinity */
+	/**
+	 * @nice: nice level
+	 */
+	int nice;
+
+	/**
+	 * @cpumask: allowed CPUs
+	 */
+	cpumask_var_t cpumask;
+
+	/**
+	 * @no_numa: disable NUMA affinity
+	 *
+	 * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It
+	 * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus
+	 * doesn't participate in pool hash calculations or equality comparisons.
+	 */
+	bool no_numa;
 };
 
 static inline struct delayed_work *to_delayed_work(struct work_struct *work)
@@ -272,7 +284,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 
 /*
  * Workqueue flags and constants.  For details, please refer to
- * Documentation/workqueue.txt.
+ * Documentation/core-api/workqueue.rst.
  */
 enum {
 	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
@@ -370,7 +382,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
  * @args...: args for @fmt
  *
  * Allocate a workqueue with the specified parameters.  For detailed
- * information on WQ_* flags, please refer to Documentation/workqueue.txt.
+ * information on WQ_* flags, please refer to
+ * Documentation/core-api/workqueue.rst.
  *
  * The __lock_name macro dance is to guarantee that single lock_class_key
  * doesn't end up with different namesm, which isn't allowed by lockdep.
-- 
cgit v1.2.3


From b917783c7b350518f8c5d88bb5848aa8064408a6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Oct 2016 18:49:46 +0200
Subject: flow_dissector: __skb_get_hash_symmetric arg can be const

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h    | 2 +-
 net/core/flow_dissector.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 601258f6e621..663fda2887f7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1086,7 +1086,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
-u32 __skb_get_hash_symmetric(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1a7b80f73376..0cc607d05fc8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(make_flow_keys_digest);
 
 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
 
-u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
 {
 	struct flow_keys keys;
 
-- 
cgit v1.2.3


From 5579e1519bad43b874922dbe87c74fdcbd97a7db Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 31 Aug 2016 05:17:54 +0000
Subject: net/mlx5: Update struct mlx5_ifc_xrqc_bits

Update struct mlx5_ifc_xrqc_bits according to last specification

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6045d4d58065..12f72e45a3f0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2844,7 +2844,7 @@ struct mlx5_ifc_xrqc_bits {
 
 	struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
 
-	u8         reserved_at_180[0x200];
+	u8         reserved_at_180[0x880];
 
 	struct mlx5_ifc_wq_bits wq;
 };
-- 
cgit v1.2.3


From dd257efb1e0f8875ed7e42b88837a8dada0d0e41 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 31 Aug 2016 05:29:58 +0000
Subject: net/mlx5: Ensure SRQ physical address structure endianness

SRQ physical address structure field should be in big-endian format.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/srq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index 33c97dc900f8..1cde0fd53f90 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -55,7 +55,7 @@ struct mlx5_srq_attr {
 	u32 lwm;
 	u32 user_index;
 	u64 db_record;
-	u64 *pas;
+	__be64 *pas;
 };
 
 struct mlx5_core_dev;
-- 
cgit v1.2.3


From 813f854053c26204e2723c498def4c7870dcc7f4 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 11 Aug 2016 11:21:39 +0300
Subject: net/mlx5: Introduce TSAR manipulation firmware commands

TSAR (stands for Transmit Scheduling ARbiter) is a hardware component
that is responsible for selecting the next entity to serve on the
transmit path.
The arbitration defines the QoS policy between the agents connected to
the TSAR.
The TSAR is a consist two main features:
1) BW Allocation between agents:
The TSAR implements a defecit weighted round robin between the agents.
Each agent attached to the TSAR is assigned with a weight and it is
awarded transmission tokens according to this weight.
2) Rate limer per agent:
Each agent attached to the TSAR is (optionally) assigned with a rate
limit.
TSAR will not allow scheduling for an agent exceeding its defined rate
limit.

In this patch we implement the API of manipulating the TSAR.

Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  13 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/rl.c       |  65 +++++++
 include/linux/mlx5/mlx5_ifc.h                      | 199 ++++++++++++++++++++-
 4 files changed, 279 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1e639f886021..8561102f2563 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -318,6 +318,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,11 +421,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
 	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -580,6 +585,12 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
 	MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+	MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+	MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 3d0cfb9f18f9..bf431715172c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -91,6 +91,13 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *context, u32 element_id,
+				       u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+					u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 104902a93a0b..e651e4c02867 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -36,6 +36,71 @@
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
+/* Scheduling element fw management */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *ctx, u32 *element_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+	void *schedc;
+	int err;
+
+	schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+			      scheduling_context);
+	MLX5_SET(create_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+	MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*element_id = MLX5_GET(create_scheduling_element_out, out,
+			       scheduling_element_id);
+	return 0;
+}
+
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *ctx, u32 element_id,
+				       u32 modify_bitmask)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+	void *schedc;
+
+	schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+			      scheduling_context);
+	MLX5_SET(modify_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+	MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+		 element_id);
+	MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+		 modify_bitmask);
+	MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+					u32 element_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+
+	MLX5_SET(destroy_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+	MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id,
+		 element_id);
+	MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 /* Finds an entry where we can register the given rate
  * If the rate already exists, return the entry where it is registered,
  * otherwise return the first available entry.
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 12f72e45a3f0..2632cb2caf10 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -145,6 +145,12 @@ enum {
 	MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
 	MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
 	MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
+	MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
+	MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
+	MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT       = 0x784,
+	MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT      = 0x785,
+	MLX5_CMD_OP_CREATE_QOS_PARA_VPORT         = 0x786,
+	MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT        = 0x787,
 	MLX5_CMD_OP_ALLOC_PD                      = 0x800,
 	MLX5_CMD_OP_DEALLOC_PD                    = 0x801,
 	MLX5_CMD_OP_ALLOC_UAR                     = 0x802,
@@ -537,13 +543,27 @@ struct mlx5_ifc_e_switch_cap_bits {
 
 struct mlx5_ifc_qos_cap_bits {
 	u8         packet_pacing[0x1];
-	u8         reserved_0[0x1f];
-	u8         reserved_1[0x20];
+	u8         esw_scheduling[0x1];
+	u8         reserved_at_2[0x1e];
+
+	u8         reserved_at_20[0x20];
+
 	u8         packet_pacing_max_rate[0x20];
+
 	u8         packet_pacing_min_rate[0x20];
-	u8         reserved_2[0x10];
+
+	u8         reserved_at_80[0x10];
 	u8         packet_pacing_rate_table_size[0x10];
-	u8         reserved_3[0x760];
+
+	u8         esw_element_type[0x10];
+	u8         esw_tsar_type[0x10];
+
+	u8         reserved_at_c0[0x10];
+	u8         max_qos_para_vport[0x10];
+
+	u8         max_tsar_bw_share[0x20];
+
+	u8         reserved_at_100[0x700];
 };
 
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
@@ -2333,6 +2353,30 @@ struct mlx5_ifc_sqc_bits {
 	struct mlx5_ifc_wq_bits wq;
 };
 
+enum {
+	SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0,
+	SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1,
+	SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2,
+	SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
+};
+
+struct mlx5_ifc_scheduling_context_bits {
+	u8         element_type[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         element_attributes[0x20];
+
+	u8         parent_element_id[0x20];
+
+	u8         reserved_at_60[0x40];
+
+	u8         bw_share[0x20];
+
+	u8         max_average_bw[0x20];
+
+	u8         reserved_at_e0[0x120];
+};
+
 struct mlx5_ifc_rqtc_bits {
 	u8         reserved_at_0[0xa0];
 
@@ -2920,6 +2964,29 @@ struct mlx5_ifc_register_loopback_control_bits {
 	u8         reserved_at_20[0x60];
 };
 
+struct mlx5_ifc_vport_tc_element_bits {
+	u8         traffic_class[0x4];
+	u8         reserved_at_4[0xc];
+	u8         vport_number[0x10];
+};
+
+struct mlx5_ifc_vport_element_bits {
+	u8         reserved_at_0[0x10];
+	u8         vport_number[0x10];
+};
+
+enum {
+	TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
+	TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
+	TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+};
+
+struct mlx5_ifc_tsar_element_bits {
+	u8         reserved_at_0[0x8];
+	u8         tsar_type[0x8];
+	u8         reserved_at_10[0x10];
+};
+
 struct mlx5_ifc_teardown_hca_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -3540,6 +3607,39 @@ struct mlx5_ifc_query_special_contexts_in_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_query_scheduling_element_out_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0xc0];
+
+	struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+	u8         reserved_at_300[0x100];
+};
+
+enum {
+	SCHEDULING_HIERARCHY_E_SWITCH = 0x2,
+};
+
+struct mlx5_ifc_query_scheduling_element_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         scheduling_hierarchy[0x8];
+	u8         reserved_at_48[0x18];
+
+	u8         scheduling_element_id[0x20];
+
+	u8         reserved_at_80[0x180];
+};
+
 struct mlx5_ifc_query_rqt_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -4725,6 +4825,43 @@ struct mlx5_ifc_modify_sq_in_bits {
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
+struct mlx5_ifc_modify_scheduling_element_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x1c0];
+};
+
+enum {
+	MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1,
+	MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2,
+};
+
+struct mlx5_ifc_modify_scheduling_element_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         scheduling_hierarchy[0x8];
+	u8         reserved_at_48[0x18];
+
+	u8         scheduling_element_id[0x20];
+
+	u8         reserved_at_80[0x20];
+
+	u8         modify_bitmask[0x20];
+
+	u8         reserved_at_c0[0x40];
+
+	struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+	u8         reserved_at_300[0x100];
+};
+
 struct mlx5_ifc_modify_rqt_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -5390,6 +5527,30 @@ struct mlx5_ifc_destroy_sq_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_destroy_scheduling_element_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x1c0];
+};
+
+struct mlx5_ifc_destroy_scheduling_element_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         scheduling_hierarchy[0x8];
+	u8         reserved_at_48[0x18];
+
+	u8         scheduling_element_id[0x20];
+
+	u8         reserved_at_80[0x180];
+};
+
 struct mlx5_ifc_destroy_rqt_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -6017,6 +6178,36 @@ struct mlx5_ifc_create_sq_in_bits {
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
+struct mlx5_ifc_create_scheduling_element_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	u8         scheduling_element_id[0x20];
+
+	u8         reserved_at_a0[0x160];
+};
+
+struct mlx5_ifc_create_scheduling_element_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         scheduling_hierarchy[0x8];
+	u8         reserved_at_48[0x18];
+
+	u8         reserved_at_60[0xa0];
+
+	struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+	u8         reserved_at_300[0x100];
+};
+
 struct mlx5_ifc_create_rqt_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
-- 
cgit v1.2.3


From 74491de937125d0c98c9b9c9208b4105717a3caa Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Wed, 31 Aug 2016 11:24:25 +0000
Subject: net/mlx5: Add multi dest support

Currently when calling mlx5_add_flow_rule we accept
only one flow destination, this commit allows to pass
multiple destinations.

This change forces us to change the return structure to a more
flexible one. We introduce a flow handle (struct mlx5_flow_handle),
it holds internally the number for rules created and holds an array
where each cell points the to a flow rule.

From the consumers (of mlx5_add_flow_rule) point of view this
change is only cosmetic and requires only to change the type
of the returned value they store.

From the core point of view, we now need to use a loop when
allocating and deleting rules (e.g given to us a flow handler).

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/main.c                  |  14 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  14 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  38 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    |  49 ++--
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  19 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  32 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  68 ++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  22 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  42 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 289 ++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   5 +
 include/linux/mlx5/fs.h                            |  28 +-
 14 files changed, 374 insertions(+), 254 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d02341eebddb..8e0dbd51944e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1771,13 +1771,13 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 	mutex_lock(&dev->flow_db.lock);
 
 	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
-		mlx5_del_flow_rule(iter->rule);
+		mlx5_del_flow_rules(iter->rule);
 		put_flow_table(dev, iter->prio, true);
 		list_del(&iter->list);
 		kfree(iter);
 	}
 
-	mlx5_del_flow_rule(handler->rule);
+	mlx5_del_flow_rules(handler->rule);
 	put_flow_table(dev, handler->prio, true);
 	mutex_unlock(&dev->flow_db.lock);
 
@@ -1907,10 +1907,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
 	action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 		MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-	handler->rule = mlx5_add_flow_rule(ft, spec,
+	handler->rule = mlx5_add_flow_rules(ft, spec,
 					   action,
 					   MLX5_FS_DEFAULT_FLOW_TAG,
-					   dst);
+					   dst, 1);
 
 	if (IS_ERR(handler->rule)) {
 		err = PTR_ERR(handler->rule);
@@ -1941,7 +1941,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de
 		handler_dst = create_flow_rule(dev, ft_prio,
 					       flow_attr, dst);
 		if (IS_ERR(handler_dst)) {
-			mlx5_del_flow_rule(handler->rule);
+			mlx5_del_flow_rules(handler->rule);
 			ft_prio->refcount--;
 			kfree(handler);
 			handler = handler_dst;
@@ -2004,7 +2004,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
 						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
 						 dst);
 		if (IS_ERR(handler_ucast)) {
-			mlx5_del_flow_rule(handler->rule);
+			mlx5_del_flow_rules(handler->rule);
 			ft_prio->refcount--;
 			kfree(handler);
 			handler = handler_ucast;
@@ -2046,7 +2046,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
 	return handler_rx;
 
 err_tx:
-	mlx5_del_flow_rule(handler_rx->rule);
+	mlx5_del_flow_rules(handler_rx->rule);
 	ft_rx->refcount--;
 	kfree(handler_rx);
 err:
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index dcdcd195fe53..d5d007740159 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -153,7 +153,7 @@ struct mlx5_ib_flow_handler {
 	struct list_head		list;
 	struct ib_flow			ibflow;
 	struct mlx5_ib_flow_prio	*prio;
-	struct mlx5_flow_rule	*rule;
+	struct mlx5_flow_handle		*rule;
 };
 
 struct mlx5_ib_flow_db {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 460363b66cb1..47ee8ffe987f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -520,7 +520,7 @@ struct mlx5e_vxlan_db {
 
 struct mlx5e_l2_rule {
 	u8  addr[ETH_ALEN + 2];
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 };
 
 struct mlx5e_flow_table {
@@ -541,10 +541,10 @@ struct mlx5e_tc_table {
 struct mlx5e_vlan_table {
 	struct mlx5e_flow_table		ft;
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	struct mlx5_flow_rule	*active_vlans_rule[VLAN_N_VID];
-	struct mlx5_flow_rule	*untagged_rule;
-	struct mlx5_flow_rule	*any_vlan_rule;
-	bool          filter_disabled;
+	struct mlx5_flow_handle	*active_vlans_rule[VLAN_N_VID];
+	struct mlx5_flow_handle	*untagged_rule;
+	struct mlx5_flow_handle	*any_vlan_rule;
+	bool		filter_disabled;
 };
 
 struct mlx5e_l2_table {
@@ -562,14 +562,14 @@ struct mlx5e_l2_table {
 /* L3/L4 traffic type classifier */
 struct mlx5e_ttc_table {
 	struct mlx5e_flow_table  ft;
-	struct mlx5_flow_rule	 *rules[MLX5E_NUM_TT];
+	struct mlx5_flow_handle	 *rules[MLX5E_NUM_TT];
 };
 
 #define ARFS_HASH_SHIFT BITS_PER_BYTE
 #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
 struct arfs_table {
 	struct mlx5e_flow_table  ft;
-	struct mlx5_flow_rule    *default_rule;
+	struct mlx5_flow_handle	 *default_rule;
 	struct hlist_head	 rules_hash[ARFS_HASH_SIZE];
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index a8cb38789774..8ff22e83e1dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -56,7 +56,7 @@ struct arfs_tuple {
 struct arfs_rule {
 	struct mlx5e_priv	*priv;
 	struct work_struct      arfs_work;
-	struct mlx5_flow_rule   *rule;
+	struct mlx5_flow_handle *rule;
 	struct hlist_node	hlist;
 	int			rxq;
 	/* Flow ID passed to ndo_rx_flow_steer */
@@ -104,7 +104,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
 		tt = arfs_get_tt(i);
 		/* Modify ttc rules destination to bypass the aRFS tables*/
 		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-						   &dest);
+						   &dest, NULL);
 		if (err) {
 			netdev_err(priv->netdev,
 				   "%s: modify ttc destination failed\n",
@@ -137,7 +137,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 		tt = arfs_get_tt(i);
 		/* Modify ttc rules destination to point on the aRFS FTs */
 		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-						   &dest);
+						   &dest, NULL);
 		if (err) {
 			netdev_err(priv->netdev,
 				   "%s: modify ttc destination failed err=%d\n",
@@ -151,7 +151,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 
 static void arfs_destroy_table(struct arfs_table *arfs_t)
 {
-	mlx5_del_flow_rule(arfs_t->default_rule);
+	mlx5_del_flow_rules(arfs_t->default_rule);
 	mlx5e_destroy_flow_table(&arfs_t->ft);
 }
 
@@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec,
-						  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-						  MLX5_FS_DEFAULT_FLOW_TAG,
-						  &dest);
+	arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+						   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+						   MLX5_FS_DEFAULT_FLOW_TAG,
+						   &dest, 1);
 	if (IS_ERR(arfs_t->default_rule)) {
 		err = PTR_ERR(arfs_t->default_rule);
 		arfs_t->default_rule = NULL;
@@ -396,7 +396,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
 	hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
 		if (arfs_rule->rule)
-			mlx5_del_flow_rule(arfs_rule->rule);
+			mlx5_del_flow_rules(arfs_rule->rule);
 		hlist_del(&arfs_rule->hlist);
 		kfree(arfs_rule);
 	}
@@ -420,7 +420,7 @@ static void arfs_del_rules(struct mlx5e_priv *priv)
 	hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
 		cancel_work_sync(&rule->arfs_work);
 		if (rule->rule)
-			mlx5_del_flow_rule(rule->rule);
+			mlx5_del_flow_rules(rule->rule);
 		hlist_del(&rule->hlist);
 		kfree(rule);
 	}
@@ -462,12 +462,12 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 	return NULL;
 }
 
-static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
-					    struct arfs_rule *arfs_rule)
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+					      struct arfs_rule *arfs_rule)
 {
 	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
 	struct arfs_tuple *tuple = &arfs_rule->tuple;
-	struct mlx5_flow_rule *rule = NULL;
+	struct mlx5_flow_handle *rule = NULL;
 	struct mlx5_flow_destination dest;
 	struct arfs_table *arfs_table;
 	struct mlx5_flow_spec *spec;
@@ -544,9 +544,9 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
 	}
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
-	rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				  MLX5_FS_DEFAULT_FLOW_TAG,
-				  &dest);
+	rule = mlx5_add_flow_rules(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				   MLX5_FS_DEFAULT_FLOW_TAG,
+				   &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
@@ -559,14 +559,14 @@ out:
 }
 
 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
-				struct mlx5_flow_rule *rule, u16 rxq)
+				struct mlx5_flow_handle *rule, u16 rxq)
 {
 	struct mlx5_flow_destination dst;
 	int err = 0;
 
 	dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dst.tir_num = priv->direct_tir[rxq].tirn;
-	err =  mlx5_modify_rule_destination(rule, &dst);
+	err =  mlx5_modify_rule_destination(rule, &dst, NULL);
 	if (err)
 		netdev_warn(priv->netdev,
 			    "Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
@@ -578,7 +578,7 @@ static void arfs_handle_work(struct work_struct *work)
 						   struct arfs_rule,
 						   arfs_work);
 	struct mlx5e_priv *priv = arfs_rule->priv;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 
 	mutex_lock(&priv->state_lock);
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 36fbc6b21a33..bed544d47ba1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -160,7 +160,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 {
 	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule **rule_p;
+	struct mlx5_flow_handle **rule_p;
 	int err = 0;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -187,10 +187,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	*rule_p = mlx5_add_flow_rule(ft, spec,
-				     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				     MLX5_FS_DEFAULT_FLOW_TAG,
-				     &dest);
+	*rule_p = mlx5_add_flow_rules(ft, spec,
+				      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				      MLX5_FS_DEFAULT_FLOW_TAG,
+				      &dest, 1);
 
 	if (IS_ERR(*rule_p)) {
 		err = PTR_ERR(*rule_p);
@@ -229,20 +229,20 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
 	switch (rule_type) {
 	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
 		if (priv->fs.vlan.untagged_rule) {
-			mlx5_del_flow_rule(priv->fs.vlan.untagged_rule);
+			mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
 			priv->fs.vlan.untagged_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
 		if (priv->fs.vlan.any_vlan_rule) {
-			mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule);
+			mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule);
 			priv->fs.vlan.any_vlan_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
 		mlx5e_vport_context_update_vlans(priv);
 		if (priv->fs.vlan.active_vlans_rule[vid]) {
-			mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]);
+			mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]);
 			priv->fs.vlan.active_vlans_rule[vid] = NULL;
 		}
 		mlx5e_vport_context_update_vlans(priv);
@@ -560,7 +560,7 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
 
 	for (i = 0; i < MLX5E_NUM_TT; i++) {
 		if (!IS_ERR_OR_NULL(ttc->rules[i])) {
-			mlx5_del_flow_rule(ttc->rules[i]);
+			mlx5_del_flow_rules(ttc->rules[i]);
 			ttc->rules[i] = NULL;
 		}
 	}
@@ -616,13 +616,14 @@ static struct {
 	},
 };
 
-static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-						      struct mlx5_flow_table *ft,
-						      struct mlx5_flow_destination *dest,
-						      u16 etype,
-						      u8 proto)
+static struct mlx5_flow_handle *
+mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
+			struct mlx5_flow_table *ft,
+			struct mlx5_flow_destination *dest,
+			u16 etype,
+			u8 proto)
 {
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -643,10 +644,10 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
 	}
 
-	rule = mlx5_add_flow_rule(ft, spec,
-				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				  MLX5_FS_DEFAULT_FLOW_TAG,
-				  dest);
+	rule = mlx5_add_flow_rules(ft, spec,
+				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				   MLX5_FS_DEFAULT_FLOW_TAG,
+				   dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
@@ -660,7 +661,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
 {
 	struct mlx5_flow_destination dest;
 	struct mlx5e_ttc_table *ttc;
-	struct mlx5_flow_rule **rules;
+	struct mlx5_flow_handle **rules;
 	struct mlx5_flow_table *ft;
 	int tt;
 	int err;
@@ -801,7 +802,7 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 				   struct mlx5e_l2_rule *ai)
 {
 	if (!IS_ERR_OR_NULL(ai->rule)) {
-		mlx5_del_flow_rule(ai->rule);
+		mlx5_del_flow_rules(ai->rule);
 		ai->rule = NULL;
 	}
 }
@@ -847,9 +848,9 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	ai->rule = mlx5_add_flow_rule(ft, spec,
-				      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				      MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+	ai->rule = mlx5_add_flow_rules(ft, spec,
+				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				       MLX5_FS_DEFAULT_FLOW_TAG, &dest, 1);
 	if (IS_ERR(ai->rule)) {
 		netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
 			   __func__, mv_dmac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d17c24227900..cf52c06377f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -36,7 +36,7 @@
 struct mlx5e_ethtool_rule {
 	struct list_head             list;
 	struct ethtool_rx_flow_spec  flow_spec;
-	struct mlx5_flow_rule        *rule;
+	struct mlx5_flow_handle	     *rule;
 	struct mlx5e_ethtool_table   *eth_ft;
 };
 
@@ -284,13 +284,14 @@ static bool outer_header_zero(u32 *match_criteria)
 						  size - 1);
 }
 
-static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
-						    struct mlx5_flow_table *ft,
-						    struct ethtool_rx_flow_spec *fs)
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+		      struct mlx5_flow_table *ft,
+		      struct ethtool_rx_flow_spec *fs)
 {
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	int err = 0;
 	u32 action;
 
@@ -317,8 +318,8 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
 	}
 
 	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-	rule = mlx5_add_flow_rule(ft, spec, action,
-				  MLX5_FS_DEFAULT_FLOW_TAG, dst);
+	rule = mlx5_add_flow_rules(ft, spec, action,
+				   MLX5_FS_DEFAULT_FLOW_TAG, dst, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
@@ -335,7 +336,7 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
 			     struct mlx5e_ethtool_rule *eth_rule)
 {
 	if (eth_rule->rule)
-		mlx5_del_flow_rule(eth_rule->rule);
+		mlx5_del_flow_rules(eth_rule->rule);
 	list_del(&eth_rule->list);
 	priv->fs.ethtool.tot_num_rules--;
 	put_flow_table(eth_rule->eth_ft);
@@ -475,7 +476,7 @@ int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
 {
 	struct mlx5e_ethtool_table *eth_ft;
 	struct mlx5e_ethtool_rule *eth_rule;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	int num_tuples;
 	int err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3c97da103d30..88d3fd132d63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -328,7 +328,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_eswitch_rep *rep = priv->ppriv;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	int err;
 	int i;
 
@@ -360,7 +360,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	return 0;
 
 err_del_flow_rule:
-	mlx5_del_flow_rule(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rep->vport_rx_rule);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -375,7 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 	int i;
 
 	mlx5e_tc_cleanup(priv);
-	mlx5_del_flow_rule(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rep->vport_rx_rule);
 	mlx5e_destroy_direct_tirs(priv);
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ce8c54d18906..5d9ac0dbf3bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -47,21 +47,22 @@
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
 	u64			cookie;
-	struct mlx5_flow_rule	*rule;
+	struct mlx5_flow_handle *rule;
 	struct mlx5_esw_flow_attr *attr;
 };
 
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-						    struct mlx5_flow_spec *spec,
-						    u32 action, u32 flow_tag)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+		      struct mlx5_flow_spec *spec,
+		      u32 action, u32 flow_tag)
 {
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest = { 0 };
 	struct mlx5_fc *counter = NULL;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	bool table_created = false;
 
 	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@ -94,9 +95,9 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	rule = mlx5_add_flow_rule(priv->fs.tc.t, spec,
-				  action, flow_tag,
-				  &dest);
+	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec,
+				   action, flow_tag,
+				   &dest, 1);
 
 	if (IS_ERR(rule))
 		goto err_add_rule;
@@ -114,9 +115,10 @@ err_create_ft:
 	return rule;
 }
 
-static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
-						    struct mlx5_flow_spec *spec,
-						    struct mlx5_esw_flow_attr *attr)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+		      struct mlx5_flow_spec *spec,
+		      struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int err;
@@ -129,7 +131,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-			      struct mlx5_flow_rule *rule,
+			      struct mlx5_flow_handle *rule,
 			      struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -140,7 +142,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 	if (esw && esw->mode == SRIOV_OFFLOADS)
 		mlx5_eswitch_del_vlan_action(esw, attr);
 
-	mlx5_del_flow_rule(rule);
+	mlx5_del_flow_rules(rule);
 
 	mlx5_fc_destroy(priv->mdev, counter);
 
@@ -450,7 +452,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	u32 flow_tag, action;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_rule *old = NULL;
+	struct mlx5_flow_handle *old = NULL;
 	struct mlx5_esw_flow_attr *old_attr = NULL;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
@@ -511,7 +513,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	goto out;
 
 err_del_rule:
-	mlx5_del_flow_rule(flow->rule);
+	mlx5_del_flow_rules(flow->rule);
 
 err_free:
 	if (!old)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9ef01d1bea06..fcd8b15f6625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -56,7 +56,7 @@ struct esw_uc_addr {
 /* E-Switch MC FDB table hash node */
 struct esw_mc_addr { /* SRIOV only */
 	struct l2addr_node     node;
-	struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
+	struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
 	u32                    refcnt;
 };
 
@@ -65,7 +65,7 @@ struct vport_addr {
 	struct l2addr_node     node;
 	u8                     action;
 	u32                    vport;
-	struct mlx5_flow_rule *flow_rule; /* SRIOV only */
+	struct mlx5_flow_handle *flow_rule; /* SRIOV only */
 	/* A flag indicating that mac was added due to mc promiscuous vport */
 	bool mc_promisc;
 };
@@ -237,13 +237,13 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
 }
 
 /* E-Switch FDB */
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 			 u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
 	int match_header = (is_zero_ether_addr(mac_c) ? 0 :
 			    MLX5_MATCH_OUTER_HEADERS);
-	struct mlx5_flow_rule *flow_rule = NULL;
+	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_spec *spec;
 	void *mv_misc = NULL;
@@ -286,9 +286,9 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 		  dmac_v, dmac_c, vport);
 	spec->match_criteria_enable = match_header;
 	flow_rule =
-		mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				   0, &dest);
+		mlx5_add_flow_rules(esw->fdb_table.fdb, spec,
+				    MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				    0, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev,
 			 "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
@@ -300,7 +300,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	return flow_rule;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -309,7 +309,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -322,7 +322,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -515,7 +515,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	del_l2_table_entry(esw->dev, esw_uc->table_index);
 
 	if (vaddr->flow_rule)
-		mlx5_del_flow_rule(vaddr->flow_rule);
+		mlx5_del_flow_rules(vaddr->flow_rule);
 	vaddr->flow_rule = NULL;
 
 	l2addr_hash_del(esw_uc);
@@ -562,7 +562,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 		case MLX5_ACTION_DEL:
 			if (!iter_vaddr)
 				continue;
-			mlx5_del_flow_rule(iter_vaddr->flow_rule);
+			mlx5_del_flow_rules(iter_vaddr->flow_rule);
 			l2addr_hash_del(iter_vaddr);
 			break;
 		}
@@ -632,7 +632,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 		  esw_mc->uplink_rule);
 
 	if (vaddr->flow_rule)
-		mlx5_del_flow_rule(vaddr->flow_rule);
+		mlx5_del_flow_rules(vaddr->flow_rule);
 	vaddr->flow_rule = NULL;
 
 	/* If the multicast mac is added as a result of mc promiscuous vport,
@@ -645,7 +645,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	update_allmulti_vports(esw, vaddr, esw_mc);
 
 	if (esw_mc->uplink_rule)
-		mlx5_del_flow_rule(esw_mc->uplink_rule);
+		mlx5_del_flow_rules(esw_mc->uplink_rule);
 
 	l2addr_hash_del(esw_mc);
 	return 0;
@@ -828,14 +828,14 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
 								UPLINK_VPORT);
 		allmulti_addr->refcnt++;
 	} else if (vport->allmulti_rule) {
-		mlx5_del_flow_rule(vport->allmulti_rule);
+		mlx5_del_flow_rules(vport->allmulti_rule);
 		vport->allmulti_rule = NULL;
 
 		if (--allmulti_addr->refcnt > 0)
 			goto promisc;
 
 		if (allmulti_addr->uplink_rule)
-			mlx5_del_flow_rule(allmulti_addr->uplink_rule);
+			mlx5_del_flow_rules(allmulti_addr->uplink_rule);
 		allmulti_addr->uplink_rule = NULL;
 	}
 
@@ -847,7 +847,7 @@ promisc:
 		vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
 								     vport_num);
 	} else if (vport->promisc_rule) {
-		mlx5_del_flow_rule(vport->promisc_rule);
+		mlx5_del_flow_rules(vport->promisc_rule);
 		vport->promisc_rule = NULL;
 	}
 }
@@ -1015,10 +1015,10 @@ static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
 					   struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
-		mlx5_del_flow_rule(vport->egress.allowed_vlan);
+		mlx5_del_flow_rules(vport->egress.allowed_vlan);
 
 	if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
-		mlx5_del_flow_rule(vport->egress.drop_rule);
+		mlx5_del_flow_rules(vport->egress.drop_rule);
 
 	vport->egress.allowed_vlan = NULL;
 	vport->egress.drop_rule = NULL;
@@ -1173,10 +1173,10 @@ static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
 					    struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
-		mlx5_del_flow_rule(vport->ingress.drop_rule);
+		mlx5_del_flow_rules(vport->ingress.drop_rule);
 
 	if (!IS_ERR_OR_NULL(vport->ingress.allow_rule))
-		mlx5_del_flow_rule(vport->ingress.allow_rule);
+		mlx5_del_flow_rules(vport->ingress.allow_rule);
 
 	vport->ingress.drop_rule = NULL;
 	vport->ingress.allow_rule = NULL;
@@ -1253,9 +1253,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	vport->ingress.allow_rule =
-		mlx5_add_flow_rule(vport->ingress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->ingress.acl, spec,
+				    MLX5_FLOW_CONTEXT_ACTION_ALLOW,
+				    0, NULL, 0);
 	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
 		esw_warn(esw->dev,
@@ -1267,9 +1267,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 
 	memset(spec, 0, sizeof(*spec));
 	vport->ingress.drop_rule =
-		mlx5_add_flow_rule(vport->ingress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_DROP,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->ingress.acl, spec,
+				    MLX5_FLOW_CONTEXT_ACTION_DROP,
+				    0, NULL, 0);
 	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
 		esw_warn(esw->dev,
@@ -1321,9 +1321,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	vport->egress.allowed_vlan =
-		mlx5_add_flow_rule(vport->egress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    MLX5_FLOW_CONTEXT_ACTION_ALLOW,
+				    0, NULL, 0);
 	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
 		esw_warn(esw->dev,
@@ -1336,9 +1336,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	/* Drop others rule (star rule) */
 	memset(spec, 0, sizeof(*spec));
 	vport->egress.drop_rule =
-		mlx5_add_flow_rule(vport->egress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_DROP,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    MLX5_FLOW_CONTEXT_ACTION_DROP,
+				    0, NULL, 0);
 	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
 		esw_warn(esw->dev,
@@ -1667,7 +1667,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 		esw_disable_vport(esw, i);
 
 	if (mc_promisc && mc_promisc->uplink_rule)
-		mlx5_del_flow_rule(mc_promisc->uplink_rule);
+		mlx5_del_flow_rules(mc_promisc->uplink_rule);
 
 	esw_destroy_tsar(esw);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ddae90c1f15b..6d414cb1b75f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -97,16 +97,16 @@ struct vport_ingress {
 	struct mlx5_flow_group *allow_spoofchk_only_grp;
 	struct mlx5_flow_group *allow_untagged_only_grp;
 	struct mlx5_flow_group *drop_grp;
-	struct mlx5_flow_rule  *allow_rule;
-	struct mlx5_flow_rule  *drop_rule;
+	struct mlx5_flow_handle  *allow_rule;
+	struct mlx5_flow_handle  *drop_rule;
 };
 
 struct vport_egress {
 	struct mlx5_flow_table *acl;
 	struct mlx5_flow_group *allowed_vlans_grp;
 	struct mlx5_flow_group *drop_grp;
-	struct mlx5_flow_rule  *allowed_vlan;
-	struct mlx5_flow_rule  *drop_rule;
+	struct mlx5_flow_handle  *allowed_vlan;
+	struct mlx5_flow_handle  *drop_rule;
 };
 
 struct mlx5_vport_info {
@@ -125,8 +125,8 @@ struct mlx5_vport {
 	int                     vport;
 	struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
 	struct hlist_head       mc_list[MLX5_L2_ADDR_HASH_SIZE];
-	struct mlx5_flow_rule   *promisc_rule;
-	struct mlx5_flow_rule   *allmulti_rule;
+	struct mlx5_flow_handle *promisc_rule;
+	struct mlx5_flow_handle *allmulti_rule;
 	struct work_struct      vport_change_handler;
 
 	struct vport_ingress    ingress;
@@ -162,7 +162,7 @@ struct mlx5_eswitch_fdb {
 			struct mlx5_flow_table *fdb;
 			struct mlx5_flow_group *send_to_vport_grp;
 			struct mlx5_flow_group *miss_grp;
-			struct mlx5_flow_rule  *miss_rule;
+			struct mlx5_flow_handle *miss_rule;
 			int vlan_push_pop_refcount;
 		} offloads;
 	};
@@ -175,7 +175,7 @@ enum {
 };
 
 struct mlx5_esw_sq {
-	struct mlx5_flow_rule	*send_to_vport_rule;
+	struct mlx5_flow_handle	*send_to_vport_rule;
 	struct list_head	 list;
 };
 
@@ -188,7 +188,7 @@ struct mlx5_eswitch_rep {
 	u8		       hw_id[ETH_ALEN];
 	void		      *priv_data;
 
-	struct mlx5_flow_rule *vport_rx_rule;
+	struct mlx5_flow_handle *vport_rx_rule;
 	struct list_head       vport_sqs_list;
 	u16		       vlan;
 	u32		       vlan_refcount;
@@ -257,11 +257,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
 				struct mlx5_esw_flow_attr *attr);
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c55ad8d00c05..8b2a3832cd0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,14 +43,14 @@ enum {
 	FDB_SLOW_PATH
 };
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
 				struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_flow_destination dest = { 0 };
 	struct mlx5_fc *counter = NULL;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	void *misc;
 	int action;
 
@@ -80,8 +80,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
 				      MLX5_MATCH_MISC_PARAMETERS;
 
-	rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
-				  spec, action, 0, &dest);
+	rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
+				   spec, action, 0, &dest, 1);
 
 	if (IS_ERR(rule))
 		mlx5_fc_destroy(esw->dev, counter);
@@ -269,11 +269,11 @@ out:
 	return err;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
@@ -296,9 +296,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = vport;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					0, &dest, 1);
 	if (IS_ERR(flow_rule))
 		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 out:
@@ -315,7 +315,7 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 		return;
 
 	list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
-		mlx5_del_flow_rule(esw_sq->send_to_vport_rule);
+		mlx5_del_flow_rules(esw_sq->send_to_vport_rule);
 		list_del(&esw_sq->list);
 		kfree(esw_sq);
 	}
@@ -325,7 +325,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep,
 				 u16 *sqns_array, int sqns_num)
 {
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_esw_sq *esw_sq;
 	int err;
 	int i;
@@ -362,7 +362,7 @@ out_err:
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule = NULL;
+	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -376,9 +376,9 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = 0;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					0, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		err = PTR_ERR(flow_rule);
 		esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
@@ -501,7 +501,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
 		return;
 
 	esw_debug(esw->dev, "Destroy offloads FDB Table\n");
-	mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
+	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -585,11 +585,11 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
 	mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
 }
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 {
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
@@ -610,9 +610,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = tirn;
 
-	flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					0, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 43d7052c76fc..6732287a98c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -155,6 +155,9 @@ static void del_flow_group(struct fs_node *node);
 static void del_fte(struct fs_node *node);
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 				struct mlx5_flow_destination *d2);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+	       struct mlx5_flow_destination *dest);
 
 static void tree_init_node(struct fs_node *node,
 			   unsigned int refcount,
@@ -640,8 +643,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	return err;
 }
 
-int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
-				 struct mlx5_flow_destination *dest)
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+					 struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
@@ -666,6 +669,28 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	return err;
 }
 
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+				 struct mlx5_flow_destination *new_dest,
+				 struct mlx5_flow_destination *old_dest)
+{
+	int i;
+
+	if (!old_dest) {
+		if (handle->num_rules != 1)
+			return -EINVAL;
+		return _mlx5_modify_rule_destination(handle->rule[0],
+						     new_dest);
+	}
+
+	for (i = 0; i < handle->num_rules; i++) {
+		if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
+			return _mlx5_modify_rule_destination(handle->rule[i],
+							     new_dest);
+	}
+
+	return -EINVAL;
+}
+
 /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft  */
 static int connect_fwd_rules(struct mlx5_core_dev *dev,
 			     struct mlx5_flow_table *new_next_ft,
@@ -688,7 +713,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
 	mutex_unlock(&old_next_ft->lock);
 	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
-		err = mlx5_modify_rule_destination(iter, &dest);
+		err = _mlx5_modify_rule_destination(iter, &dest);
 		if (err)
 			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
 			       new_next_ft->id);
@@ -917,41 +942,117 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
 	return rule;
 }
 
-/* fte should not be deleted while calling this function */
-static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
-					   struct mlx5_flow_group *fg,
-					   struct mlx5_flow_destination *dest,
-					   bool update_action)
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
 {
+	struct mlx5_flow_handle *handle;
+
+	handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
+			  num_rules, GFP_KERNEL);
+	if (!handle)
+		return NULL;
+
+	handle->num_rules = num_rules;
+
+	return handle;
+}
+
+static void destroy_flow_handle(struct fs_fte *fte,
+				struct mlx5_flow_handle *handle,
+				struct mlx5_flow_destination *dest,
+				int i)
+{
+	for (; --i >= 0;) {
+		if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
+			fte->dests_size--;
+			list_del(&handle->rule[i]->node.list);
+			kfree(handle->rule[i]);
+		}
+	}
+	kfree(handle);
+}
+
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+		   struct mlx5_flow_destination *dest,
+		   int dest_num,
+		   int *modify_mask,
+		   bool *new_rule)
+{
+	struct mlx5_flow_handle *handle;
+	struct mlx5_flow_rule *rule = NULL;
+	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+	int type;
+	int i = 0;
+
+	handle = alloc_handle((dest_num) ? dest_num : 1);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	do {
+		if (dest) {
+			rule = find_flow_rule(fte, dest + i);
+			if (rule) {
+				atomic_inc(&rule->node.refcount);
+				goto rule_found;
+			}
+		}
+
+		*new_rule = true;
+		rule = alloc_rule(dest + i);
+		if (!rule)
+			goto free_rules;
+
+		/* Add dest to dests list- we need flow tables to be in the
+		 * end of the list for forward to next prio rules.
+		 */
+		tree_init_node(&rule->node, 1, del_rule);
+		if (dest &&
+		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+			list_add(&rule->node.list, &fte->node.children);
+		else
+			list_add_tail(&rule->node.list, &fte->node.children);
+		if (dest) {
+			fte->dests_size++;
+
+			type = dest[i].type ==
+				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+			*modify_mask |= type ? count : dst;
+		}
+rule_found:
+		handle->rule[i] = rule;
+	} while (++i < dest_num);
+
+	return handle;
+
+free_rules:
+	destroy_flow_handle(fte, handle, dest, i);
+	return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+	     struct mlx5_flow_group *fg,
+	     struct mlx5_flow_destination *dest,
+	     int dest_num,
+	     bool update_action)
+{
+	struct mlx5_flow_handle *handle;
 	struct mlx5_flow_table *ft;
-	struct mlx5_flow_rule *rule;
 	int modify_mask = 0;
 	int err;
+	bool new_rule = false;
 
-	rule = alloc_rule(dest);
-	if (!rule)
-		return ERR_PTR(-ENOMEM);
+	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+				    &new_rule);
+	if (IS_ERR(handle) || !new_rule)
+		goto out;
 
 	if (update_action)
 		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 
 	fs_get_obj(ft, fg->node.parent);
-	/* Add dest to dests list- we need flow tables to be in the
-	 * end of the list for forward to next prio rules.
-	 */
-	tree_init_node(&rule->node, 1, del_rule);
-	if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-		list_add(&rule->node.list, &fte->node.children);
-	else
-		list_add_tail(&rule->node.list, &fte->node.children);
-	if (dest) {
-		fte->dests_size++;
-
-		modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ?
-			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) :
-			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
-	}
-
 	if (!(fte->status & FS_FTE_STATUS_EXISTING))
 		err = mlx5_cmd_create_fte(get_dev(&ft->node),
 					  ft, fg->id, fte);
@@ -959,17 +1060,15 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
 		err = mlx5_cmd_update_fte(get_dev(&ft->node),
 					  ft, fg->id, modify_mask, fte);
 	if (err)
-		goto free_rule;
+		goto free_handle;
 
 	fte->status |= FS_FTE_STATUS_EXISTING;
 
-	return rule;
+out:
+	return handle;
 
-free_rule:
-	list_del(&rule->node.list);
-	kfree(rule);
-	if (dest)
-		fte->dests_size--;
+free_handle:
+	destroy_flow_handle(fte, handle, dest, handle->num_rules);
 	return ERR_PTR(err);
 }
 
@@ -1098,16 +1197,18 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
 	return NULL;
 }
 
-static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
-					  u32 *match_value,
-					  u8 action,
-					  u32 flow_tag,
-					  struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+					    u32 *match_value,
+					    u8 action,
+					    u32 flow_tag,
+					    struct mlx5_flow_destination *dest,
+					    int dest_num)
 {
-	struct fs_fte *fte;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *handle;
 	struct mlx5_flow_table *ft;
 	struct list_head *prev;
+	struct fs_fte *fte;
+	int i;
 
 	nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
 	fs_for_each_fte(fte, fg) {
@@ -1116,40 +1217,33 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
 		    (action & fte->action) && flow_tag == fte->flow_tag) {
 			int old_action = fte->action;
 
-			rule = find_flow_rule(fte, dest);
-			if (rule) {
-				atomic_inc(&rule->node.refcount);
-				unlock_ref_node(&fte->node);
-				unlock_ref_node(&fg->node);
-				return rule;
-			}
 			fte->action |= action;
-			rule = add_rule_fte(fte, fg, dest,
-					    old_action != action);
-			if (IS_ERR(rule)) {
+			handle = add_rule_fte(fte, fg, dest, dest_num,
+					      old_action != action);
+			if (IS_ERR(handle)) {
 				fte->action = old_action;
 				goto unlock_fte;
 			} else {
-				goto add_rule;
+				goto add_rules;
 			}
 		}
 		unlock_ref_node(&fte->node);
 	}
 	fs_get_obj(ft, fg->node.parent);
 	if (fg->num_ftes >= fg->max_ftes) {
-		rule = ERR_PTR(-ENOSPC);
+		handle = ERR_PTR(-ENOSPC);
 		goto unlock_fg;
 	}
 
 	fte = create_fte(fg, match_value, action, flow_tag, &prev);
 	if (IS_ERR(fte)) {
-		rule = (void *)fte;
+		handle = (void *)fte;
 		goto unlock_fg;
 	}
 	tree_init_node(&fte->node, 0, del_fte);
 	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
-	rule = add_rule_fte(fte, fg, dest, false);
-	if (IS_ERR(rule)) {
+	handle = add_rule_fte(fte, fg, dest, dest_num, false);
+	if (IS_ERR(handle)) {
 		kfree(fte);
 		goto unlock_fg;
 	}
@@ -1158,21 +1252,24 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
 
 	tree_add_node(&fte->node, &fg->node);
 	list_add(&fte->node.list, prev);
-add_rule:
-	tree_add_node(&rule->node, &fte->node);
+add_rules:
+	for (i = 0; i < handle->num_rules; i++) {
+		if (atomic_read(&handle->rule[i]->node.refcount) == 1)
+			tree_add_node(&handle->rule[i]->node, &fte->node);
+	}
 unlock_fte:
 	unlock_ref_node(&fte->node);
 unlock_fg:
 	unlock_ref_node(&fg->node);
-	return rule;
+	return handle;
 }
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule)
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
 {
 	struct mlx5_flow_rule *dst;
 	struct fs_fte *fte;
 
-	fs_get_obj(fte, rule->node.parent);
+	fs_get_obj(fte, handle->rule[0]->node.parent);
 
 	fs_for_each_dst(dst, fte) {
 		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
@@ -1211,18 +1308,22 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 	return true;
 }
 
-static struct mlx5_flow_rule *
-_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   struct mlx5_flow_spec *spec,
-		    u32 action,
-		    u32 flow_tag,
-		    struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+		     struct mlx5_flow_spec *spec,
+		     u32 action,
+		     u32 flow_tag,
+		     struct mlx5_flow_destination *dest,
+		     int dest_num)
 {
 	struct mlx5_flow_group *g;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
+	int i;
 
-	if (!dest_is_valid(dest, action, ft))
-		return ERR_PTR(-EINVAL);
+	for (i = 0; i < dest_num; i++) {
+		if (!dest_is_valid(&dest[i], action, ft))
+			return ERR_PTR(-EINVAL);
+	}
 
 	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
 	fs_for_each_fg(g, ft)
@@ -1231,7 +1332,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 					   g->mask.match_criteria,
 					   spec->match_criteria)) {
 			rule = add_rule_fg(g, spec->match_value,
-					   action, flow_tag, dest);
+					   action, flow_tag, dest, dest_num);
 			if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
 				goto unlock;
 		}
@@ -1244,7 +1345,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 	}
 
 	rule = add_rule_fg(g, spec->match_value,
-			   action, flow_tag, dest);
+			   action, flow_tag, dest, dest_num);
 	if (IS_ERR(rule)) {
 		/* Remove assumes refcount > 0 and autogroup creates a group
 		 * with a refcount = 0.
@@ -1265,17 +1366,18 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
 		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
 }
 
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   struct mlx5_flow_spec *spec,
-		   u32 action,
-		   u32 flow_tag,
-		   struct mlx5_flow_destination *dest)
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+		    struct mlx5_flow_spec *spec,
+		    u32 action,
+		    u32 flow_tag,
+		    struct mlx5_flow_destination *dest,
+		    int dest_num)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_destination gen_dest;
 	struct mlx5_flow_table *next_ft = NULL;
-	struct mlx5_flow_rule *rule = NULL;
+	struct mlx5_flow_handle *handle = NULL;
 	u32 sw_action = action;
 	struct fs_prio *prio;
 
@@ -1291,6 +1393,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 			gen_dest.ft = next_ft;
 			dest = &gen_dest;
+			dest_num = 1;
 			action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		} else {
 			mutex_unlock(&root->chain_lock);
@@ -1298,27 +1401,33 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 		}
 	}
 
-	rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest);
+	handle = _mlx5_add_flow_rules(ft, spec, action, flow_tag, dest,
+				      dest_num);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
-		if (!IS_ERR_OR_NULL(rule) &&
-		    (list_empty(&rule->next_ft))) {
+		if (!IS_ERR_OR_NULL(handle) &&
+		    (list_empty(&handle->rule[0]->next_ft))) {
 			mutex_lock(&next_ft->lock);
-			list_add(&rule->next_ft, &next_ft->fwd_rules);
+			list_add(&handle->rule[0]->next_ft,
+				 &next_ft->fwd_rules);
 			mutex_unlock(&next_ft->lock);
-			rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+			handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 		}
 		mutex_unlock(&root->chain_lock);
 	}
-	return rule;
+	return handle;
 }
-EXPORT_SYMBOL(mlx5_add_flow_rule);
+EXPORT_SYMBOL(mlx5_add_flow_rules);
 
-void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
 {
-	tree_remove_node(&rule->node);
+	int i;
+
+	for (i = handle->num_rules - 1; i >= 0; i--)
+		tree_remove_node(&handle->rule[i]->node);
+	kfree(handle);
 }
-EXPORT_SYMBOL(mlx5_del_flow_rule);
+EXPORT_SYMBOL(mlx5_del_flow_rules);
 
 /* Assuming prio->node.children(flow tables) is sorted by level */
 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 71ff03bceabb..d5150888645c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -94,6 +94,11 @@ struct mlx5_flow_rule {
 	u32					sw_action;
 };
 
+struct mlx5_flow_handle {
+	int num_rules;
+	struct mlx5_flow_rule *rule[];
+};
+
 /* Type of children is mlx5_flow_group */
 struct mlx5_flow_table {
 	struct fs_node			node;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 93ebc5e21334..0dcd287f4bd0 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -69,8 +69,8 @@ enum mlx5_flow_namespace_type {
 
 struct mlx5_flow_table;
 struct mlx5_flow_group;
-struct mlx5_flow_rule;
 struct mlx5_flow_namespace;
+struct mlx5_flow_handle;
 
 struct mlx5_flow_spec {
 	u8   match_criteria_enable;
@@ -127,18 +127,20 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
  */
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   struct mlx5_flow_spec *spec,
-		   u32 action,
-		   u32 flow_tag,
-		   struct mlx5_flow_destination *dest);
-void mlx5_del_flow_rule(struct mlx5_flow_rule *fr);
-
-int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
-				 struct mlx5_flow_destination *dest);
-
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule);
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+		    struct mlx5_flow_spec *spec,
+		    u32 action,
+		    u32 flow_tag,
+		    struct mlx5_flow_destination *dest,
+		    int dest_num);
+void mlx5_del_flow_rules(struct mlx5_flow_handle *fr);
+
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
+				 struct mlx5_flow_destination *new_dest,
+				 struct mlx5_flow_destination *old_dest);
+
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler);
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
-- 
cgit v1.2.3


From e02737d5b82640497637d18428e2793bb7f02881 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 28 Oct 2016 15:04:46 -0700
Subject: x86/intel_rdt: Add tasks files

The root directory all subdirectories are automatically populated with a
read/write (mode 0644) file named "tasks". When read it will show all the
task IDs assigned to the resource group. Tasks can be added (one at a time)
to a group by writing the task ID to the file.  E.g.

Membership in a resource group is indicated by a new field in the
task_struct "int closid" which holds the CLOSID for each task. The default
resource group uses CLOSID=0 which means that all existing tasks when the
resctrl file system is mounted belong to the default group.

If a group is removed, tasks which are members of that group are moved to
the default group.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Cc: "Ravi V Shankar" <ravi.v.shankar@intel.com>
Cc: "Tony Luck" <tony.luck@intel.com>
Cc: "Shaohua Li" <shli@fb.com>
Cc: "Sai Prakhya" <sai.praneeth.prakhya@intel.com>
Cc: "Peter Zijlstra" <peterz@infradead.org>
Cc: "Stephane Eranian" <eranian@google.com>
Cc: "Dave Hansen" <dave.hansen@intel.com>
Cc: "David Carrillo-Cisneros" <davidcc@google.com>
Cc: "Nilay Vaish" <nilayvaish@gmail.com>
Cc: "Vikas Shivappa" <vikas.shivappa@linux.intel.com>
Cc: "Ingo Molnar" <mingo@elte.hu>
Cc: "Borislav Petkov" <bp@suse.de>
Cc: "H. Peter Anvin" <h.peter.anvin@intel.com>
Link: http://lkml.kernel.org/r/1477692289-37412-8-git-send-email-fenghua.yu@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 180 +++++++++++++++++++++++++++++++
 include/linux/sched.h                    |   3 +
 2 files changed, 183 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index e05a18685fc8..5cc0865f2908 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/task_work.h>
 
 #include <uapi/linux/magic.h>
 
@@ -267,6 +268,162 @@ unlock:
 	return ret ?: nbytes;
 }
 
+struct task_move_callback {
+	struct callback_head	work;
+	struct rdtgroup		*rdtgrp;
+};
+
+static void move_myself(struct callback_head *head)
+{
+	struct task_move_callback *callback;
+	struct rdtgroup *rdtgrp;
+
+	callback = container_of(head, struct task_move_callback, work);
+	rdtgrp = callback->rdtgrp;
+
+	/*
+	 * If resource group was deleted before this task work callback
+	 * was invoked, then assign the task to root group and free the
+	 * resource group.
+	 */
+	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+	    (rdtgrp->flags & RDT_DELETED)) {
+		current->closid = 0;
+		kfree(rdtgrp);
+	}
+
+	kfree(callback);
+}
+
+static int __rdtgroup_move_task(struct task_struct *tsk,
+				struct rdtgroup *rdtgrp)
+{
+	struct task_move_callback *callback;
+	int ret;
+
+	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+	if (!callback)
+		return -ENOMEM;
+	callback->work.func = move_myself;
+	callback->rdtgrp = rdtgrp;
+
+	/*
+	 * Take a refcount, so rdtgrp cannot be freed before the
+	 * callback has been invoked.
+	 */
+	atomic_inc(&rdtgrp->waitcount);
+	ret = task_work_add(tsk, &callback->work, true);
+	if (ret) {
+		/*
+		 * Task is exiting. Drop the refcount and free the callback.
+		 * No need to check the refcount as the group cannot be
+		 * deleted before the write function unlocks rdtgroup_mutex.
+		 */
+		atomic_dec(&rdtgrp->waitcount);
+		kfree(callback);
+	} else {
+		tsk->closid = rdtgrp->closid;
+	}
+	return ret;
+}
+
+static int rdtgroup_task_write_permission(struct task_struct *task,
+					  struct kernfs_open_file *of)
+{
+	const struct cred *tcred = get_task_cred(task);
+	const struct cred *cred = current_cred();
+	int ret = 0;
+
+	/*
+	 * Even if we're attaching all tasks in the thread group, we only
+	 * need to check permissions on one of them.
+	 */
+	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+	    !uid_eq(cred->euid, tcred->uid) &&
+	    !uid_eq(cred->euid, tcred->suid))
+		ret = -EPERM;
+
+	put_cred(tcred);
+	return ret;
+}
+
+static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+			      struct kernfs_open_file *of)
+{
+	struct task_struct *tsk;
+	int ret;
+
+	rcu_read_lock();
+	if (pid) {
+		tsk = find_task_by_vpid(pid);
+		if (!tsk) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+	} else {
+		tsk = current;
+	}
+
+	get_task_struct(tsk);
+	rcu_read_unlock();
+
+	ret = rdtgroup_task_write_permission(tsk, of);
+	if (!ret)
+		ret = __rdtgroup_move_task(tsk, rdtgrp);
+
+	put_task_struct(tsk);
+	return ret;
+}
+
+static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
+{
+	struct rdtgroup *rdtgrp;
+	int ret = 0;
+	pid_t pid;
+
+	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+		return -EINVAL;
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+	if (rdtgrp)
+		ret = rdtgroup_move_task(pid, rdtgrp, of);
+	else
+		ret = -ENOENT;
+
+	rdtgroup_kn_unlock(of->kn);
+
+	return ret ?: nbytes;
+}
+
+static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
+{
+	struct task_struct *p, *t;
+
+	rcu_read_lock();
+	for_each_process_thread(p, t) {
+		if (t->closid == r->closid)
+			seq_printf(s, "%d\n", t->pid);
+	}
+	rcu_read_unlock();
+}
+
+static int rdtgroup_tasks_show(struct kernfs_open_file *of,
+			       struct seq_file *s, void *v)
+{
+	struct rdtgroup *rdtgrp;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (rdtgrp)
+		show_rdt_tasks(rdtgrp, s);
+	else
+		ret = -ENOENT;
+	rdtgroup_kn_unlock(of->kn);
+
+	return ret;
+}
+
 /* Files in each rdtgroup */
 static struct rftype rdtgroup_base_files[] = {
 	{
@@ -276,6 +433,13 @@ static struct rftype rdtgroup_base_files[] = {
 		.write		= rdtgroup_cpus_write,
 		.seq_show	= rdtgroup_cpus_show,
 	},
+	{
+		.name		= "tasks",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_tasks_write,
+		.seq_show	= rdtgroup_tasks_show,
+	},
 };
 
 static int rdt_num_closids_show(struct kernfs_open_file *of,
@@ -592,6 +756,13 @@ static void rdt_reset_pqr_assoc_closid(void *v)
 static void rmdir_all_sub(void)
 {
 	struct rdtgroup *rdtgrp, *tmp;
+	struct task_struct *p, *t;
+
+	/* move all tasks to default resource group */
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t)
+		t->closid = 0;
+	read_unlock(&tasklist_lock);
 
 	get_cpu();
 	/* Reset PQR_ASSOC MSR on this cpu. */
@@ -712,6 +883,7 @@ out_unlock:
 
 static int rdtgroup_rmdir(struct kernfs_node *kn)
 {
+	struct task_struct *p, *t;
 	struct rdtgroup *rdtgrp;
 	int cpu, ret = 0;
 
@@ -721,6 +893,14 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
 		return -ENOENT;
 	}
 
+	/* Give any tasks back to the default group */
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t) {
+		if (t->closid == rdtgrp->closid)
+			t->closid = 0;
+	}
+	read_unlock(&tasklist_lock);
+
 	/* Give any CPUs back to the default group */
 	cpumask_or(&rdtgroup_default.cpu_mask,
 		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..c8f4152e7265 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1791,6 +1791,9 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock */
 	struct list_head cg_list;
 #endif
+#ifdef CONFIG_INTEL_RDT_A
+	int closid;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From 766763dbdc1dca11deabdb00077a1c19e2803f0a Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Thu, 27 Oct 2016 17:32:36 +0200
Subject: ARM: davinci: da8xx: Remove duplicated defines

Some macro for DA8xx CFGCHIP are defined in usb-davinci.h,
but da8xx-cfgchip.h intend to replace them.
Remove duplicated defines between da8xx-cfgchip.h and usb-davinci.h

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/board-da830-evm.c     |  5 +++--
 arch/arm/mach-davinci/board-omapl138-hawk.c |  3 ++-
 include/linux/platform_data/usb-davinci.h   | 23 -----------------------
 3 files changed, 5 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 3d8cf8cbd98a..df1f4091a8ae 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -18,6 +18,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
 #include <linux/platform_data/at24.h>
+#include <linux/mfd/da8xx-cfgchip.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/spi/spi.h>
@@ -116,7 +117,7 @@ static __init void da830_evm_usb_init(void)
 	cfgchip2 = __raw_readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG));
 
 	/* USB2.0 PHY reference clock is 24 MHz */
-	cfgchip2 &= ~CFGCHIP2_REFFREQ;
+	cfgchip2 &= ~CFGCHIP2_REFFREQ_MASK;
 	cfgchip2 |=  CFGCHIP2_REFFREQ_24MHZ;
 
 	/*
@@ -133,7 +134,7 @@ static __init void da830_evm_usb_init(void)
 	 * controller won't be able to drive VBUS thinking that it's a B-device.
 	 * Otherwise, we want to use the OTG mode and enable VBUS comparators.
 	 */
-	cfgchip2 &= ~CFGCHIP2_OTGMODE;
+	cfgchip2 &= ~CFGCHIP2_OTGMODE_MASK;
 #ifdef	CONFIG_USB_MUSB_HOST
 	cfgchip2 |=  CFGCHIP2_FORCE_HOST;
 #else
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index ee624861ca66..e1efa1066c1e 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/gpio.h>
+#include <linux/mfd/da8xx-cfgchip.h>
 #include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach-types.h>
@@ -254,7 +255,7 @@ static __init void omapl138_hawk_usb_init(void)
 	/* Setup the Ref. clock frequency for the HAWK at 24 MHz. */
 
 	cfgchip2 = __raw_readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG));
-	cfgchip2 &= ~CFGCHIP2_REFFREQ;
+	cfgchip2 &= ~CFGCHIP2_REFFREQ_MASK;
 	cfgchip2 |=  CFGCHIP2_REFFREQ_24MHZ;
 	__raw_writel(cfgchip2, DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG));
 
diff --git a/include/linux/platform_data/usb-davinci.h b/include/linux/platform_data/usb-davinci.h
index e0bc4abe69c2..0926e99f2e8f 100644
--- a/include/linux/platform_data/usb-davinci.h
+++ b/include/linux/platform_data/usb-davinci.h
@@ -11,29 +11,6 @@
 #ifndef __ASM_ARCH_USB_H
 #define __ASM_ARCH_USB_H
 
-/* DA8xx CFGCHIP2 (USB 2.0 PHY Control) register bits */
-#define CFGCHIP2_PHYCLKGD	(1 << 17)
-#define CFGCHIP2_VBUSSENSE	(1 << 16)
-#define CFGCHIP2_RESET		(1 << 15)
-#define CFGCHIP2_OTGMODE	(3 << 13)
-#define CFGCHIP2_NO_OVERRIDE	(0 << 13)
-#define CFGCHIP2_FORCE_HOST	(1 << 13)
-#define CFGCHIP2_FORCE_DEVICE 	(2 << 13)
-#define CFGCHIP2_FORCE_HOST_VBUS_LOW (3 << 13)
-#define CFGCHIP2_USB1PHYCLKMUX	(1 << 12)
-#define CFGCHIP2_USB2PHYCLKMUX	(1 << 11)
-#define CFGCHIP2_PHYPWRDN	(1 << 10)
-#define CFGCHIP2_OTGPWRDN	(1 << 9)
-#define CFGCHIP2_DATPOL 	(1 << 8)
-#define CFGCHIP2_USB1SUSPENDM	(1 << 7)
-#define CFGCHIP2_PHY_PLLON	(1 << 6)	/* override PLL suspend */
-#define CFGCHIP2_SESENDEN	(1 << 5)	/* Vsess_end comparator */
-#define CFGCHIP2_VBDTCTEN	(1 << 4)	/* Vbus comparator */
-#define CFGCHIP2_REFFREQ	(0xf << 0)
-#define CFGCHIP2_REFFREQ_12MHZ	(1 << 0)
-#define CFGCHIP2_REFFREQ_24MHZ	(2 << 0)
-#define CFGCHIP2_REFFREQ_48MHZ	(3 << 0)
-
 struct	da8xx_ohci_root_hub;
 
 typedef void (*da8xx_ocic_handler_t)(struct da8xx_ohci_root_hub *hub,
-- 
cgit v1.2.3


From c62cce2caee558e18aa05c01c2fd3b40f07174f2 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Mon, 24 Oct 2016 18:29:13 -0700
Subject: net: add an ioctl to get a socket network namespace

Each socket operates in a network namespace where it has been created,
so if we want to dump and restore a socket, we have to know its network
namespace.

We have a socket_diag to get information about sockets, it doesn't
report sockets which are not bound or connected.

This patch introduces a new socket ioctl, which is called SIOCGSKNS
and used to get a file descriptor for a socket network namespace.

A task must have CAP_NET_ADMIN in a target network namespace to
use this ioctl.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nsfs.c                    |  2 +-
 include/linux/proc_fs.h      |  4 ++++
 include/uapi/linux/sockios.h |  1 +
 net/socket.c                 | 13 +++++++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8718af895eab..8c9fb29c6673 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -118,7 +118,7 @@ again:
 	return ret;
 }
 
-static int open_related_ns(struct ns_common *ns,
+int open_related_ns(struct ns_common *ns,
 		   struct ns_common *(*get_ns)(struct ns_common *ns))
 {
 	struct path path = {};
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b97bf2ef996e..368c7ad06ae5 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir(
 	return proc_mkdir_data(name, 0, parent, net);
 }
 
+struct ns_common;
+int open_related_ns(struct ns_common *ns,
+		   struct ns_common *(*get_ns)(struct ns_common *ns));
+
 #endif /* _LINUX_PROC_FS_H */
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 8e7890b26d9a..83cc54ce6081 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -84,6 +84,7 @@
 #define SIOCWANDEV	0x894A		/* get/set netdev parameters	*/
 
 #define SIOCOUTQNSD	0x894B		/* output queue size (not sent only) */
+#define SIOCGSKNS	0x894C		/* get socket network namespace */
 
 /* ARP cache control calls. */
 		    /*  0x8950 - 0x8952  * obsolete calls, don't re-use */
diff --git a/net/socket.c b/net/socket.c
index 5a9bf5ee2464..970a7ea3fc4a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -877,6 +877,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *	what to do with it - that's up to the protocol still.
  */
 
+static struct ns_common *get_net_ns(struct ns_common *ns)
+{
+	return &get_net(container_of(ns, struct net, ns))->ns;
+}
+
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	struct socket *sock;
@@ -945,6 +950,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 				err = dlci_ioctl_hook(cmd, argp);
 			mutex_unlock(&dlci_ioctl_mutex);
 			break;
+		case SIOCGSKNS:
+			err = -EPERM;
+			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+				break;
+
+			err = open_related_ns(&net->ns, get_net_ns);
+			break;
 		default:
 			err = sock_do_ioctl(net, sock, cmd, arg);
 			break;
@@ -3093,6 +3105,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCSIFVLAN:
 	case SIOCADDDLCI:
 	case SIOCDELDLCI:
+	case SIOCGSKNS:
 		return sock_ioctl(file, cmd, arg);
 
 	case SIOCGIFFLAGS:
-- 
cgit v1.2.3


From 9ed9895370aedd6032af2a9181c62c394d08223b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 30 Oct 2016 17:32:16 +0100
Subject: driver core: Functional dependencies tracking support

Currently, there is a problem with taking functional dependencies
between devices into account.

What I mean by a "functional dependency" is when the driver of device
B needs device A to be functional and (generally) its driver to be
present in order to work properly.  This has certain consequences
for power management (suspend/resume and runtime PM ordering) and
shutdown ordering of these devices.  In general, it also implies that
the driver of A needs to be working for B to be probed successfully
and it cannot be unbound from the device before the B's driver.

Support for representing those functional dependencies between
devices is added here to allow the driver core to track them and act
on them in certain cases where applicable.

The argument for doing that in the driver core is that there are
quite a few distinct use cases involving device dependencies, they
are relatively hard to get right in a driver (if one wants to
address all of them properly) and it only gets worse if multiplied
by the number of drivers potentially needing to do it.  Morever, at
least one case (asynchronous system suspend/resume) cannot be handled
in a single driver at all, because it requires the driver of A to
wait for B to suspend (during system suspend) and the driver of B to
wait for A to resume (during system resume).

For this reason, represent dependencies between devices as "links",
with the help of struct device_link objects each containing pointers
to the "linked" devices, a list node for each of them, status
information, flags, and an RCU head for synchronization.

Also add two new list heads, representing the lists of links to the
devices that depend on the given one (consumers) and to the devices
depended on by it (suppliers), and a "driver presence status" field
(needed for figuring out initial states of device links) to struct
device.

The entire data structure consisting of all of the lists of link
objects for all devices is protected by a mutex (for link object
addition/removal and for list walks during device driver probing
and removal) and by SRCU (for list walking in other case that will
be introduced by subsequent change sets).  If CONFIG_SRCU is not
selected, however, an rwsem is used for protecting the entire data
structure.

In addition, each link object has an internal status field whose
value reflects whether or not drivers are bound to the devices
pointed to by the link or probing/removal of their drivers is in
progress etc.  That field is only modified under the device links
mutex, but it may be read outside of it in some cases (introduced by
subsequent change sets), so modifications of it are annotated with
WRITE_ONCE().

New links are added by calling device_link_add() which takes three
arguments: pointers to the devices in question and flags.  In
particular, if DL_FLAG_STATELESS is set in the flags, the link status
is not to be taken into account for this link and the driver core
will not manage it.  In turn, if DL_FLAG_AUTOREMOVE is set in the
flags, the driver core will remove the link automatically when the
consumer device driver unbinds from it.

One of the actions carried out by device_link_add() is to reorder
the lists used for device shutdown and system suspend/resume to
put the consumer device along with all of its children and all of
its consumers (and so on, recursively) to the ends of those lists
in order to ensure the right ordering between all of the supplier
and consumer devices.

For this reason, it is not possible to create a link between two
devices if the would-be supplier device already depends on the
would-be consumer device as either a direct descendant of it or a
consumer of one of its direct descendants or one of its consumers
and so on.

There are two types of link objects, persistent and non-persistent.
The persistent ones stay around until one of the target devices is
deleted, while the non-persistent ones are removed automatically when
the consumer driver unbinds from its device (ie. they are assumed to
be valid only as long as the consumer device has a driver bound to
it).  Persistent links are created by default and non-persistent
links are created when the DL_FLAG_AUTOREMOVE flag is passed
to device_link_add().

Both persistent and non-persistent device links can be deleted
with an explicit call to device_link_del().

Links created without the DL_FLAG_STATELESS flag set are managed
by the driver core using a simple state machine.  There are 5 states
each link can be in: DORMANT (unused), AVAILABLE (the supplier driver
is present and functional), CONSUMER_PROBE (the consumer driver is
probing), ACTIVE (both supplier and consumer drivers are present and
functional), and SUPPLIER_UNBIND (the supplier driver is unbinding).
The driver core updates the link state automatically depending on
what happens to the linked devices and for each link state specific
actions are taken in addition to that.

For example, if the supplier driver unbinds from its device, the
driver core will also unbind the drivers of all of its consumers
automatically under the assumption that they cannot function
properly without the supplier.  Analogously, the driver core will
only allow the consumer driver to bind to its device if the
supplier driver is present and functional (ie. the link is in
the AVAILABLE state).  If that's not the case, it will rely on
the existing deferred probing mechanism to wait for the supplier
driver to become available.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/base.h        |  13 ++
 drivers/base/core.c        | 540 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/base/dd.c          |  41 +++-
 drivers/base/power/main.c  |   2 +
 drivers/base/power/power.h |  10 +
 include/linux/device.h     |  80 +++++++
 include/linux/pm.h         |   1 +
 7 files changed, 682 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index e05db388bd1c..e19b1008e5fb 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -107,6 +107,9 @@ extern void bus_remove_device(struct device *dev);
 
 extern int bus_add_driver(struct device_driver *drv);
 extern void bus_remove_driver(struct device_driver *drv);
+extern void device_release_driver_internal(struct device *dev,
+					   struct device_driver *drv,
+					   struct device *parent);
 
 extern void driver_detach(struct device_driver *drv);
 extern int driver_probe_device(struct device_driver *drv, struct device *dev);
@@ -152,3 +155,13 @@ extern int devtmpfs_init(void);
 #else
 static inline int devtmpfs_init(void) { return 0; }
 #endif
+
+/* Device links support */
+extern int device_links_read_lock(void);
+extern void device_links_read_unlock(int idx);
+extern int device_links_check_suppliers(struct device *dev);
+extern void device_links_driver_bound(struct device *dev);
+extern void device_links_driver_cleanup(struct device *dev);
+extern void device_links_no_driver(struct device *dev);
+extern bool device_links_busy(struct device *dev);
+extern void device_links_unbind_consumers(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index ce057a568673..3c5ff17f578f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,541 @@ static int __init sysfs_deprecated_setup(char *arg)
 early_param("sysfs.deprecated", sysfs_deprecated_setup);
 #endif
 
+/* Device links support. */
+
+#ifdef CONFIG_SRCU
+static DEFINE_MUTEX(device_links_lock);
+DEFINE_STATIC_SRCU(device_links_srcu);
+
+static inline void device_links_write_lock(void)
+{
+	mutex_lock(&device_links_lock);
+}
+
+static inline void device_links_write_unlock(void)
+{
+	mutex_unlock(&device_links_lock);
+}
+
+int device_links_read_lock(void)
+{
+	return srcu_read_lock(&device_links_srcu);
+}
+
+void device_links_read_unlock(int idx)
+{
+	srcu_read_unlock(&device_links_srcu, idx);
+}
+#else /* !CONFIG_SRCU */
+static DECLARE_RWSEM(device_links_lock);
+
+static inline void device_links_write_lock(void)
+{
+	down_write(&device_links_lock);
+}
+
+static inline void device_links_write_unlock(void)
+{
+	up_write(&device_links_lock);
+}
+
+int device_links_read_lock(void)
+{
+	down_read(&device_links_lock);
+	return 0;
+}
+
+void device_links_read_unlock(int not_used)
+{
+	up_read(&device_links_lock);
+}
+#endif /* !CONFIG_SRCU */
+
+/**
+ * device_is_dependent - Check if one device depends on another one
+ * @dev: Device to check dependencies for.
+ * @target: Device to check against.
+ *
+ * Check if @target depends on @dev or any device dependent on it (its child or
+ * its consumer etc).  Return 1 if that is the case or 0 otherwise.
+ */
+static int device_is_dependent(struct device *dev, void *target)
+{
+	struct device_link *link;
+	int ret;
+
+	if (WARN_ON(dev == target))
+		return 1;
+
+	ret = device_for_each_child(dev, target, device_is_dependent);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (WARN_ON(link->consumer == target))
+			return 1;
+
+		ret = device_is_dependent(link->consumer, target);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int device_reorder_to_tail(struct device *dev, void *not_used)
+{
+	struct device_link *link;
+
+	/*
+	 * Devices that have not been registered yet will be put to the ends
+	 * of the lists during the registration, so skip them here.
+	 */
+	if (device_is_registered(dev))
+		devices_kset_move_last(dev);
+
+	if (device_pm_initialized(dev))
+		device_pm_move_last(dev);
+
+	device_for_each_child(dev, NULL, device_reorder_to_tail);
+	list_for_each_entry(link, &dev->links.consumers, s_node)
+		device_reorder_to_tail(link->consumer, NULL);
+
+	return 0;
+}
+
+/**
+ * device_link_add - Create a link between two devices.
+ * @consumer: Consumer end of the link.
+ * @supplier: Supplier end of the link.
+ * @flags: Link flags.
+ *
+ * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
+ * when the consumer device driver unbinds from it.  The combination of both
+ * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
+ * to be returned.
+ *
+ * A side effect of the link creation is re-ordering of dpm_list and the
+ * devices_kset list by moving the consumer device and all devices depending
+ * on it to the ends of these lists (that does not happen to devices that have
+ * not been registered when this function is called).
+ *
+ * The supplier device is required to be registered when this function is called
+ * and NULL will be returned if that is not the case.  The consumer device need
+ * not be registerd, however.
+ */
+struct device_link *device_link_add(struct device *consumer,
+				    struct device *supplier, u32 flags)
+{
+	struct device_link *link;
+
+	if (!consumer || !supplier ||
+	    ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE)))
+		return NULL;
+
+	device_links_write_lock();
+	device_pm_lock();
+
+	/*
+	 * If the supplier has not been fully registered yet or there is a
+	 * reverse dependency between the consumer and the supplier already in
+	 * the graph, return NULL.
+	 */
+	if (!device_pm_initialized(supplier)
+	    || device_is_dependent(consumer, supplier)) {
+		link = NULL;
+		goto out;
+	}
+
+	list_for_each_entry(link, &supplier->links.consumers, s_node)
+		if (link->consumer == consumer)
+			goto out;
+
+	link = kmalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		goto out;
+
+	get_device(supplier);
+	link->supplier = supplier;
+	INIT_LIST_HEAD(&link->s_node);
+	get_device(consumer);
+	link->consumer = consumer;
+	INIT_LIST_HEAD(&link->c_node);
+	link->flags = flags;
+
+	/* Deterine the initial link state. */
+	if (flags & DL_FLAG_STATELESS) {
+		link->status = DL_STATE_NONE;
+	} else {
+		switch (supplier->links.status) {
+		case DL_DEV_DRIVER_BOUND:
+			switch (consumer->links.status) {
+			case DL_DEV_PROBING:
+				link->status = DL_STATE_CONSUMER_PROBE;
+				break;
+			case DL_DEV_DRIVER_BOUND:
+				link->status = DL_STATE_ACTIVE;
+				break;
+			default:
+				link->status = DL_STATE_AVAILABLE;
+				break;
+			}
+			break;
+		case DL_DEV_UNBINDING:
+			link->status = DL_STATE_SUPPLIER_UNBIND;
+			break;
+		default:
+			link->status = DL_STATE_DORMANT;
+			break;
+		}
+	}
+
+	/*
+	 * Move the consumer and all of the devices depending on it to the end
+	 * of dpm_list and the devices_kset list.
+	 *
+	 * It is necessary to hold dpm_list locked throughout all that or else
+	 * we may end up suspending with a wrong ordering of it.
+	 */
+	device_reorder_to_tail(consumer, NULL);
+
+	list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
+	list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
+
+	dev_info(consumer, "Linked as a consumer to %s\n", dev_name(supplier));
+
+ out:
+	device_pm_unlock();
+	device_links_write_unlock();
+	return link;
+}
+EXPORT_SYMBOL_GPL(device_link_add);
+
+static void device_link_free(struct device_link *link)
+{
+	put_device(link->consumer);
+	put_device(link->supplier);
+	kfree(link);
+}
+
+#ifdef CONFIG_SRCU
+static void __device_link_free_srcu(struct rcu_head *rhead)
+{
+	device_link_free(container_of(rhead, struct device_link, rcu_head));
+}
+
+static void __device_link_del(struct device_link *link)
+{
+	dev_info(link->consumer, "Dropping the link to %s\n",
+		 dev_name(link->supplier));
+
+	list_del_rcu(&link->s_node);
+	list_del_rcu(&link->c_node);
+	call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu);
+}
+#else /* !CONFIG_SRCU */
+static void __device_link_del(struct device_link *link)
+{
+	dev_info(link->consumer, "Dropping the link to %s\n",
+		 dev_name(link->supplier));
+
+	list_del(&link->s_node);
+	list_del(&link->c_node);
+	device_link_free(link);
+}
+#endif /* !CONFIG_SRCU */
+
+/**
+ * device_link_del - Delete a link between two devices.
+ * @link: Device link to delete.
+ *
+ * The caller must ensure proper synchronization of this function with runtime
+ * PM.
+ */
+void device_link_del(struct device_link *link)
+{
+	device_links_write_lock();
+	device_pm_lock();
+	__device_link_del(link);
+	device_pm_unlock();
+	device_links_write_unlock();
+}
+EXPORT_SYMBOL_GPL(device_link_del);
+
+static void device_links_missing_supplier(struct device *dev)
+{
+	struct device_link *link;
+
+	list_for_each_entry(link, &dev->links.suppliers, c_node)
+		if (link->status == DL_STATE_CONSUMER_PROBE)
+			WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
+}
+
+/**
+ * device_links_check_suppliers - Check presence of supplier drivers.
+ * @dev: Consumer device.
+ *
+ * Check links from this device to any suppliers.  Walk the list of the device's
+ * links to suppliers and see if all of them are available.  If not, simply
+ * return -EPROBE_DEFER.
+ *
+ * We need to guarantee that the supplier will not go away after the check has
+ * been positive here.  It only can go away in __device_release_driver() and
+ * that function  checks the device's links to consumers.  This means we need to
+ * mark the link as "consumer probe in progress" to make the supplier removal
+ * wait for us to complete (or bad things may happen).
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+int device_links_check_suppliers(struct device *dev)
+{
+	struct device_link *link;
+	int ret = 0;
+
+	device_links_write_lock();
+
+	list_for_each_entry(link, &dev->links.suppliers, c_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		if (link->status != DL_STATE_AVAILABLE) {
+			device_links_missing_supplier(dev);
+			ret = -EPROBE_DEFER;
+			break;
+		}
+		WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE);
+	}
+	dev->links.status = DL_DEV_PROBING;
+
+	device_links_write_unlock();
+	return ret;
+}
+
+/**
+ * device_links_driver_bound - Update device links after probing its driver.
+ * @dev: Device to update the links for.
+ *
+ * The probe has been successful, so update links from this device to any
+ * consumers by changing their status to "available".
+ *
+ * Also change the status of @dev's links to suppliers to "active".
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+void device_links_driver_bound(struct device *dev)
+{
+	struct device_link *link;
+
+	device_links_write_lock();
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		WARN_ON(link->status != DL_STATE_DORMANT);
+		WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
+	}
+
+	list_for_each_entry(link, &dev->links.suppliers, c_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
+		WRITE_ONCE(link->status, DL_STATE_ACTIVE);
+	}
+
+	dev->links.status = DL_DEV_DRIVER_BOUND;
+
+	device_links_write_unlock();
+}
+
+/**
+ * __device_links_no_driver - Update links of a device without a driver.
+ * @dev: Device without a drvier.
+ *
+ * Delete all non-persistent links from this device to any suppliers.
+ *
+ * Persistent links stay around, but their status is changed to "available",
+ * unless they already are in the "supplier unbind in progress" state in which
+ * case they need not be updated.
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+static void __device_links_no_driver(struct device *dev)
+{
+	struct device_link *link, *ln;
+
+	list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		if (link->flags & DL_FLAG_AUTOREMOVE)
+			__device_link_del(link);
+		else if (link->status != DL_STATE_SUPPLIER_UNBIND)
+			WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
+	}
+
+	dev->links.status = DL_DEV_NO_DRIVER;
+}
+
+void device_links_no_driver(struct device *dev)
+{
+	device_links_write_lock();
+	__device_links_no_driver(dev);
+	device_links_write_unlock();
+}
+
+/**
+ * device_links_driver_cleanup - Update links after driver removal.
+ * @dev: Device whose driver has just gone away.
+ *
+ * Update links to consumers for @dev by changing their status to "dormant" and
+ * invoke %__device_links_no_driver() to update links to suppliers for it as
+ * appropriate.
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+void device_links_driver_cleanup(struct device *dev)
+{
+	struct device_link *link;
+
+	device_links_write_lock();
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE);
+		WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
+		WRITE_ONCE(link->status, DL_STATE_DORMANT);
+	}
+
+	__device_links_no_driver(dev);
+
+	device_links_write_unlock();
+}
+
+/**
+ * device_links_busy - Check if there are any busy links to consumers.
+ * @dev: Device to check.
+ *
+ * Check each consumer of the device and return 'true' if its link's status
+ * is one of "consumer probe" or "active" (meaning that the given consumer is
+ * probing right now or its driver is present).  Otherwise, change the link
+ * state to "supplier unbind" to prevent the consumer from being probed
+ * successfully going forward.
+ *
+ * Return 'false' if there are no probing or active consumers.
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+bool device_links_busy(struct device *dev)
+{
+	struct device_link *link;
+	bool ret = false;
+
+	device_links_write_lock();
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		if (link->status == DL_STATE_CONSUMER_PROBE
+		    || link->status == DL_STATE_ACTIVE) {
+			ret = true;
+			break;
+		}
+		WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND);
+	}
+
+	dev->links.status = DL_DEV_UNBINDING;
+
+	device_links_write_unlock();
+	return ret;
+}
+
+/**
+ * device_links_unbind_consumers - Force unbind consumers of the given device.
+ * @dev: Device to unbind the consumers of.
+ *
+ * Walk the list of links to consumers for @dev and if any of them is in the
+ * "consumer probe" state, wait for all device probes in progress to complete
+ * and start over.
+ *
+ * If that's not the case, change the status of the link to "supplier unbind"
+ * and check if the link was in the "active" state.  If so, force the consumer
+ * driver to unbind and start over (the consumer will not re-probe as we have
+ * changed the state of the link already).
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ */
+void device_links_unbind_consumers(struct device *dev)
+{
+	struct device_link *link;
+
+ start:
+	device_links_write_lock();
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		enum device_link_state status;
+
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		status = link->status;
+		if (status == DL_STATE_CONSUMER_PROBE) {
+			device_links_write_unlock();
+
+			wait_for_device_probe();
+			goto start;
+		}
+		WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND);
+		if (status == DL_STATE_ACTIVE) {
+			struct device *consumer = link->consumer;
+
+			get_device(consumer);
+
+			device_links_write_unlock();
+
+			device_release_driver_internal(consumer, NULL,
+						       consumer->parent);
+			put_device(consumer);
+			goto start;
+		}
+	}
+
+	device_links_write_unlock();
+}
+
+/**
+ * device_links_purge - Delete existing links to other devices.
+ * @dev: Target device.
+ */
+static void device_links_purge(struct device *dev)
+{
+	struct device_link *link, *ln;
+
+	/*
+	 * Delete all of the remaining links from this device to any other
+	 * devices (either consumers or suppliers).
+	 */
+	device_links_write_lock();
+
+	list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) {
+		WARN_ON(link->status == DL_STATE_ACTIVE);
+		__device_link_del(link);
+	}
+
+	list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) {
+		WARN_ON(link->status != DL_STATE_DORMANT &&
+			link->status != DL_STATE_NONE);
+		__device_link_del(link);
+	}
+
+	device_links_write_unlock();
+}
+
+/* Device links support end. */
+
 int (*platform_notify)(struct device *dev) = NULL;
 int (*platform_notify_remove)(struct device *dev) = NULL;
 static struct kobject *dev_kobj;
@@ -711,6 +1246,9 @@ void device_initialize(struct device *dev)
 #ifdef CONFIG_GENERIC_MSI_IRQ
 	INIT_LIST_HEAD(&dev->msi_list);
 #endif
+	INIT_LIST_HEAD(&dev->links.consumers);
+	INIT_LIST_HEAD(&dev->links.suppliers);
+	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
 
@@ -1258,6 +1796,8 @@ void device_del(struct device *dev)
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DEL_DEVICE, dev);
+
+	device_links_purge(dev);
 	dpm_sysfs_remove(dev);
 	if (parent)
 		klist_del(&dev->p->knode_parent);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index df4ab5509c04..b2bca3cf0dd2 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -244,6 +244,7 @@ static void driver_bound(struct device *dev)
 		 __func__, dev_name(dev));
 
 	klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices);
+	device_links_driver_bound(dev);
 
 	device_pm_check_callbacks(dev);
 
@@ -337,6 +338,10 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 		return ret;
 	}
 
+	ret = device_links_check_suppliers(dev);
+	if (ret)
+		return ret;
+
 	atomic_inc(&probe_count);
 	pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
 		 drv->bus->name, __func__, drv->name, dev_name(dev));
@@ -415,6 +420,7 @@ probe_failed:
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
 pinctrl_bind_failed:
+	device_links_no_driver(dev);
 	devres_release_all(dev);
 	driver_sysfs_remove(dev);
 	dev->driver = NULL;
@@ -771,7 +777,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
  * __device_release_driver() must be called with @dev lock held.
  * When called for a USB interface, @dev->parent lock must be held as well.
  */
-static void __device_release_driver(struct device *dev)
+static void __device_release_driver(struct device *dev, struct device *parent)
 {
 	struct device_driver *drv;
 
@@ -780,6 +786,25 @@ static void __device_release_driver(struct device *dev)
 		if (driver_allows_async_probing(drv))
 			async_synchronize_full();
 
+		while (device_links_busy(dev)) {
+			device_unlock(dev);
+			if (parent)
+				device_unlock(parent);
+
+			device_links_unbind_consumers(dev);
+			if (parent)
+				device_lock(parent);
+
+			device_lock(dev);
+			/*
+			 * A concurrent invocation of the same function might
+			 * have released the driver successfully while this one
+			 * was waiting, so check for that.
+			 */
+			if (dev->driver != drv)
+				return;
+		}
+
 		pm_runtime_get_sync(dev);
 
 		driver_sysfs_remove(dev);
@@ -795,6 +820,8 @@ static void __device_release_driver(struct device *dev)
 			dev->bus->remove(dev);
 		else if (drv->remove)
 			drv->remove(dev);
+
+		device_links_driver_cleanup(dev);
 		devres_release_all(dev);
 		dev->driver = NULL;
 		dev_set_drvdata(dev, NULL);
@@ -811,16 +838,16 @@ static void __device_release_driver(struct device *dev)
 	}
 }
 
-static void device_release_driver_internal(struct device *dev,
-					   struct device_driver *drv,
-					   struct device *parent)
+void device_release_driver_internal(struct device *dev,
+				    struct device_driver *drv,
+				    struct device *parent)
 {
 	if (parent)
 		device_lock(parent);
 
 	device_lock(dev);
 	if (!drv || drv == dev->driver)
-		__device_release_driver(dev);
+		__device_release_driver(dev, parent);
 
 	device_unlock(dev);
 	if (parent)
@@ -833,6 +860,10 @@ static void device_release_driver_internal(struct device *dev,
  *
  * Manually detach device from driver.
  * When called for a USB interface, @dev->parent lock must be held.
+ *
+ * If this function is to be called with @dev->parent lock held, ensure that
+ * the device's consumers are unbound in advance or that their locks can be
+ * acquired under the @dev->parent lock.
  */
 void device_release_driver(struct device *dev)
 {
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e44944f4be77..420914061405 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -131,6 +131,7 @@ void device_pm_add(struct device *dev)
 		dev_warn(dev, "parent %s should not be sleeping\n",
 			dev_name(dev->parent));
 	list_add_tail(&dev->power.entry, &dpm_list);
+	dev->power.in_dpm_list = true;
 	mutex_unlock(&dpm_list_mtx);
 }
 
@@ -145,6 +146,7 @@ void device_pm_remove(struct device *dev)
 	complete_all(&dev->power.completion);
 	mutex_lock(&dpm_list_mtx);
 	list_del_init(&dev->power.entry);
+	dev->power.in_dpm_list = false;
 	mutex_unlock(&dpm_list_mtx);
 	device_wakeup_disable(dev);
 	pm_runtime_remove(dev);
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 50e30e7b059d..0ba7842d665b 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -127,6 +127,11 @@ extern void device_pm_move_after(struct device *, struct device *);
 extern void device_pm_move_last(struct device *);
 extern void device_pm_check_callbacks(struct device *dev);
 
+static inline bool device_pm_initialized(struct device *dev)
+{
+	return dev->power.in_dpm_list;
+}
+
 #else /* !CONFIG_PM_SLEEP */
 
 static inline void device_pm_sleep_init(struct device *dev) {}
@@ -146,6 +151,11 @@ static inline void device_pm_move_last(struct device *dev) {}
 
 static inline void device_pm_check_callbacks(struct device *dev) {}
 
+static inline bool device_pm_initialized(struct device *dev)
+{
+	return device_is_registered(dev);
+}
+
 #endif /* !CONFIG_PM_SLEEP */
 
 static inline void device_pm_init(struct device *dev)
diff --git a/include/linux/device.h b/include/linux/device.h
index bc41e87a969b..9cae2feaf5cb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -707,6 +707,81 @@ struct device_dma_parameters {
 	unsigned long segment_boundary_mask;
 };
 
+/**
+ * enum device_link_state - Device link states.
+ * @DL_STATE_NONE: The presence of the drivers is not being tracked.
+ * @DL_STATE_DORMANT: None of the supplier/consumer drivers is present.
+ * @DL_STATE_AVAILABLE: The supplier driver is present, but the consumer is not.
+ * @DL_STATE_CONSUMER_PROBE: The consumer is probing (supplier driver present).
+ * @DL_STATE_ACTIVE: Both the supplier and consumer drivers are present.
+ * @DL_STATE_SUPPLIER_UNBIND: The supplier driver is unbinding.
+ */
+enum device_link_state {
+	DL_STATE_NONE = -1,
+	DL_STATE_DORMANT = 0,
+	DL_STATE_AVAILABLE,
+	DL_STATE_CONSUMER_PROBE,
+	DL_STATE_ACTIVE,
+	DL_STATE_SUPPLIER_UNBIND,
+};
+
+/*
+ * Device link flags.
+ *
+ * STATELESS: The core won't track the presence of supplier/consumer drivers.
+ * AUTOREMOVE: Remove this link automatically on consumer driver unbind.
+ */
+#define DL_FLAG_STATELESS	BIT(0)
+#define DL_FLAG_AUTOREMOVE	BIT(1)
+
+/**
+ * struct device_link - Device link representation.
+ * @supplier: The device on the supplier end of the link.
+ * @s_node: Hook to the supplier device's list of links to consumers.
+ * @consumer: The device on the consumer end of the link.
+ * @c_node: Hook to the consumer device's list of links to suppliers.
+ * @status: The state of the link (with respect to the presence of drivers).
+ * @flags: Link flags.
+ * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks.
+ */
+struct device_link {
+	struct device *supplier;
+	struct list_head s_node;
+	struct device *consumer;
+	struct list_head c_node;
+	enum device_link_state status;
+	u32 flags;
+#ifdef CONFIG_SRCU
+	struct rcu_head rcu_head;
+#endif
+};
+
+/**
+ * enum dl_dev_state - Device driver presence tracking information.
+ * @DL_DEV_NO_DRIVER: There is no driver attached to the device.
+ * @DL_DEV_PROBING: A driver is probing.
+ * @DL_DEV_DRIVER_BOUND: The driver has been bound to the device.
+ * @DL_DEV_UNBINDING: The driver is unbinding from the device.
+ */
+enum dl_dev_state {
+	DL_DEV_NO_DRIVER = 0,
+	DL_DEV_PROBING,
+	DL_DEV_DRIVER_BOUND,
+	DL_DEV_UNBINDING,
+};
+
+/**
+ * struct dev_links_info - Device data related to device links.
+ * @suppliers: List of links to supplier devices.
+ * @consumers: List of links to consumer devices.
+ * @status: Driver status information.
+ */
+struct dev_links_info {
+	struct list_head suppliers;
+	struct list_head consumers;
+	enum dl_dev_state status;
+};
+
 /**
  * struct device - The basic device structure
  * @parent:	The device's "parent" device, the device to which it is attached.
@@ -799,6 +874,7 @@ struct device {
 					   core doesn't touch it */
 	void		*driver_data;	/* Driver data, set and get with
 					   dev_set/get_drvdata */
+	struct dev_links_info	links;
 	struct dev_pm_info	power;
 	struct dev_pm_domain	*pm_domain;
 
@@ -1116,6 +1192,10 @@ extern void device_shutdown(void);
 /* debugging and troubleshooting/diagnostic helpers. */
 extern const char *dev_driver_string(const struct device *dev);
 
+/* Device links interface. */
+struct device_link *device_link_add(struct device *consumer,
+				    struct device *supplier, u32 flags);
+void device_link_del(struct device_link *link);
 
 #ifdef CONFIG_PRINTK
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 06eb353182ab..721a70241fcd 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -559,6 +559,7 @@ struct dev_pm_info {
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
 	unsigned int		async_suspend:1;
+	bool			in_dpm_list:1;	/* Owned by the PM core */
 	bool			is_prepared:1;	/* Owned by the PM core */
 	bool			is_suspended:1;	/* Ditto */
 	bool			is_noirq_suspended:1;
-- 
cgit v1.2.3


From 21d5c57b3726166421251e94dabab047baaf8ce4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 30 Oct 2016 17:32:31 +0100
Subject: PM / runtime: Use device links

Modify the runtime PM framework to use device links to ensure that
supplier devices will not be suspended if any of their consumer
devices are active.

The idea is to reference count suppliers on the consumer's resume
and drop references to them on its suspend.  The information on
whether or not the supplier has been reference counted by the
consumer's (runtime) resume is stored in a new field (rpm_active)
in the link object for each link.

It may be necessary to clean up those references when the
supplier is unbinding and that's why the links whose status is
DEVICE_LINK_SUPPLIER_UNBIND are skipped by the runtime suspend
and resume code.

The above means that if the consumer device is probed in the
runtime-active state, the supplier has to be resumed and reference
counted by device_link_add() so the code works as expected on its
(runtime) suspend.  There is a new flag, DEVICE_LINK_RPM_ACTIVE,
to tell device_link_add() about that (in which case the caller
is responsible for making sure that the consumer really will
be runtime-active when runtime PM is enabled for it).

The other new link flag, DEVICE_LINK_PM_RUNTIME, tells the core
whether or not the link should be used for runtime PM at all.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c          |  27 +++++++-
 drivers/base/dd.c            |   3 +
 drivers/base/power/runtime.c | 157 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/device.h       |   6 ++
 include/linux/pm_runtime.h   |   6 ++
 5 files changed, 193 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3c5ff17f578f..876c62b12462 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -152,6 +152,14 @@ static int device_reorder_to_tail(struct device *dev, void *not_used)
  * @supplier: Supplier end of the link.
  * @flags: Link flags.
  *
+ * The caller is responsible for the proper synchronization of the link creation
+ * with runtime PM.  First, setting the DL_FLAG_PM_RUNTIME flag will cause the
+ * runtime PM framework to take the link into account.  Second, if the
+ * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will
+ * be forced into the active metastate and reference-counted upon the creation
+ * of the link.  If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
+ * ignored.
+ *
  * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
  * when the consumer device driver unbinds from it.  The combination of both
  * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
@@ -193,10 +201,19 @@ struct device_link *device_link_add(struct device *consumer,
 		if (link->consumer == consumer)
 			goto out;
 
-	link = kmalloc(sizeof(*link), GFP_KERNEL);
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link)
 		goto out;
 
+	if ((flags & DL_FLAG_PM_RUNTIME) && (flags & DL_FLAG_RPM_ACTIVE)) {
+		if (pm_runtime_get_sync(supplier) < 0) {
+			pm_runtime_put_noidle(supplier);
+			kfree(link);
+			link = NULL;
+			goto out;
+		}
+		link->rpm_active = true;
+	}
 	get_device(supplier);
 	link->supplier = supplier;
 	INIT_LIST_HEAD(&link->s_node);
@@ -213,6 +230,14 @@ struct device_link *device_link_add(struct device *consumer,
 		case DL_DEV_DRIVER_BOUND:
 			switch (consumer->links.status) {
 			case DL_DEV_PROBING:
+				/*
+				 * Balance the decrementation of the supplier's
+				 * runtime PM usage counter after consumer probe
+				 * in driver_probe_device().
+				 */
+				if (flags & DL_FLAG_PM_RUNTIME)
+					pm_runtime_get_sync(supplier);
+
 				link->status = DL_STATE_CONSUMER_PROBE;
 				break;
 			case DL_DEV_DRIVER_BOUND:
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index b2bca3cf0dd2..43be1cc751a4 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -513,6 +513,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
+	pm_runtime_get_suppliers(dev);
 	if (dev->parent)
 		pm_runtime_get_sync(dev->parent);
 
@@ -523,6 +524,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	if (dev->parent)
 		pm_runtime_put(dev->parent);
 
+	pm_runtime_put_suppliers(dev);
 	return ret;
 }
 
@@ -806,6 +808,7 @@ static void __device_release_driver(struct device *dev, struct device *parent)
 		}
 
 		pm_runtime_get_sync(dev);
+		pm_runtime_clean_up_links(dev);
 
 		driver_sysfs_remove(dev);
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 82a081ea4317..462f90e952f8 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -12,6 +12,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/pm_wakeirq.h>
 #include <trace/events/rpm.h>
+
+#include "../base.h"
 #include "power.h"
 
 typedef int (*pm_callback_t)(struct device *);
@@ -258,6 +260,42 @@ static int rpm_check_suspend_allowed(struct device *dev)
 	return retval;
 }
 
+static int rpm_get_suppliers(struct device *dev)
+{
+	struct device_link *link;
+
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+		int retval;
+
+		if (!(link->flags & DL_FLAG_PM_RUNTIME))
+			continue;
+
+		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND ||
+		    link->rpm_active)
+			continue;
+
+		retval = pm_runtime_get_sync(link->supplier);
+		if (retval < 0) {
+			pm_runtime_put_noidle(link->supplier);
+			return retval;
+		}
+		link->rpm_active = true;
+	}
+	return 0;
+}
+
+static void rpm_put_suppliers(struct device *dev)
+{
+	struct device_link *link;
+
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+		if (link->rpm_active &&
+		    READ_ONCE(link->status) != DL_STATE_SUPPLIER_UNBIND) {
+			pm_runtime_put(link->supplier);
+			link->rpm_active = false;
+		}
+}
+
 /**
  * __rpm_callback - Run a given runtime PM callback for a given device.
  * @cb: Runtime PM callback to run.
@@ -266,19 +304,55 @@ static int rpm_check_suspend_allowed(struct device *dev)
 static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
-	int retval;
+	int retval, idx;
 
-	if (dev->power.irq_safe)
+	if (dev->power.irq_safe) {
 		spin_unlock(&dev->power.lock);
-	else
+	} else {
 		spin_unlock_irq(&dev->power.lock);
 
+		/*
+		 * Resume suppliers if necessary.
+		 *
+		 * The device's runtime PM status cannot change until this
+		 * routine returns, so it is safe to read the status outside of
+		 * the lock.
+		 */
+		if (dev->power.runtime_status == RPM_RESUMING) {
+			idx = device_links_read_lock();
+
+			retval = rpm_get_suppliers(dev);
+			if (retval)
+				goto fail;
+
+			device_links_read_unlock(idx);
+		}
+	}
+
 	retval = cb(dev);
 
-	if (dev->power.irq_safe)
+	if (dev->power.irq_safe) {
 		spin_lock(&dev->power.lock);
-	else
+	} else {
+		/*
+		 * If the device is suspending and the callback has returned
+		 * success, drop the usage counters of the suppliers that have
+		 * been reference counted on its resume.
+		 *
+		 * Do that if resume fails too.
+		 */
+		if ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
+		    || (dev->power.runtime_status == RPM_RESUMING && retval)) {
+			idx = device_links_read_lock();
+
+ fail:
+			rpm_put_suppliers(dev);
+
+			device_links_read_unlock(idx);
+		}
+
 		spin_lock_irq(&dev->power.lock);
+	}
 
 	return retval;
 }
@@ -1446,6 +1520,79 @@ void pm_runtime_remove(struct device *dev)
 	pm_runtime_reinit(dev);
 }
 
+/**
+ * pm_runtime_clean_up_links - Prepare links to consumers for driver removal.
+ * @dev: Device whose driver is going to be removed.
+ *
+ * Check links from this device to any consumers and if any of them have active
+ * runtime PM references to the device, drop the usage counter of the device
+ * (once per link).
+ *
+ * Links with the DL_FLAG_STATELESS flag set are ignored.
+ *
+ * Since the device is guaranteed to be runtime-active at the point this is
+ * called, nothing else needs to be done here.
+ *
+ * Moreover, this is called after device_links_busy() has returned 'false', so
+ * the status of each link is guaranteed to be DL_STATE_SUPPLIER_UNBIND and
+ * therefore rpm_active can't be manipulated concurrently.
+ */
+void pm_runtime_clean_up_links(struct device *dev)
+{
+	struct device_link *link;
+	int idx;
+
+	idx = device_links_read_lock();
+
+	list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
+		if (link->flags & DL_FLAG_STATELESS)
+			continue;
+
+		if (link->rpm_active) {
+			pm_runtime_put_noidle(dev);
+			link->rpm_active = false;
+		}
+	}
+
+	device_links_read_unlock(idx);
+}
+
+/**
+ * pm_runtime_get_suppliers - Resume and reference-count supplier devices.
+ * @dev: Consumer device.
+ */
+void pm_runtime_get_suppliers(struct device *dev)
+{
+	struct device_link *link;
+	int idx;
+
+	idx = device_links_read_lock();
+
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+		if (link->flags & DL_FLAG_PM_RUNTIME)
+			pm_runtime_get_sync(link->supplier);
+
+	device_links_read_unlock(idx);
+}
+
+/**
+ * pm_runtime_put_suppliers - Drop references to supplier devices.
+ * @dev: Consumer device.
+ */
+void pm_runtime_put_suppliers(struct device *dev)
+{
+	struct device_link *link;
+	int idx;
+
+	idx = device_links_read_lock();
+
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+		if (link->flags & DL_FLAG_PM_RUNTIME)
+			pm_runtime_put(link->supplier);
+
+	device_links_read_unlock(idx);
+}
+
 /**
  * pm_runtime_force_suspend - Force a device into suspend state if needed.
  * @dev: Device to suspend.
diff --git a/include/linux/device.h b/include/linux/device.h
index 9cae2feaf5cb..49f453892ca5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -730,9 +730,13 @@ enum device_link_state {
  *
  * STATELESS: The core won't track the presence of supplier/consumer drivers.
  * AUTOREMOVE: Remove this link automatically on consumer driver unbind.
+ * PM_RUNTIME: If set, the runtime PM framework will use this link.
+ * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
  */
 #define DL_FLAG_STATELESS	BIT(0)
 #define DL_FLAG_AUTOREMOVE	BIT(1)
+#define DL_FLAG_PM_RUNTIME	BIT(2)
+#define DL_FLAG_RPM_ACTIVE	BIT(3)
 
 /**
  * struct device_link - Device link representation.
@@ -742,6 +746,7 @@ enum device_link_state {
  * @c_node: Hook to the consumer device's list of links to suppliers.
  * @status: The state of the link (with respect to the presence of drivers).
  * @flags: Link flags.
+ * @rpm_active: Whether or not the consumer device is runtime-PM-active.
  * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks.
  */
 struct device_link {
@@ -751,6 +756,7 @@ struct device_link {
 	struct list_head c_node;
 	enum device_link_state status;
 	u32 flags;
+	bool rpm_active;
 #ifdef CONFIG_SRCU
 	struct rcu_head rcu_head;
 #endif
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 2e14d2667b6c..c2ee87138e4a 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -55,6 +55,9 @@ extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
 extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
+extern void pm_runtime_clean_up_links(struct device *dev);
+extern void pm_runtime_get_suppliers(struct device *dev);
+extern void pm_runtime_put_suppliers(struct device *dev);
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
 {
@@ -186,6 +189,9 @@ static inline unsigned long pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 						bool enable){}
+static inline void pm_runtime_clean_up_links(struct device *dev) {}
+static inline void pm_runtime_get_suppliers(struct device *dev) {}
+static inline void pm_runtime_put_suppliers(struct device *dev) {}
 
 #endif /* !CONFIG_PM */
 
-- 
cgit v1.2.3


From baa8809f60971d10220dfe79248f54b2b265f003 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 30 Oct 2016 17:32:43 +0100
Subject: PM / runtime: Optimize the use of device links

If the device has no links to suppliers that should be used for
runtime PM (links with DEVICE_LINK_PM_RUNTIME set), there is no
reason to walk the list of suppliers for that device during
runtime suspend and resume.

Add a simple mechanism to detect that case and possibly avoid the
extra unnecessary overhead.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c          | 20 +++++++++++++-------
 drivers/base/power/runtime.c | 23 ++++++++++++++++++++---
 include/linux/pm.h           |  1 +
 include/linux/pm_runtime.h   |  4 ++++
 4 files changed, 38 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 876c62b12462..d0c9df5cdd9e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -205,14 +205,17 @@ struct device_link *device_link_add(struct device *consumer,
 	if (!link)
 		goto out;
 
-	if ((flags & DL_FLAG_PM_RUNTIME) && (flags & DL_FLAG_RPM_ACTIVE)) {
-		if (pm_runtime_get_sync(supplier) < 0) {
-			pm_runtime_put_noidle(supplier);
-			kfree(link);
-			link = NULL;
-			goto out;
+	if (flags & DL_FLAG_PM_RUNTIME) {
+		if (flags & DL_FLAG_RPM_ACTIVE) {
+			if (pm_runtime_get_sync(supplier) < 0) {
+				pm_runtime_put_noidle(supplier);
+				kfree(link);
+				link = NULL;
+				goto out;
+			}
+			link->rpm_active = true;
 		}
-		link->rpm_active = true;
+		pm_runtime_new_link(consumer);
 	}
 	get_device(supplier);
 	link->supplier = supplier;
@@ -296,6 +299,9 @@ static void __device_link_del(struct device_link *link)
 	dev_info(link->consumer, "Dropping the link to %s\n",
 		 dev_name(link->supplier));
 
+	if (link->flags & DL_FLAG_PM_RUNTIME)
+		pm_runtime_drop_link(link->consumer);
+
 	list_del_rcu(&link->s_node);
 	list_del_rcu(&link->c_node);
 	call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 462f90e952f8..ba7b4a8c07e5 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -305,6 +305,7 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
 	int retval, idx;
+	bool use_links = dev->power.links_count > 0;
 
 	if (dev->power.irq_safe) {
 		spin_unlock(&dev->power.lock);
@@ -318,7 +319,7 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 		 * routine returns, so it is safe to read the status outside of
 		 * the lock.
 		 */
-		if (dev->power.runtime_status == RPM_RESUMING) {
+		if (use_links && dev->power.runtime_status == RPM_RESUMING) {
 			idx = device_links_read_lock();
 
 			retval = rpm_get_suppliers(dev);
@@ -341,8 +342,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 		 *
 		 * Do that if resume fails too.
 		 */
-		if ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
-		    || (dev->power.runtime_status == RPM_RESUMING && retval)) {
+		if (use_links
+		    && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
+		    || (dev->power.runtime_status == RPM_RESUMING && retval))) {
 			idx = device_links_read_lock();
 
  fail:
@@ -1593,6 +1595,21 @@ void pm_runtime_put_suppliers(struct device *dev)
 	device_links_read_unlock(idx);
 }
 
+void pm_runtime_new_link(struct device *dev)
+{
+	spin_lock_irq(&dev->power.lock);
+	dev->power.links_count++;
+	spin_unlock_irq(&dev->power.lock);
+}
+
+void pm_runtime_drop_link(struct device *dev)
+{
+	spin_lock_irq(&dev->power.lock);
+	WARN_ON(dev->power.links_count == 0);
+	dev->power.links_count--;
+	spin_unlock_irq(&dev->power.lock);
+}
+
 /**
  * pm_runtime_force_suspend - Force a device into suspend state if needed.
  * @dev: Device to suspend.
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 721a70241fcd..ccfe00ecc7e6 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -597,6 +597,7 @@ struct dev_pm_info {
 	unsigned int		use_autosuspend:1;
 	unsigned int		timer_autosuspends:1;
 	unsigned int		memalloc_noio:1;
+	unsigned int		links_count;
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index c2ee87138e4a..73814877537d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -58,6 +58,8 @@ extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_clean_up_links(struct device *dev);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
+extern void pm_runtime_new_link(struct device *dev);
+extern void pm_runtime_drop_link(struct device *dev);
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
 {
@@ -192,6 +194,8 @@ static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 static inline void pm_runtime_clean_up_links(struct device *dev) {}
 static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
+static inline void pm_runtime_new_link(struct device *dev) {}
+static inline void pm_runtime_drop_link(struct device *dev) {}
 
 #endif /* !CONFIG_PM */
 
-- 
cgit v1.2.3


From 9cf1f6a8c4cbb7836b838b51b3b02ddf32c6c6a0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 28 Oct 2016 11:43:20 -0400
Subject: net: Move functions for configuring traffic classes out of inline
 headers

The functions for configuring the traffic class to queue mappings have
other effects that need to be addressed.  Instead of trying to export a
bunch of new functions just relocate the functions so that we can
instrument them directly with the functionality they will need.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 31 +++----------------------------
 net/core/dev.c            | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 20ce8df115ac..e05ab3bd48d2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1920,34 +1920,9 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
 	return 0;
 }
 
-static inline
-void netdev_reset_tc(struct net_device *dev)
-{
-	dev->num_tc = 0;
-	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
-	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
-}
-
-static inline
-int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
-{
-	if (tc >= dev->num_tc)
-		return -EINVAL;
-
-	dev->tc_to_txq[tc].count = count;
-	dev->tc_to_txq[tc].offset = offset;
-	return 0;
-}
-
-static inline
-int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
-{
-	if (num_tc > TC_MAX_QUEUE)
-		return -EINVAL;
-
-	dev->num_tc = num_tc;
-	return 0;
-}
+void netdev_reset_tc(struct net_device *dev);
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
 
 static inline
 int netdev_get_num_tc(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8341dadf5e94..2d54be912136 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2173,6 +2173,35 @@ error:
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+void netdev_reset_tc(struct net_device *dev)
+{
+	dev->num_tc = 0;
+	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+EXPORT_SYMBOL(netdev_reset_tc);
+
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->tc_to_txq[tc].count = count;
+	dev->tc_to_txq[tc].offset = offset;
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_tc_queue);
+
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+	if (num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	dev->num_tc = num_tc;
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_num_tc);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
-- 
cgit v1.2.3


From 8d059b0f6f5b1d3acf829454e1087818ad660058 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 28 Oct 2016 11:43:49 -0400
Subject: net: Add sysfs value to determine queue traffic class

Add a sysfs attribute for a Tx queue that allows us to determine the
traffic class for a given queue.  This will allow us to more easily
determine this in the future.  It is needed as XPS will take the traffic
class for a group of queues into account in order to avoid pulling traffic
from one traffic class into another.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 17 +++++++++++++++++
 net/core/net-sysfs.c      | 20 +++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e05ab3bd48d2..d91a41860614 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1920,6 +1920,7 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
 	return 0;
 }
 
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq);
 void netdev_reset_tc(struct net_device *dev);
 int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
 int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2d54be912136..db0fdbbcd9b8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1948,6 +1948,23 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
+{
+	if (dev->num_tc) {
+		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+		int i;
+
+		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
+			if ((txq - tc->offset) < tc->count)
+				return i;
+		}
+
+		return -1;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_XPS
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d4fe28606ff5..38bd9b933195 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1024,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 	return sprintf(buf, "%lu", trans_timeout);
 }
 
-#ifdef CONFIG_XPS
 static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
@@ -1036,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 	return i;
 }
 
+static ssize_t show_traffic_class(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attribute,
+				  char *buf)
+{
+	struct net_device *dev = queue->dev;
+	int index = get_netdev_queue_index(queue);
+	int tc = netdev_txq_to_tc(dev, index);
+
+	if (tc < 0)
+		return -EINVAL;
+
+	return sprintf(buf, "%u\n", tc);
+}
+
+#ifdef CONFIG_XPS
 static ssize_t show_tx_maxrate(struct netdev_queue *queue,
 			       struct netdev_queue_attribute *attribute,
 			       char *buf)
@@ -1078,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate =
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
+static struct netdev_queue_attribute queue_traffic_class =
+	__ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+
 #ifdef CONFIG_BQL
 /*
  * Byte queue limits sysfs structures and functions.
@@ -1263,6 +1280,7 @@ static struct netdev_queue_attribute xps_cpus_attribute =
 
 static struct attribute *netdev_queue_default_attrs[] = {
 	&queue_trans_timeout.attr,
+	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
 	&queue_tx_maxrate.attr,
-- 
cgit v1.2.3


From 184c449f91fef521042970cca46bd5cdfc0e3a37 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 28 Oct 2016 11:50:13 -0400
Subject: net: Add support for XPS with QoS via traffic classes

This patch adds support for setting and using XPS when QoS via traffic
classes is enabled.  With this change we will factor in the priority and
traffic class mapping of the packet and use that information to correctly
select the queue.

This allows us to define a set of queues for a given traffic class via
mqprio and then configure the XPS mapping for those queues so that the
traffic flows can avoid head-of-line blocking between the individual CPUs
if so desired.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   4 +-
 net/core/dev.c            | 117 ++++++++++++++++++++++++++++++++--------------
 net/core/net-sysfs.c      |  31 +++++++-----
 3 files changed, 105 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d91a41860614..66fd61c681d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -732,8 +732,8 @@ struct xps_dev_maps {
 	struct rcu_head rcu;
 	struct xps_map __rcu *cpu_map[0];
 };
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
-    (nr_cpu_ids * sizeof(struct xps_map *)))
+#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
+	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE	16
diff --git a/net/core/dev.c b/net/core/dev.c
index 108a6adce185..f23e28668f32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2002,14 +2002,22 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 				 struct xps_dev_maps *dev_maps,
 				 int cpu, u16 offset, u16 count)
 {
-	int i, j;
+	int num_tc = dev->num_tc ? : 1;
+	bool active = false;
+	int tci;
 
-	for (i = count, j = offset; i--; j++) {
-		if (!remove_xps_queue(dev_maps, cpu, j))
-			break;
+	for (tci = cpu * num_tc; num_tc--; tci++) {
+		int i, j;
+
+		for (i = count, j = offset; i--; j++) {
+			if (!remove_xps_queue(dev_maps, cpu, j))
+				break;
+		}
+
+		active |= i < 0;
 	}
 
-	return i < 0;
+	return active;
 }
 
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
@@ -2086,20 +2094,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index)
 {
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	int i, cpu, tci, numa_node_id = -2;
+	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
-	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
-	int cpu, numa_node_id = -2;
 	bool active = false;
 
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+
+	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
+
 	mutex_lock(&xps_map_mutex);
 
 	dev_maps = xmap_dereference(dev->xps_maps);
 
 	/* allocate memory for queue storage */
-	for_each_online_cpu(cpu) {
-		if (!cpumask_test_cpu(cpu, mask))
-			continue;
-
+	for_each_cpu_and(cpu, cpu_online_mask, mask) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2107,25 +2123,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+		tci = cpu * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
 				 NULL;
 
 		map = expand_xps_map(map, cpu, index);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
 	for_each_possible_cpu(cpu) {
+		/* copy maps belonging to foreign traffic classes */
+		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		}
+
+		/* We need to explicitly update tci as prevous loop
+		 * could break out early if dev_maps is NULL.
+		 */
+		tci = cpu * num_tc + tc;
+
 		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
 			/* add queue to CPU maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
@@ -2139,26 +2168,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[cpu]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
 		}
 
+		/* copy maps belonging to foreign traffic classes */
+		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		}
 	}
 
 	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
 
 	/* Cleanup old maps */
-	if (dev_maps) {
-		for_each_possible_cpu(cpu) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+	if (!dev_maps)
+		goto out_no_old_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
-
-		kfree_rcu(dev_maps, rcu);
 	}
 
+	kfree_rcu(dev_maps, rcu);
+
+out_no_old_maps:
 	dev_maps = new_dev_maps;
 	active = true;
 
@@ -2173,11 +2212,12 @@ out_no_new_maps:
 
 	/* removes queue from unused CPUs */
 	for_each_possible_cpu(cpu) {
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
-			continue;
-
-		if (remove_xps_queue(dev_maps, cpu, index))
-			active = true;
+		for (i = tc, tci = cpu * num_tc; i--; tci++)
+			active |= remove_xps_queue(dev_maps, tci, index);
+		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+			active |= remove_xps_queue(dev_maps, tci, index);
+		for (i = num_tc - tc, tci++; --i; tci++)
+			active |= remove_xps_queue(dev_maps, tci, index);
 	}
 
 	/* free map if not active */
@@ -2193,11 +2233,14 @@ out_no_maps:
 error:
 	/* remove any maps that we added */
 	for_each_possible_cpu(cpu) {
-		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
-				 NULL;
-		if (new_map && new_map != map)
-			kfree(new_map);
+		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = dev_maps ?
+			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      NULL;
+			if (new_map && new_map != map)
+				kfree(new_map);
+		}
 	}
 
 	mutex_unlock(&xps_map_mutex);
@@ -3158,8 +3201,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps);
 	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[skb->sender_cpu - 1]);
+		unsigned int tci = skb->sender_cpu - 1;
+
+		if (dev->num_tc) {
+			tci *= dev->num_tc;
+			tci += netdev_get_prio_tc_map(dev, skb->priority);
+		}
+
+		map = rcu_dereference(dev_maps->cpu_map[tci]);
 		if (map) {
 			if (map->len == 1)
 				queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 38bd9b933195..b0c04cf4851d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1210,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 			    struct netdev_queue_attribute *attribute, char *buf)
 {
 	struct net_device *dev = queue->dev;
+	int cpu, len, num_tc = 1, tc = 0;
 	struct xps_dev_maps *dev_maps;
 	cpumask_var_t mask;
 	unsigned long index;
-	int i, len;
 
 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 		return -ENOMEM;
 
 	index = get_netdev_queue_index(queue);
 
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps);
 	if (dev_maps) {
-		for_each_possible_cpu(i) {
-			struct xps_map *map =
-			    rcu_dereference(dev_maps->cpu_map[i]);
-			if (map) {
-				int j;
-				for (j = 0; j < map->len; j++) {
-					if (map->queues[j] == index) {
-						cpumask_set_cpu(i, mask);
-						break;
-					}
+		for_each_possible_cpu(cpu) {
+			int i, tci = cpu * num_tc + tc;
+			struct xps_map *map;
+
+			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			if (!map)
+				continue;
+
+			for (i = map->len; i--;) {
+				if (map->queues[i] == index) {
+					cpumask_set_cpu(cpu, mask);
+					break;
 				}
 			}
 		}
-- 
cgit v1.2.3


From 0fefbfbaad298162737d5418eb85065879f99b3e Mon Sep 17 00:00:00 2001
From: Sudarsana Kalluru <Sudarsana.Kalluru@cavium.com>
Date: Mon, 31 Oct 2016 07:14:21 +0200
Subject: qed*: Management firmware - notifications and defaults

Management firmware is interested in various tidbits about
the driver - including the driver state & several configuration
related fields [MTU, primtary MAC, etc.].
This adds the necessray logic to update MFW with such configurations,
some of which are passed directly via qed while for others APIs
are provide so that qede would be able to later configure if needed.

This also introduces a new default configuration for MTU which would
replace the default inherited by being an ethernet device.

Signed-off-by: Sudarsana Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h           |   1 +
 drivers/net/ethernet/qlogic/qed/qed_dev.c       |  52 +++++++-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h       |  59 ++++++++-
 drivers/net/ethernet/qlogic/qed/qed_main.c      |  75 +++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c       | 163 ++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h       | 102 +++++++++++++++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |   2 +
 drivers/net/ethernet/qlogic/qede/qede_main.c    |   8 ++
 include/linux/qed/qed_if.h                      |  28 ++++
 9 files changed, 487 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 653bb5735f0c..f20243c1fb0b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -226,6 +226,7 @@ struct qed_hw_info {
 	u32				port_mode;
 	u32				hw_mode;
 	unsigned long		device_capabilities;
+	u16				mtu;
 };
 
 struct qed_hw_cid_data {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index edae5fc5fccd..33fd69e24bae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1057,8 +1057,10 @@ int qed_hw_init(struct qed_dev *cdev,
 		bool allow_npar_tx_switch,
 		const u8 *bin_fw_data)
 {
-	u32 load_code, param;
-	int rc, mfw_rc, i;
+	u32 load_code, param, drv_mb_param;
+	bool b_default_mtu = true;
+	struct qed_hwfn *p_hwfn;
+	int rc = 0, mfw_rc, i;
 
 	if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
 		DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n");
@@ -1074,6 +1076,12 @@ int qed_hw_init(struct qed_dev *cdev,
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
+		/* If management didn't provide a default, set one of our own */
+		if (!p_hwfn->hw_info.mtu) {
+			p_hwfn->hw_info.mtu = 1500;
+			b_default_mtu = false;
+		}
+
 		if (IS_VF(cdev)) {
 			p_hwfn->b_int_enabled = 1;
 			continue;
@@ -1157,6 +1165,38 @@ int qed_hw_init(struct qed_dev *cdev,
 		p_hwfn->hw_init_done = true;
 	}
 
+	if (IS_PF(cdev)) {
+		p_hwfn = QED_LEADING_HWFN(cdev);
+		drv_mb_param = (FW_MAJOR_VERSION << 24) |
+			       (FW_MINOR_VERSION << 16) |
+			       (FW_REVISION_VERSION << 8) |
+			       (FW_ENGINEERING_VERSION);
+		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				 DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
+				 drv_mb_param, &load_code, &param);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update firmware version\n");
+
+		if (!b_default_mtu) {
+			rc = qed_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt,
+						   p_hwfn->hw_info.mtu);
+			if (rc)
+				DP_INFO(p_hwfn,
+					"Failed to update default mtu\n");
+		}
+
+		rc = qed_mcp_ov_update_driver_state(p_hwfn,
+						    p_hwfn->p_main_ptt,
+						  QED_OV_DRIVER_STATE_DISABLED);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update driver state\n");
+
+		rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
+					       QED_OV_ESWITCH_VEB);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
+	}
+
 	return 0;
 }
 
@@ -1801,6 +1841,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 
 	qed_get_num_funcs(p_hwfn, p_ptt);
 
+	if (qed_mcp_is_init(p_hwfn))
+		p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu;
+
 	return qed_hw_get_resc(p_hwfn);
 }
 
@@ -1975,8 +2018,13 @@ int qed_hw_prepare(struct qed_dev *cdev,
 
 void qed_hw_remove(struct qed_dev *cdev)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	int i;
 
+	if (IS_PF(cdev))
+		qed_mcp_ov_update_driver_state(p_hwfn, p_hwfn->p_main_ptt,
+					       QED_OV_DRIVER_STATE_NOT_LOADED);
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 72eee29c677f..36de87a1befa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -8564,6 +8564,15 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_INIT_PHY			0x22000000
 #define DRV_MSG_CODE_LINK_RESET			0x23000000
 #define DRV_MSG_CODE_SET_DCBX			0x25000000
+#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG         0x26000000
+#define DRV_MSG_CODE_OV_UPDATE_BUS_NUM          0x27000000
+#define DRV_MSG_CODE_OV_UPDATE_BOOT_PROGRESS    0x28000000
+#define DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER     0x29000000
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE     0x31000000
+#define DRV_MSG_CODE_BW_UPDATE_ACK              0x32000000
+#define DRV_MSG_CODE_OV_UPDATE_MTU              0x33000000
+#define DRV_MSG_CODE_OV_UPDATE_WOL              0x38000000
+#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE     0x39000000
 
 #define DRV_MSG_CODE_BW_UPDATE_ACK		0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN			0x30000000
@@ -8574,6 +8583,13 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MCP_RESET			0x00090000
 #define DRV_MSG_CODE_SET_VERSION		0x000f0000
 #define DRV_MSG_CODE_MCP_HALT                   0x00100000
+#define DRV_MSG_CODE_SET_VMAC                   0x00110000
+#define DRV_MSG_CODE_GET_VMAC                   0x00120000
+#define DRV_MSG_CODE_VMAC_TYPE_SHIFT            4
+#define DRV_MSG_CODE_VMAC_TYPE_MASK             0x30
+#define DRV_MSG_CODE_VMAC_TYPE_MAC              1
+#define DRV_MSG_CODE_VMAC_TYPE_WWNN             2
+#define DRV_MSG_CODE_VMAC_TYPE_WWPN             3
 
 #define DRV_MSG_CODE_GET_STATS                  0x00130000
 #define DRV_MSG_CODE_STATS_TYPE_LAN             1
@@ -8589,7 +8605,10 @@ struct public_drv_mb {
 #define DRV_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 drv_mb_param;
-#define DRV_MB_PARAM_UNLOAD_WOL_MCP		0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN         0x00000000
+#define DRV_MB_PARAM_UNLOAD_WOL_MCP             0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED        0x00000002
+#define DRV_MB_PARAM_UNLOAD_WOL_ENABLED         0x00000003
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK		0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT		3
 
@@ -8602,6 +8621,44 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_LLDP_SEND_MASK		0x00000001
 #define DRV_MB_PARAM_LLDP_SEND_SHIFT		0
 
+#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT		0
+#define DRV_MB_PARAM_OV_CURR_CFG_MASK		0x0000000F
+#define DRV_MB_PARAM_OV_CURR_CFG_NONE		0
+#define DRV_MB_PARAM_OV_CURR_CFG_OS		1
+#define DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC	2
+#define DRV_MB_PARAM_OV_CURR_CFG_OTHER		3
+
+#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT	0
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK	0xFFFFFFFF
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK	0xFF000000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK	0x00FF0000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK	0x0000FF00
+#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK	0x000000FF
+
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT	0
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK	0xF
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN	0x1
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED	0x2
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_LOADING	0x3
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED	0x4
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE	0x5
+
+#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT	0
+#define DRV_MB_PARAM_OV_MTU_SIZE_MASK	0xFFFFFFFF
+
+#define DRV_MB_PARAM_WOL_MASK	(DRV_MB_PARAM_WOL_DEFAULT | \
+				 DRV_MB_PARAM_WOL_DISABLED | \
+				 DRV_MB_PARAM_WOL_ENABLED)
+#define DRV_MB_PARAM_WOL_DEFAULT	DRV_MB_PARAM_UNLOAD_WOL_MCP
+#define DRV_MB_PARAM_WOL_DISABLED	DRV_MB_PARAM_UNLOAD_WOL_DISABLED
+#define DRV_MB_PARAM_WOL_ENABLED	DRV_MB_PARAM_UNLOAD_WOL_ENABLED
+
+#define DRV_MB_PARAM_ESWITCH_MODE_MASK	(DRV_MB_PARAM_ESWITCH_MODE_NONE | \
+					 DRV_MB_PARAM_ESWITCH_MODE_VEB | \
+					 DRV_MB_PARAM_ESWITCH_MODE_VEPA)
+#define DRV_MB_PARAM_ESWITCH_MODE_NONE	0x0
+#define DRV_MB_PARAM_ESWITCH_MODE_VEB	0x1
+#define DRV_MB_PARAM_ESWITCH_MODE_VEPA	0x2
 
 #define DRV_MB_PARAM_SET_LED_MODE_OPER		0x0
 #define DRV_MB_PARAM_SET_LED_MODE_ON		0x1
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c418360ba02a..d9fa52a22667 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -243,6 +243,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 				    &dev_info->mfw_rev, NULL);
 	}
 
+	dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+
 	return 0;
 }
 
@@ -1431,6 +1433,76 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
+static int qed_update_drv_state(struct qed_dev *cdev, bool active)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int status = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	status = qed_mcp_ov_update_driver_state(hwfn, ptt, active ?
+						QED_OV_DRIVER_STATE_ACTIVE :
+						QED_OV_DRIVER_STATE_DISABLED);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return status;
+}
+
+static int qed_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int status = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	status = qed_mcp_ov_update_mac(hwfn, ptt, mac);
+	if (status)
+		goto out;
+
+	status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV);
+
+out:
+	qed_ptt_release(hwfn, ptt);
+	return status;
+}
+
+static int qed_update_mtu(struct qed_dev *cdev, u16 mtu)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int status = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	status = qed_mcp_ov_update_mtu(hwfn, ptt, mtu);
+	if (status)
+		goto out;
+
+	status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV);
+
+out:
+	qed_ptt_release(hwfn, ptt);
+	return status;
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
@@ -1465,6 +1537,9 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.get_coalesce = &qed_get_coalesce,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
+	.update_drv_state = &qed_update_drv_state,
+	.update_mac = &qed_update_mac,
+	.update_mtu = &qed_update_mtu,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index bdc9ba92f6d4..98dc913fd76d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/etherdevice.h>
 #include "qed.h"
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
@@ -1068,6 +1069,8 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 
 	info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK);
 
+	info->mtu = (u16)shmem_info.mtu_size;
+
 	DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP),
 		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n",
 		info->pause_on_host, info->protocol,
@@ -1223,6 +1226,166 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
 }
 
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     enum qed_ov_client client)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (client) {
+	case QED_OV_CLIENT_DRV:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OS;
+		break;
+	case QED_OV_CLIENT_USER:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OTHER;
+		break;
+	case QED_OV_CLIENT_VENDOR_SPEC:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid client type %d\n", client);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_CURR_CFG,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+	return rc;
+}
+
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum qed_ov_driver_state drv_state)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (drv_state) {
+	case QED_OV_DRIVER_STATE_NOT_LOADED:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED;
+		break;
+	case QED_OV_DRIVER_STATE_DISABLED:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED;
+		break;
+	case QED_OV_DRIVER_STATE_ACTIVE:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid driver state %d\n", drv_state);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send driver state\n");
+
+	return rc;
+}
+
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u16 mtu)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	drv_mb_param = (u32)mtu << DRV_MB_PARAM_OV_MTU_SIZE_SHIFT;
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", rc);
+
+	return rc;
+}
+
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u8 *mac)
+{
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data union_data;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_CODE_SET_VMAC;
+	mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC <<
+			  DRV_MSG_CODE_VMAC_TYPE_SHIFT;
+	mb_params.param |= MCP_PF_ID(p_hwfn);
+	ether_addr_copy(&union_data.raw_data[0], mac);
+	mb_params.p_data_src = &union_data;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
+
+	return rc;
+}
+
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, enum qed_ov_wol wol)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (wol) {
+	case QED_OV_WOL_DEFAULT:
+		drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT;
+		break;
+	case QED_OV_WOL_DISABLED:
+		drv_mb_param = DRV_MB_PARAM_WOL_DISABLED;
+		break;
+	case QED_OV_WOL_ENABLED:
+		drv_mb_param = DRV_MB_PARAM_WOL_ENABLED;
+		break;
+	default:
+		DP_ERR(p_hwfn, "Invalid wol state %d\n", wol);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_WOL,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc);
+
+	return rc;
+}
+
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
+			      enum qed_ov_eswitch eswitch)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (eswitch) {
+	case QED_OV_ESWITCH_NONE:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_NONE;
+		break;
+	case QED_OV_ESWITCH_VEB:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEB;
+		break;
+	case QED_OV_ESWITCH_VEPA:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA;
+		break;
+	default:
+		DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc);
+
+	return rc;
+}
+
 int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index dff520ed069b..89507190628d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -92,6 +92,8 @@ struct qed_mcp_function_info {
 
 #define QED_MCP_VLAN_UNSET              (0xffff)
 	u16				ovlan;
+
+	u16				mtu;
 };
 
 struct qed_mcp_nvm_common {
@@ -147,6 +149,30 @@ union qed_mcp_protocol_stats {
 	struct qed_mcp_rdma_stats rdma_stats;
 };
 
+enum qed_ov_eswitch {
+	QED_OV_ESWITCH_NONE,
+	QED_OV_ESWITCH_VEB,
+	QED_OV_ESWITCH_VEPA
+};
+
+enum qed_ov_client {
+	QED_OV_CLIENT_DRV,
+	QED_OV_CLIENT_USER,
+	QED_OV_CLIENT_VENDOR_SPEC
+};
+
+enum qed_ov_driver_state {
+	QED_OV_DRIVER_STATE_NOT_LOADED,
+	QED_OV_DRIVER_STATE_DISABLED,
+	QED_OV_DRIVER_STATE_ACTIVE
+};
+
+enum qed_ov_wol {
+	QED_OV_WOL_DEFAULT,
+	QED_OV_WOL_DISABLED,
+	QED_OV_WOL_ENABLED
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -277,6 +303,69 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 struct qed_mcp_drv_version *p_ver);
 
+/**
+ * @brief Notify MFW about the change in base device properties
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param client - qed client type
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     enum qed_ov_client client);
+
+/**
+ * @brief Notify MFW about the driver state
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param drv_state - Driver state
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum qed_ov_driver_state drv_state);
+
+/**
+ * @brief Send MTU size to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mtu - MTU size
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u16 mtu);
+
+/**
+ * @brief Send MAC address to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mac - MAC address
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u8 *mac);
+
+/**
+ * @brief Send WOL mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param wol - WOL mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  enum qed_ov_wol wol);
+
 /**
  * @brief Set LED status
  *
@@ -546,4 +635,17 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u32 mask_parities);
 
+/**
+ * @brief Send eswitch mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param eswitch - eswitch mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
+			      enum qed_ov_eswitch eswitch);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 0100f5c0a4ec..775fdaafd24d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -739,6 +739,8 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu)
 
 	qede_update_mtu(edev, &args);
 
+	edev->ops->common->update_mtu(edev->cdev, args.mtu);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 8488ad36a2b8..df0bd0ce2b18 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2396,6 +2396,8 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	/* Set network device HW mac */
 	ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
+
+	ndev->mtu = edev->dev_info.common.mtu;
 }
 
 /* This function converts from 32b param to two params of level and module
@@ -3751,6 +3753,8 @@ static int qede_open(struct net_device *ndev)
 
 	udp_tunnel_get_rx_info(ndev);
 
+	edev->ops->common->update_drv_state(edev->cdev, true);
+
 	return 0;
 }
 
@@ -3760,6 +3764,8 @@ static int qede_close(struct net_device *ndev)
 
 	qede_unload(edev, QEDE_UNLOAD_NORMAL);
 
+	edev->ops->common->update_drv_state(edev->cdev, false);
+
 	return 0;
 }
 
@@ -3820,6 +3826,8 @@ static int qede_set_mac_addr(struct net_device *ndev, void *p)
 	if (rc)
 		return rc;
 
+	edev->ops->common->update_mac(edev->cdev, addr->sa_data);
+
 	/* Add MAC filter according to the new unicast HW MAC address */
 	ether_addr_copy(edev->primary_mac, ndev->dev_addr);
 	return qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_ADD,
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8978a60371f4..5c909cd02764 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -267,6 +267,7 @@ struct qed_dev_info {
 	u8		mf_mode;
 	bool		tx_switching;
 	bool		rdma_supported;
+	u16		mtu;
 };
 
 enum qed_sb_type {
@@ -554,6 +555,33 @@ struct qed_common_ops {
  */
 	int (*set_led)(struct qed_dev *cdev,
 		       enum qed_led_mode mode);
+
+/**
+ * @brief update_drv_state - API to inform the change in the driver state.
+ *
+ * @param cdev
+ * @param active
+ *
+ */
+	int (*update_drv_state)(struct qed_dev *cdev, bool active);
+
+/**
+ * @brief update_mac - API to inform the change in the mac address
+ *
+ * @param cdev
+ * @param mac
+ *
+ */
+	int (*update_mac)(struct qed_dev *cdev, u8 *mac);
+
+/**
+ * @brief update_mtu - API to inform the change in the mtu
+ *
+ * @param cdev
+ * @param mtu
+ *
+ */
+	int (*update_mtu)(struct qed_dev *cdev, u16 mtu);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 7a4b21b7d1f0644456501e33d3917c9aaee76a75 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Mon, 31 Oct 2016 07:14:22 +0200
Subject: qed: Add nvram selftest

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h       |   4 +
 drivers/net/ethernet/qlogic/qed/qed_main.c      |   1 +
 drivers/net/ethernet/qlogic/qed/qed_mcp.c       |  94 ++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h       |  41 ++++++++++
 drivers/net/ethernet/qlogic/qed/qed_selftest.c  | 101 ++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_selftest.h  |  10 +++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |   7 ++
 include/linux/qed/qed_if.h                      |   9 +++
 8 files changed, 267 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 36de87a1befa..f7dfa2ec2d19 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -8666,6 +8666,8 @@ struct public_drv_mb {
 
 #define DRV_MB_PARAM_BIST_REGISTER_TEST		1
 #define DRV_MB_PARAM_BIST_CLOCK_TEST		2
+#define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES	3
+#define DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX	4
 
 #define DRV_MB_PARAM_BIST_RC_UNKNOWN		0
 #define DRV_MB_PARAM_BIST_RC_PASSED		1
@@ -8674,6 +8676,8 @@ struct public_drv_mb {
 
 #define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT	0
 #define DRV_MB_PARAM_BIST_TEST_INDEX_MASK	0x000000FF
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT	8
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK		0x0000FF00
 
 	u32 fw_mb_header;
 #define FW_MSG_CODE_MASK			0xffff0000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index d9fa52a22667..31f8e420c830 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1508,6 +1508,7 @@ static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_interrupt = &qed_selftest_interrupt,
 	.selftest_register = &qed_selftest_register,
 	.selftest_clock = &qed_selftest_clock,
+	.selftest_nvram = &qed_selftest_nvram,
 };
 
 const struct qed_common_ops qed_common_ops_pass = {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 98dc913fd76d..8be61570ce6b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1434,6 +1434,52 @@ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len)
+{
+	u32 bytes_left = len, offset = 0, bytes_to_copy, read_len = 0;
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	u32 resp = 0, resp_param = 0;
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+
+	while (bytes_left > 0) {
+		bytes_to_copy = min_t(u32, bytes_left, MCP_DRV_NVM_BUF_LEN);
+
+		rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+					DRV_MSG_CODE_NVM_READ_NVRAM,
+					addr + offset +
+					(bytes_to_copy <<
+					 DRV_MB_PARAM_NVM_LEN_SHIFT),
+					&resp, &resp_param,
+					&read_len,
+					(u32 *)(p_buf + offset));
+
+		if (rc || (resp != FW_MSG_CODE_NVM_OK)) {
+			DP_NOTICE(cdev, "MCP command rc = %d\n", rc);
+			break;
+		}
+
+		/* This can be a lengthy process, and it's possible scheduler
+		 * isn't preemptable. Sleep a bit to prevent CPU hogging.
+		 */
+		if (bytes_left % 0x1000 <
+		    (bytes_left - read_len) % 0x1000)
+			usleep_range(1000, 2000);
+
+		offset += read_len;
+		bytes_left -= read_len;
+	}
+
+	cdev->mcp_nvm_resp = resp;
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
@@ -1475,3 +1521,51 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 	return rc;
 }
+
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+					 struct qed_ptt *p_ptt,
+					 u32 *num_images)
+{
+	u32 drv_mb_param = 0, rsp;
+	int rc = 0;
+
+	drv_mb_param = (DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES <<
+			DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
+			 drv_mb_param, &rsp, num_images);
+	if (rc)
+		return rc;
+
+	if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK))
+		rc = -EINVAL;
+
+	return rc;
+}
+
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					struct bist_nvm_image_att *p_image_att,
+					u32 image_index)
+{
+	u32 buf_size = 0, param, resp = 0, resp_param = 0;
+	int rc;
+
+	param = DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX <<
+		DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT;
+	param |= image_index << DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT;
+
+	rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+				DRV_MSG_CODE_BIST_TEST, param,
+				&resp, &resp_param,
+				&buf_size,
+				(u32 *)p_image_att);
+	if (rc)
+		return rc;
+
+	if (((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) ||
+	    (p_image_att->return_code != 1))
+		rc = -EINVAL;
+
+	return rc;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 89507190628d..be8152d49de2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -379,6 +379,18 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
 		    enum qed_led_mode mode);
 
+/**
+ * @brief Read from nvm
+ *
+ *  @param cdev
+ *  @param addr - nvm offset
+ *  @param p_buf - nvm read buffer
+ *  @param len - buffer len
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
+
 /**
  * @brief Bist register test
  *
@@ -401,6 +413,35 @@ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn,
 int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt);
 
+/**
+ * @brief Bist nvm test - get number of images
+ *
+ *  @param p_hwfn       - hw function
+ *  @param p_ptt        - PTT required for register access
+ *  @param num_images   - number of images if operation was
+ *			  successful. 0 if not.
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+					 struct qed_ptt *p_ptt,
+					 u32 *num_images);
+
+/**
+ * @brief Bist nvm test - get image attributes by index
+ *
+ *  @param p_hwfn      - hw function
+ *  @param p_ptt       - PTT required for register access
+ *  @param p_image_att - Attributes of image
+ *  @param image_index - Index of image to get information for
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					struct bist_nvm_image_att *p_image_att,
+					u32 image_index);
+
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
  * TODO - this shouldn't really be in .h file, but until all fields
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
index 9b7678f26909..48bfaecaf6dc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
@@ -1,3 +1,4 @@
+#include <linux/crc32.h>
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_mcp.h"
@@ -75,3 +76,103 @@ int qed_selftest_clock(struct qed_dev *cdev)
 
 	return rc;
 }
+
+int qed_selftest_nvram(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 num_images, i, j, nvm_crc, calc_crc;
+	struct bist_nvm_image_att image_att;
+	u8 *buf = NULL;
+	__be32 val;
+	int rc;
+
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "failed to acquire ptt\n");
+		return -EBUSY;
+	}
+
+	/* Acquire from MFW the amount of available images */
+	rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
+	if (rc || !num_images) {
+		DP_ERR(p_hwfn, "Failed getting number of images\n");
+		return -EINVAL;
+	}
+
+	/* Iterate over images and validate CRC */
+	for (i = 0; i < num_images; i++) {
+		/* This mailbox returns information about the image required for
+		 * reading it.
+		 */
+		rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
+							 &image_att, i);
+		if (rc) {
+			DP_ERR(p_hwfn,
+			       "Failed getting image index %d attributes\n",
+			       i);
+			goto err0;
+		}
+
+		/* After MFW crash dump is collected - the image's CRC stops
+		 * being valid.
+		 */
+		if (image_att.image_type == NVM_TYPE_MDUMP)
+			continue;
+
+		DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n",
+			   i, image_att.len);
+
+		/* Allocate a buffer for holding the nvram image */
+		buf = kzalloc(image_att.len, GFP_KERNEL);
+		if (!buf) {
+			rc = -ENOMEM;
+			goto err0;
+		}
+
+		/* Read image into buffer */
+		rc = qed_mcp_nvm_read(p_hwfn->cdev, image_att.nvm_start_addr,
+				      buf, image_att.len);
+		if (rc) {
+			DP_ERR(p_hwfn,
+			       "Failed reading image index %d from nvm.\n", i);
+			goto err1;
+		}
+
+		/* Convert the buffer into big-endian format (excluding the
+		 * closing 4 bytes of CRC).
+		 */
+		for (j = 0; j < image_att.len - 4; j += 4) {
+			val = cpu_to_be32(*(u32 *)&buf[j]);
+			*(u32 *)&buf[j] = (__force u32)val;
+		}
+
+		/* Calc CRC for the "actual" image buffer, i.e. not including
+		 * the last 4 CRC bytes.
+		 */
+		nvm_crc = *(u32 *)(buf + image_att.len - 4);
+		calc_crc = crc32(0xffffffff, buf, image_att.len - 4);
+		calc_crc = (__force u32)~cpu_to_be32(calc_crc);
+		DP_VERBOSE(p_hwfn, QED_MSG_SP,
+			   "nvm crc 0x%x, calc_crc 0x%x\n", nvm_crc, calc_crc);
+
+		if (calc_crc != nvm_crc) {
+			rc = -EINVAL;
+			goto err1;
+		}
+
+		/* Done with this image; Free to prevent double release
+		 * on subsequent failure.
+		 */
+		kfree(buf);
+		buf = NULL;
+	}
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	return 0;
+
+err1:
+	kfree(buf);
+err0:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
index 50eb0b49950f..739ddb730967 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
@@ -37,4 +37,14 @@ int qed_selftest_register(struct qed_dev *cdev);
  * @return int
  */
 int qed_selftest_clock(struct qed_dev *cdev);
+
+/**
+ * @brief qed_selftest_nvram - Perform nvram test
+ *
+ * @param cdev
+ *
+ * @return int
+ */
+int qed_selftest_nvram(struct qed_dev *cdev);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 775fdaafd24d..a8094088b9ac 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -157,6 +157,7 @@ enum qede_ethtool_tests {
 	QEDE_ETHTOOL_MEMORY_TEST,
 	QEDE_ETHTOOL_REGISTER_TEST,
 	QEDE_ETHTOOL_CLOCK_TEST,
+	QEDE_ETHTOOL_NVRAM_TEST,
 	QEDE_ETHTOOL_TEST_MAX
 };
 
@@ -166,6 +167,7 @@ static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = {
 	"Memory (online)\t\t",
 	"Register (online)\t",
 	"Clock (online)\t\t",
+	"Nvram (online)\t\t",
 };
 
 static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
@@ -1392,6 +1394,11 @@ static void qede_self_test(struct net_device *dev,
 		buf[QEDE_ETHTOOL_CLOCK_TEST] = 1;
 		etest->flags |= ETH_TEST_FL_FAILED;
 	}
+
+	if (edev->ops->common->selftest->selftest_nvram(edev->cdev)) {
+		buf[QEDE_ETHTOOL_NVRAM_TEST] = 1;
+		etest->flags |= ETH_TEST_FL_FAILED;
+	}
 }
 
 static int qede_set_tunable(struct net_device *dev,
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 5c909cd02764..ffc2d2f5e88f 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -402,6 +402,15 @@ struct qed_selftest_ops {
  * @return 0 on success, error otherwise.
  */
 	int (*selftest_clock)(struct qed_dev *cdev);
+
+/**
+ * @brief selftest_nvram - Perform nvram test
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*selftest_nvram) (struct qed_dev *cdev);
 };
 
 struct qed_common_ops {
-- 
cgit v1.2.3


From 14d39648cbfc6289e3f873d30f282b9517ebe860 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Mon, 31 Oct 2016 07:14:23 +0200
Subject: qed*: Add support for WoL

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h           | 11 ++++-
 drivers/net/ethernet/qlogic/qed/qed_dev.c       | 19 ++++++++-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h       |  4 ++
 drivers/net/ethernet/qlogic/qed/qed_main.c      | 29 +++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c       | 56 ++++++++++++++++++++++++-
 drivers/net/ethernet/qlogic/qede/qede.h         |  2 +
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 41 ++++++++++++++++++
 drivers/net/ethernet/qlogic/qede/qede_main.c    |  9 ++++
 include/linux/qed/qed_if.h                      | 10 +++++
 9 files changed, 176 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index f20243c1fb0b..8828ffac4b23 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -195,6 +195,11 @@ enum qed_dev_cap {
 	QED_DEV_CAP_ROCE,
 };
 
+enum qed_wol_support {
+	QED_WOL_SUPPORT_NONE,
+	QED_WOL_SUPPORT_PME,
+};
+
 struct qed_hw_info {
 	/* PCI personality */
 	enum qed_pci_personality	personality;
@@ -227,6 +232,8 @@ struct qed_hw_info {
 	u32				hw_mode;
 	unsigned long		device_capabilities;
 	u16				mtu;
+
+	enum qed_wol_support b_wol_support;
 };
 
 struct qed_hw_cid_data {
@@ -539,7 +546,9 @@ struct qed_dev {
 	u8				mcp_rev;
 	u8				boot_mode;
 
-	u8				wol;
+	/* WoL related configurations */
+	u8 wol_config;
+	u8 wol_mac[ETH_ALEN];
 
 	u32				int_mode;
 	enum qed_coalescing_mode	int_coalescing_mode;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 33fd69e24bae..127ed5f27d8d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1364,8 +1364,24 @@ int qed_hw_reset(struct qed_dev *cdev)
 {
 	int rc = 0;
 	u32 unload_resp, unload_param;
+	u32 wol_param;
 	int i;
 
+	switch (cdev->wol_config) {
+	case QED_OV_WOL_DISABLED:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED;
+		break;
+	case QED_OV_WOL_ENABLED:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED;
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Unknown WoL configuration %02x\n", cdev->wol_config);
+		/* Fallthrough */
+	case QED_OV_WOL_DEFAULT:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
+	}
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -1394,8 +1410,7 @@ int qed_hw_reset(struct qed_dev *cdev)
 
 		/* Send unload command to MCP */
 		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				 DRV_MSG_CODE_UNLOAD_REQ,
-				 DRV_MB_PARAM_UNLOAD_WOL_MCP,
+				 DRV_MSG_CODE_UNLOAD_REQ, wol_param,
 				 &unload_resp, &unload_param);
 		if (rc) {
 			DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index f7dfa2ec2d19..fdb7a099955b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -8601,6 +8601,7 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
+#define DRV_MSG_CODE_OS_WOL			0x002e0000
 
 #define DRV_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
@@ -8697,6 +8698,9 @@ struct public_drv_mb {
 #define FW_MSG_CODE_NVM_OK			0x00010000
 #define FW_MSG_CODE_OK				0x00160000
 
+#define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
+#define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
+
 #define FW_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 fw_mb_param;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 31f8e420c830..b71d73a41b10 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -221,6 +221,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		dev_info->fw_eng = FW_ENGINEERING_VERSION;
 		dev_info->mf_mode = cdev->mf_mode;
 		dev_info->tx_switching = true;
+
+		if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
+		    QED_WOL_SUPPORT_PME)
+			dev_info->wol_support = true;
 	} else {
 		qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
 				      &dev_info->fw_minor, &dev_info->fw_rev,
@@ -1433,6 +1437,30 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
+static int qed_update_wol(struct qed_dev *cdev, bool enabled)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int rc = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_mcp_ov_update_wol(hwfn, ptt, enabled ? QED_OV_WOL_ENABLED
+				   : QED_OV_WOL_DISABLED);
+	if (rc)
+		goto out;
+	rc = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV);
+
+out:
+	qed_ptt_release(hwfn, ptt);
+	return rc;
+}
+
 static int qed_update_drv_state(struct qed_dev *cdev, bool active)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -1541,6 +1569,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
+	.update_wol = &qed_update_wol,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 8be61570ce6b..768b35b1dca0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -330,6 +330,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 				 struct qed_mcp_mb_params *p_mb_params)
 {
 	u32 union_data_addr;
+
 	int rc;
 
 	/* MCP not initialized */
@@ -375,11 +376,32 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 		u32 *o_mcp_param)
 {
 	struct qed_mcp_mb_params mb_params;
+	union drv_union_data data_src;
 	int rc;
 
 	memset(&mb_params, 0, sizeof(mb_params));
+	memset(&data_src, 0, sizeof(data_src));
 	mb_params.cmd = cmd;
 	mb_params.param = param;
+
+	/* In case of UNLOAD_DONE, set the primary MAC */
+	if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) &&
+	    (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) {
+		u8 *p_mac = p_hwfn->cdev->wol_mac;
+
+		data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1];
+		data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 |
+					     p_mac[4] << 8 | p_mac[5];
+
+		DP_VERBOSE(p_hwfn,
+			   (QED_MSG_SP | NETIF_MSG_IFDOWN),
+			   "Setting WoL MAC: %pM --> [%08x,%08x]\n",
+			   p_mac, data_src.wol_mac.mac_upper,
+			   data_src.wol_mac.mac_lower);
+
+		mb_params.p_data_src = &data_src;
+	}
+
 	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 	if (rc)
 		return rc;
@@ -1058,6 +1080,9 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 		info->mac[3] = (u8)(shmem_info.mac_lower >> 16);
 		info->mac[4] = (u8)(shmem_info.mac_lower >> 8);
 		info->mac[5] = (u8)(shmem_info.mac_lower);
+
+		/* Store primary MAC for later possible WoL */
+		memcpy(&p_hwfn->cdev->wol_mac, info->mac, ETH_ALEN);
 	} else {
 		DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n");
 	}
@@ -1071,13 +1096,28 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 
 	info->mtu = (u16)shmem_info.mtu_size;
 
+	p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_NONE;
+	p_hwfn->cdev->wol_config = (u8)QED_OV_WOL_DEFAULT;
+	if (qed_mcp_is_init(p_hwfn)) {
+		u32 resp = 0, param = 0;
+		int rc;
+
+		rc = qed_mcp_cmd(p_hwfn, p_ptt,
+				 DRV_MSG_CODE_OS_WOL, 0, &resp, &param);
+		if (rc)
+			return rc;
+		if (resp == FW_MSG_CODE_OS_WOL_SUPPORTED)
+			p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_PME;
+	}
+
 	DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP),
-		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n",
+		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x wol %02x\n",
 		info->pause_on_host, info->protocol,
 		info->bandwidth_min, info->bandwidth_max,
 		info->mac[0], info->mac[1], info->mac[2],
 		info->mac[3], info->mac[4], info->mac[5],
-		info->wwn_port, info->wwn_node, info->ovlan);
+		info->wwn_port, info->wwn_node,
+		info->ovlan, (u8)p_hwfn->hw_info.b_wol_support);
 
 	return 0;
 }
@@ -1322,6 +1362,9 @@ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
 	if (rc)
 		DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
 
+	/* Store primary MAC for later possible WoL */
+	memcpy(p_hwfn->cdev->wol_mac, mac, ETH_ALEN);
+
 	return rc;
 }
 
@@ -1332,6 +1375,12 @@ int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
 	u32 drv_mb_param;
 	int rc;
 
+	if (p_hwfn->hw_info.b_wol_support == QED_WOL_SUPPORT_NONE) {
+		DP_VERBOSE(p_hwfn, QED_MSG_SP,
+			   "Can't change WoL configuration when WoL isn't supported\n");
+		return -EINVAL;
+	}
+
 	switch (wol) {
 	case QED_OV_WOL_DEFAULT:
 		drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT;
@@ -1352,6 +1401,9 @@ int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
 	if (rc)
 		DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc);
 
+	/* Store the WoL update for a future unload */
+	p_hwfn->cdev->wol_config = (u8)wol;
+
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index cf8d3547aecf..0cba21bf9d5f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -193,6 +193,8 @@ struct qede_dev {
 	u16				vxlan_dst_port;
 	u16				geneve_dst_port;
 
+	bool wol_enabled;
+
 	struct qede_rdma_dev		rdma_info;
 };
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index a8094088b9ac..327c614e76aa 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -483,6 +483,45 @@ static void qede_get_drvinfo(struct net_device *ndev,
 	strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info));
 }
 
+static void qede_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+
+	if (edev->dev_info.common.wol_support) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = edev->wol_enabled ? WAKE_MAGIC : 0;
+	}
+}
+
+static int qede_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+	bool wol_requested;
+	int rc;
+
+	if (wol->wolopts & ~WAKE_MAGIC) {
+		DP_INFO(edev,
+			"Can't support WoL options other than magic-packet\n");
+		return -EINVAL;
+	}
+
+	wol_requested = !!(wol->wolopts & WAKE_MAGIC);
+	if (wol_requested == edev->wol_enabled)
+		return 0;
+
+	/* Need to actually change configuration */
+	if (!edev->dev_info.common.wol_support) {
+		DP_INFO(edev, "Device doesn't support WoL\n");
+		return -EINVAL;
+	}
+
+	rc = edev->ops->common->update_wol(edev->cdev, wol_requested);
+	if (!rc)
+		edev->wol_enabled = wol_requested;
+
+	return rc;
+}
+
 static u32 qede_get_msglevel(struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
@@ -1449,6 +1488,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
 	.get_drvinfo = qede_get_drvinfo,
 	.get_regs_len = qede_get_regs_len,
 	.get_regs = qede_get_regs,
+	.get_wol = qede_get_wol,
+	.set_wol = qede_set_wol,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
 	.nway_reset = qede_nway_reset,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index df0bd0ce2b18..873f2ebe249e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -95,6 +95,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 #define TX_TIMEOUT		(5 * HZ)
 
 static void qede_remove(struct pci_dev *pdev);
+static void qede_shutdown(struct pci_dev *pdev);
 static int qede_alloc_rx_buffer(struct qede_dev *edev,
 				struct qede_rx_queue *rxq);
 static void qede_link_update(void *dev, struct qed_link_output *link);
@@ -166,6 +167,7 @@ static struct pci_driver qede_pci_driver = {
 	.id_table = qede_pci_tbl,
 	.probe = qede_probe,
 	.remove = qede_remove,
+	.shutdown = qede_shutdown,
 #ifdef CONFIG_QED_SRIOV
 	.sriov_configure = qede_sriov_configure,
 #endif
@@ -2705,6 +2707,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
+	if (system_state == SYSTEM_POWER_OFF)
+		return;
 	qed_ops->common->remove(cdev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
@@ -2715,6 +2719,11 @@ static void qede_remove(struct pci_dev *pdev)
 	__qede_remove(pdev, QEDE_REMOVE_NORMAL);
 }
 
+static void qede_shutdown(struct pci_dev *pdev)
+{
+	__qede_remove(pdev, QEDE_REMOVE_NORMAL);
+}
+
 /* -------------------------------------------------------------------------
  * START OF LOAD / UNLOAD
  * -------------------------------------------------------------------------
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ffc2d2f5e88f..ea095b4893aa 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -268,6 +268,8 @@ struct qed_dev_info {
 	bool		tx_switching;
 	bool		rdma_supported;
 	u16		mtu;
+
+	bool wol_support;
 };
 
 enum qed_sb_type {
@@ -591,6 +593,14 @@ struct qed_common_ops {
  *
  */
 	int (*update_mtu)(struct qed_dev *cdev, u16 mtu);
+
+/**
+ * @brief update_wol - update of changes in the WoL configuration
+ *
+ * @param cdev
+ * @param enabled - true iff WoL should be enabled.
+ */
+	int (*update_wol) (struct qed_dev *cdev, bool enabled);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 2edbff8dcb5da324fd4c4fe953629e4f6ca73c99 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <Tomer.Tayar@cavium.com>
Date: Mon, 31 Oct 2016 07:14:27 +0200
Subject: qed: Learn resources from management firmware

Currently, each interfaces assumes it receives an equal portion
of HW/FW resources, but this is wasteful - different partitions
[and specifically, parititions exposing different protocol support]
might require different resources.

Implement a new resource learning scheme where the information is
received directly from the management firmware [which has knowledge
of all of the functions and can serve as arbiter].

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h     |   6 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 291 ++++++++++++++++++++++++------
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |  46 +++++
 drivers/net/ethernet/qlogic/qed/qed_l2.c  |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c |  42 +++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  15 ++
 include/linux/qed/qed_eth_if.h            |   2 +-
 7 files changed, 341 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 6d3013fe6987..50b8a01ff512 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -154,7 +154,10 @@ struct qed_qm_iids {
 	u32 tids;
 };
 
-enum QED_RESOURCES {
+/* HW / FW resources, output of features supported below, most information
+ * is received from MFW.
+ */
+enum qed_resources {
 	QED_SB,
 	QED_L2_QUEUE,
 	QED_VPORT,
@@ -166,6 +169,7 @@ enum QED_RESOURCES {
 	QED_RDMA_CNQ_RAM,
 	QED_ILT,
 	QED_LL2_QUEUE,
+	QED_CMDQS_CQS,
 	QED_RDMA_STATS_QUEUE,
 	QED_MAX_RESC,
 };
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index d996afe833ee..5be7b8a25425 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1512,47 +1512,240 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 		   RESC_NUM(p_hwfn, QED_SB), num_features);
 }
 
-static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id)
+{
+	enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
+
+	switch (res_id) {
+	case QED_SB:
+		mfw_res_id = RESOURCE_NUM_SB_E;
+		break;
+	case QED_L2_QUEUE:
+		mfw_res_id = RESOURCE_NUM_L2_QUEUE_E;
+		break;
+	case QED_VPORT:
+		mfw_res_id = RESOURCE_NUM_VPORT_E;
+		break;
+	case QED_RSS_ENG:
+		mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E;
+		break;
+	case QED_PQ:
+		mfw_res_id = RESOURCE_NUM_PQ_E;
+		break;
+	case QED_RL:
+		mfw_res_id = RESOURCE_NUM_RL_E;
+		break;
+	case QED_MAC:
+	case QED_VLAN:
+		/* Each VFC resource can accommodate both a MAC and a VLAN */
+		mfw_res_id = RESOURCE_VFC_FILTER_E;
+		break;
+	case QED_ILT:
+		mfw_res_id = RESOURCE_ILT_E;
+		break;
+	case QED_LL2_QUEUE:
+		mfw_res_id = RESOURCE_LL2_QUEUE_E;
+		break;
+	case QED_RDMA_CNQ_RAM:
+	case QED_CMDQS_CQS:
+		/* CNQ/CMDQS are the same resource */
+		mfw_res_id = RESOURCE_CQS_E;
+		break;
+	case QED_RDMA_STATS_QUEUE:
+		mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E;
+		break;
+	default:
+		break;
+	}
+
+	return mfw_res_id;
+}
+
+static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn,
+				    enum qed_resources res_id)
 {
-	u8 enabled_func_idx = p_hwfn->enabled_func_idx;
-	u32 *resc_start = p_hwfn->hw_info.resc_start;
 	u8 num_funcs = p_hwfn->num_funcs_on_engine;
-	u32 *resc_num = p_hwfn->hw_info.resc_num;
 	struct qed_sb_cnt_info sb_cnt_info;
-	int i, max_vf_vlan_filters;
+	u32 dflt_resc_num = 0;
 
-	memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+	switch (res_id) {
+	case QED_SB:
+		memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+		qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+		dflt_resc_num = sb_cnt_info.sb_cnt;
+		break;
+	case QED_L2_QUEUE:
+		dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs;
+		break;
+	case QED_VPORT:
+		dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs;
+		break;
+	case QED_RSS_ENG:
+		dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs;
+		break;
+	case QED_PQ:
+		/* The granularity of the PQs is 8 */
+		dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs;
+		dflt_resc_num &= ~0x7;
+		break;
+	case QED_RL:
+		dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs;
+		break;
+	case QED_MAC:
+	case QED_VLAN:
+		/* Each VFC resource can accommodate both a MAC and a VLAN */
+		dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs;
+		break;
+	case QED_ILT:
+		dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs;
+		break;
+	case QED_LL2_QUEUE:
+		dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+		break;
+	case QED_RDMA_CNQ_RAM:
+	case QED_CMDQS_CQS:
+		/* CNQ/CMDQS are the same resource */
+		dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs;
+		break;
+	case QED_RDMA_STATS_QUEUE:
+		dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs;
+		break;
+	default:
+		break;
+	}
 
-#ifdef CONFIG_QED_SRIOV
-	max_vf_vlan_filters = QED_ETH_MAX_VF_NUM_VLAN_FILTERS;
-#else
-	max_vf_vlan_filters = 0;
-#endif
+	return dflt_resc_num;
+}
+
+static const char *qed_hw_get_resc_name(enum qed_resources res_id)
+{
+	switch (res_id) {
+	case QED_SB:
+		return "SB";
+	case QED_L2_QUEUE:
+		return "L2_QUEUE";
+	case QED_VPORT:
+		return "VPORT";
+	case QED_RSS_ENG:
+		return "RSS_ENG";
+	case QED_PQ:
+		return "PQ";
+	case QED_RL:
+		return "RL";
+	case QED_MAC:
+		return "MAC";
+	case QED_VLAN:
+		return "VLAN";
+	case QED_RDMA_CNQ_RAM:
+		return "RDMA_CNQ_RAM";
+	case QED_ILT:
+		return "ILT";
+	case QED_LL2_QUEUE:
+		return "LL2_QUEUE";
+	case QED_CMDQS_CQS:
+		return "CMDQS_CQS";
+	case QED_RDMA_STATS_QUEUE:
+		return "RDMA_STATS_QUEUE";
+	default:
+		return "UNKNOWN_RESOURCE";
+	}
+}
 
-	qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
+				enum qed_resources res_id)
+{
+	u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param;
+	u32 *p_resc_num, *p_resc_start;
+	struct resource_info resc_info;
+	int rc;
+
+	p_resc_num = &RESC_NUM(p_hwfn, res_id);
+	p_resc_start = &RESC_START(p_hwfn, res_id);
+
+	/* Default values assumes that each function received equal share */
+	dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id);
+	if (!dflt_resc_num) {
+		DP_ERR(p_hwfn,
+		       "Failed to get default amount for resource %d [%s]\n",
+		       res_id, qed_hw_get_resc_name(res_id));
+		return -EINVAL;
+	}
+	dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx;
+
+	memset(&resc_info, 0, sizeof(resc_info));
+	resc_info.res_id = qed_hw_get_mfw_res_id(res_id);
+	if (resc_info.res_id == RESOURCE_NUM_INVALID) {
+		DP_ERR(p_hwfn,
+		       "Failed to match resource %d [%s] with the MFW resources\n",
+		       res_id, qed_hw_get_resc_name(res_id));
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info,
+				   &mcp_resp, &mcp_param);
+	if (rc) {
+		DP_NOTICE(p_hwfn,
+			  "MFW response failure for an allocation request for resource %d [%s]\n",
+			  res_id, qed_hw_get_resc_name(res_id));
+		return rc;
+	}
+
+	/* Default driver values are applied in the following cases:
+	 * - The resource allocation MB command is not supported by the MFW
+	 * - There is an internal error in the MFW while processing the request
+	 * - The resource ID is unknown to the MFW
+	 */
+	if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK &&
+	    mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) {
+		DP_NOTICE(p_hwfn,
+			  "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. Applying default values [num %d, start %d].\n",
+			  res_id,
+			  qed_hw_get_resc_name(res_id),
+			  mcp_resp, dflt_resc_num, dflt_resc_start);
+		*p_resc_num = dflt_resc_num;
+		*p_resc_start = dflt_resc_start;
+		goto out;
+	}
+
+	/* Special handling for status blocks; Would be revised in future */
+	if (res_id == QED_SB) {
+		resc_info.size -= 1;
+		resc_info.offset -= p_hwfn->enabled_func_idx;
+	}
+
+	*p_resc_num = resc_info.size;
+	*p_resc_start = resc_info.offset;
+
+out:
+	/* PQs have to divide by 8 [that's the HW granularity].
+	 * Reduce number so it would fit.
+	 */
+	if ((res_id == QED_PQ) && ((*p_resc_num % 8) || (*p_resc_start % 8))) {
+		DP_INFO(p_hwfn,
+			"PQs need to align by 8; Number %08x --> %08x, Start %08x --> %08x\n",
+			*p_resc_num,
+			(*p_resc_num) & ~0x7,
+			*p_resc_start, (*p_resc_start) & ~0x7);
+		*p_resc_num &= ~0x7;
+		*p_resc_start &= ~0x7;
+	}
 
-	resc_num[QED_SB] = min_t(u32,
-				 (MAX_SB_PER_PATH_BB / num_funcs),
-				 sb_cnt_info.sb_cnt);
-	resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs;
-	resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs;
-	resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs;
-	resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs;
-	resc_num[QED_RL] = min_t(u32, 64, resc_num[QED_VPORT]);
-	resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs;
-	resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) /
-			     num_funcs;
-	resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
-	resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
-	resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
-	resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB /
-					 num_funcs;
-
-	for (i = 0; i < QED_MAX_RESC; i++)
-		resc_start[i] = resc_num[i] * enabled_func_idx;
+	return 0;
+}
+
+static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+{
+	u8 res_id;
+	int rc;
+
+	for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
+		rc = qed_hw_set_resc_info(p_hwfn, res_id);
+		if (rc)
+			return rc;
+	}
 
 	/* Sanity for ILT */
-	if (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB) {
+	if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) {
 		DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n",
 			  RESC_START(p_hwfn, QED_ILT),
 			  RESC_END(p_hwfn, QED_ILT) - 1);
@@ -1562,34 +1755,12 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
 	qed_hw_set_feat(p_hwfn);
 
 	DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-		   "The numbers for each resource are:\n"
-		   "SB = %d start = %d\n"
-		   "L2_QUEUE = %d start = %d\n"
-		   "VPORT = %d start = %d\n"
-		   "PQ = %d start = %d\n"
-		   "RL = %d start = %d\n"
-		   "MAC = %d start = %d\n"
-		   "VLAN = %d start = %d\n"
-		   "ILT = %d start = %d\n"
-		   "LL2_QUEUE = %d start = %d\n",
-		   p_hwfn->hw_info.resc_num[QED_SB],
-		   p_hwfn->hw_info.resc_start[QED_SB],
-		   p_hwfn->hw_info.resc_num[QED_L2_QUEUE],
-		   p_hwfn->hw_info.resc_start[QED_L2_QUEUE],
-		   p_hwfn->hw_info.resc_num[QED_VPORT],
-		   p_hwfn->hw_info.resc_start[QED_VPORT],
-		   p_hwfn->hw_info.resc_num[QED_PQ],
-		   p_hwfn->hw_info.resc_start[QED_PQ],
-		   p_hwfn->hw_info.resc_num[QED_RL],
-		   p_hwfn->hw_info.resc_start[QED_RL],
-		   p_hwfn->hw_info.resc_num[QED_MAC],
-		   p_hwfn->hw_info.resc_start[QED_MAC],
-		   p_hwfn->hw_info.resc_num[QED_VLAN],
-		   p_hwfn->hw_info.resc_start[QED_VLAN],
-		   p_hwfn->hw_info.resc_num[QED_ILT],
-		   p_hwfn->hw_info.resc_start[QED_ILT],
-		   RESC_NUM(p_hwfn, QED_LL2_QUEUE),
-		   RESC_START(p_hwfn, QED_LL2_QUEUE));
+		   "The numbers for each resource are:\n");
+	for (res_id = 0; res_id < QED_MAX_RESC; res_id++)
+		DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n",
+			   qed_hw_get_resc_name(res_id),
+			   RESC_NUM(p_hwfn, res_id),
+			   RESC_START(p_hwfn, res_id));
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 1d113ce814e1..048f9a342413 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -8529,6 +8529,41 @@ struct mdump_config_stc {
 	u32 valid_logs;
 };
 
+enum resource_id_enum {
+	RESOURCE_NUM_SB_E = 0,
+	RESOURCE_NUM_L2_QUEUE_E = 1,
+	RESOURCE_NUM_VPORT_E = 2,
+	RESOURCE_NUM_VMQ_E = 3,
+	RESOURCE_FACTOR_NUM_RSS_PF_E = 4,
+	RESOURCE_FACTOR_RSS_PER_VF_E = 5,
+	RESOURCE_NUM_RL_E = 6,
+	RESOURCE_NUM_PQ_E = 7,
+	RESOURCE_NUM_VF_E = 8,
+	RESOURCE_VFC_FILTER_E = 9,
+	RESOURCE_ILT_E = 10,
+	RESOURCE_CQS_E = 11,
+	RESOURCE_GFT_PROFILES_E = 12,
+	RESOURCE_NUM_TC_E = 13,
+	RESOURCE_NUM_RSS_ENGINES_E = 14,
+	RESOURCE_LL2_QUEUE_E = 15,
+	RESOURCE_RDMA_STATS_QUEUE_E = 16,
+	RESOURCE_MAX_NUM,
+	RESOURCE_NUM_INVALID = 0xFFFFFFFF
+};
+
+/* Resource ID is to be filled by the driver in the MB request
+ * Size, offset & flags to be filled by the MFW in the MB response
+ */
+struct resource_info {
+	enum resource_id_enum res_id;
+	u32 size;		/* number of allocated resources */
+	u32 offset;		/* Offset of the 1st resource */
+	u32 vf_size;
+	u32 vf_offset;
+	u32 flags;
+#define RESOURCE_ELEMENT_STRICT (1 << 0)
+};
+
 union drv_union_data {
 	u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
 	struct mcp_mac wol_mac;
@@ -8549,6 +8584,7 @@ union drv_union_data {
 	u64 reserved_stats[11];
 	struct ocbb_data_stc ocbb_info;
 	struct temperature_status_stc temp_info;
+	struct resource_info resource;
 	struct bist_nvm_image_att nvm_image_att;
 	struct mdump_config_stc mdump_config;
 };
@@ -8576,6 +8612,7 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BW_UPDATE_ACK		0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN			0x30000000
+#define DRV_MSG_GET_RESOURCE_ALLOC_MSG          0x34000000
 #define DRV_MSG_CODE_VF_DISABLED_DONE		0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX		0xc0010000
 #define DRV_MSG_CODE_NVM_GET_FILE_ATT		0x00030000
@@ -8666,6 +8703,12 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_SET_LED_MODE_ON		0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF		0x2
 
+	/* Resource Allocation params - Driver version support */
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK	0x0000FFFF
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT	0
+
 #define DRV_MB_PARAM_BIST_REGISTER_TEST		1
 #define DRV_MB_PARAM_BIST_CLOCK_TEST		2
 #define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES	3
@@ -8694,6 +8737,9 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_UNLOAD_PORT		0x20120000
 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION		0x20130000
 #define FW_MSG_CODE_DRV_UNLOAD_DONE		0x21100000
+#define FW_MSG_CODE_RESOURCE_ALLOC_OK           0x34000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN      0x35000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED   0x36000000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE	0xb0010000
 
 #define FW_MSG_CODE_NVM_OK			0x00010000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 6b0e22d9fe4c..1e3a16edd16d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1691,7 +1691,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 		}
 
 		qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
-					    &info->num_vlan_filters);
+					    (u8 *)&info->num_vlan_filters);
 		qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac);
 
 		info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 092748832caf..d8e499ebb99d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1683,3 +1683,45 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
 
 	return rc;
 }
+
+#define QED_RESC_ALLOC_VERSION_MAJOR    1
+#define QED_RESC_ALLOC_VERSION_MINOR    0
+#define QED_RESC_ALLOC_VERSION				     \
+	((QED_RESC_ALLOC_VERSION_MAJOR <<		     \
+	  DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \
+	 (QED_RESC_ALLOC_VERSION_MINOR <<		     \
+	  DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT))
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct resource_info *p_resc_info,
+			  u32 *p_mcp_resp, u32 *p_mcp_param)
+{
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data *p_union_data;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
+	mb_params.param = QED_RESC_ALLOC_VERSION;
+	p_union_data = (union drv_union_data *)p_resc_info;
+	mb_params.p_data_src = p_union_data;
+	mb_params.p_data_dst = p_union_data;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	*p_mcp_resp = mb_params.mcp_resp;
+	*p_mcp_param = mb_params.mcp_param;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_SP,
+		   "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n",
+		   *p_mcp_param,
+		   p_resc_info->res_id,
+		   p_resc_info->size,
+		   p_resc_info->offset,
+		   p_resc_info->vf_size,
+		   p_resc_info->vf_offset, p_resc_info->flags);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index be8152d49de2..407a2c1830fb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -689,4 +689,19 @@ int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
 			      enum qed_ov_eswitch eswitch);
 
+/**
+ * @brief - Gets the MFW allocation info for the given resource
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param p_resc_info - descriptor of requested resource
+ *  @param p_mcp_resp
+ *  @param p_mcp_param
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct resource_info *p_resc_info,
+			  u32 *p_mcp_resp, u32 *p_mcp_param);
 #endif
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 15130805d792..9755a3feb52e 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -22,7 +22,7 @@ struct qed_dev_eth_info {
 	u8	num_tc;
 
 	u8	port_mac[ETH_ALEN];
-	u8	num_vlan_filters;
+	u16	num_vlan_filters;
 	u16	num_mac_filters;
 
 	/* Legacy VF - this affects the datapath, so qede has to know */
-- 
cgit v1.2.3


From d7c816733d501b59dbdc2483f2cc8e4431fd9160 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Aug 2016 14:42:08 -0700
Subject: list: Split list_add() debug checking into separate function

Right now, __list_add() code is repeated either in list.h or in
list_debug.c, but the only differences between the two versions
are the debug checks. This commit therefore extracts these debug
checks into a separate __list_add_valid() function and consolidates
__list_add(). Additionally this new __list_add_valid() function will stop
list manipulations if a corruption is detected, instead of allowing for
further corruption that may lead to even worse conditions.

This is slight refactoring of the same hardening done in PaX and Grsecurity.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/list.h | 22 ++++++++++++++++------
 lib/list_debug.c     | 48 +++++++++++++++++++++++-------------------------
 2 files changed, 39 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 5809e9a2de5b..b6da9b1dce4d 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -28,27 +28,37 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 	list->prev = list;
 }
 
+#ifdef CONFIG_DEBUG_LIST
+extern bool __list_add_valid(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#else
+static inline bool __list_add_valid(struct list_head *new,
+				struct list_head *prev,
+				struct list_head *next)
+{
+	return true;
+}
+#endif
+
 /*
  * Insert a new entry between two known consecutive entries.
  *
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
+	if (!__list_add_valid(new, prev, next))
+		return;
+
 	next->prev = new;
 	new->next = next;
 	new->prev = prev;
 	WRITE_ONCE(prev->next, new);
 }
-#else
-extern void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-#endif
 
 /**
  * list_add - add a new entry
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3859bf63561c..149dd57b583b 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -2,8 +2,7 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list implementations for
- * DEBUG_LIST.
+ * This file contains the linked list validation for DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -13,33 +12,32 @@
 #include <linux/rculist.h>
 
 /*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
+ * Check that the data structures for the list manipulations are reasonably
+ * valid. Failures here indicate memory corruption (and possibly an exploit
+ * attempt).
  */
 
-void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+		      struct list_head *next)
 {
-	WARN(next->prev != prev,
-		"list_add corruption. next->prev should be "
-		"prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add corruption. prev->next should be "
-		"next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	WARN(new == prev || new == next,
-	     "list_add double add: new=%p, prev=%p, next=%p.\n",
-	     new, prev, next);
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	WRITE_ONCE(prev->next, new);
+	if (unlikely(next->prev != prev)) {
+		WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+			prev, next->prev, next);
+		return false;
+	}
+	if (unlikely(prev->next != next)) {
+		WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+			next, prev->next, prev);
+		return false;
+	}
+	if (unlikely(new == prev || new == next)) {
+		WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n",
+			new, prev, next);
+		return false;
+	}
+	return true;
 }
-EXPORT_SYMBOL(__list_add);
+EXPORT_SYMBOL(__list_add_valid);
 
 void __list_del_entry(struct list_head *entry)
 {
-- 
cgit v1.2.3


From 54acd4397d7e7a725c94101180cd9f38ef701acc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Aug 2016 14:42:09 -0700
Subject: rculist: Consolidate DEBUG_LIST for list_add_rcu()

This commit consolidates the debug checking for list_add_rcu() into the
new single __list_add_valid() debug function.  Notably, this commit fixes
the sanity check that was added in commit 17a801f4bfeb ("list_debug:
WARN for adding something already in the list"), which wasn't checking
RCU-protected lists.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/rculist.h |  8 +++-----
 lib/list_debug.c        | 19 -------------------
 2 files changed, 3 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8beb98dcf14f..4f7a9561b8c4 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -45,19 +45,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add_rcu(struct list_head *new,
 		struct list_head *prev, struct list_head *next)
 {
+	if (!__list_add_valid(new, prev, next))
+		return;
+
 	new->next = next;
 	new->prev = prev;
 	rcu_assign_pointer(list_next_rcu(prev), new);
 	next->prev = new;
 }
-#else
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next);
-#endif
 
 /**
  * list_add_rcu - add a new entry to rcu-protected list
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 149dd57b583b..d0b89b9d0736 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -77,22 +77,3 @@ void list_del(struct list_head *entry)
 	entry->prev = LIST_POISON2;
 }
 EXPORT_SYMBOL(list_del);
-
-/*
- * RCU variants.
- */
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next)
-{
-	WARN(next->prev != prev,
-		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	new->next = next;
-	new->prev = prev;
-	rcu_assign_pointer(list_next_rcu(prev), new);
-	next->prev = new;
-}
-EXPORT_SYMBOL(__list_add_rcu);
-- 
cgit v1.2.3


From 0cd340dcb05c4a43742fe156f36737bb2a321bfd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Aug 2016 14:42:10 -0700
Subject: list: Split list_del() debug checking into separate function

Similar to the list_add() debug consolidation, this commit consolidates
the debug checking performed during CONFIG_DEBUG_LIST into a new
__list_del_entry_valid() function, and stops list updates when corruption
is found.

Refactored from same hardening in PaX and Grsecurity.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/list.h | 15 +++++++++------
 lib/list_debug.c     | 53 +++++++++++++++++++++++-----------------------------
 2 files changed, 32 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index b6da9b1dce4d..d1039ecaf94f 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -32,6 +32,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 extern bool __list_add_valid(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next);
+extern bool __list_del_entry_valid(struct list_head *entry);
 #else
 static inline bool __list_add_valid(struct list_head *new,
 				struct list_head *prev,
@@ -39,6 +40,10 @@ static inline bool __list_add_valid(struct list_head *new,
 {
 	return true;
 }
+static inline bool __list_del_entry_valid(struct list_head *entry)
+{
+	return true;
+}
 #endif
 
 /*
@@ -106,22 +111,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
  * Note: list_empty() on entry does not return true after this, the entry is
  * in an undefined state.
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_del_entry(struct list_head *entry)
 {
+	if (!__list_del_entry_valid(entry))
+		return;
+
 	__list_del(entry->prev, entry->next);
 }
 
 static inline void list_del(struct list_head *entry)
 {
-	__list_del(entry->prev, entry->next);
+	__list_del_entry(entry);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
 }
-#else
-extern void __list_del_entry(struct list_head *entry);
-extern void list_del(struct list_head *entry);
-#endif
 
 /**
  * list_replace - replace old entry by new one
diff --git a/lib/list_debug.c b/lib/list_debug.c
index d0b89b9d0736..276565fca2a6 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -39,41 +39,34 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 }
 EXPORT_SYMBOL(__list_add_valid);
 
-void __list_del_entry(struct list_head *entry)
+bool __list_del_entry_valid(struct list_head *entry)
 {
 	struct list_head *prev, *next;
 
 	prev = entry->prev;
 	next = entry->next;
 
-	if (WARN(next == LIST_POISON1,
-		"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
-		entry, LIST_POISON1) ||
-	    WARN(prev == LIST_POISON2,
-		"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
-		entry, LIST_POISON2) ||
-	    WARN(prev->next != entry,
-		"list_del corruption. prev->next should be %p, "
-		"but was %p\n", entry, prev->next) ||
-	    WARN(next->prev != entry,
-		"list_del corruption. next->prev should be %p, "
-		"but was %p\n", entry, next->prev))
-		return;
-
-	__list_del(prev, next);
-}
-EXPORT_SYMBOL(__list_del_entry);
+	if (unlikely(next == LIST_POISON1)) {
+		WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+			entry, LIST_POISON1);
+		return false;
+	}
+	if (unlikely(prev == LIST_POISON2)) {
+		WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+			entry, LIST_POISON2);
+		return false;
+	}
+	if (unlikely(prev->next != entry)) {
+		WARN(1, "list_del corruption. prev->next should be %p, but was %p\n",
+			entry, prev->next);
+		return false;
+	}
+	if (unlikely(next->prev != entry)) {
+		WARN(1, "list_del corruption. next->prev should be %p, but was %p\n",
+			entry, next->prev);
+		return false;
+	}
+	return true;
 
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-void list_del(struct list_head *entry)
-{
-	__list_del_entry(entry);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
 }
-EXPORT_SYMBOL(list_del);
+EXPORT_SYMBOL(__list_del_entry_valid);
-- 
cgit v1.2.3


From de54ebbe26bb371a6f1fbc0593372232f04e3107 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Aug 2016 14:42:11 -0700
Subject: bug: Provide toggle for BUG on data corruption

The kernel checks for cases of data structure corruption under some
CONFIGs (e.g. CONFIG_DEBUG_LIST). When corruption is detected, some
systems may want to BUG() immediately instead of letting the system run
with known corruption.  Usually these kinds of manipulation primitives can
be used by security flaws to gain arbitrary memory write control. This
provides a new config CONFIG_BUG_ON_DATA_CORRUPTION and a corresponding
macro CHECK_DATA_CORRUPTION for handling these situations. Notably, even
if not BUGing, the kernel should not continue processing the corrupted
structure.

This is inspired by similar hardening by Syed Rameez Mustafa in MSM
kernels, and in PaX and Grsecurity, which is likely in response to earlier
removal of the BUG calls in commit 924d9addb9b1 ("list debugging: use
WARN() instead of BUG()").

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/bug.h | 17 ++++++++++++++++
 lib/Kconfig.debug   | 10 ++++++++++
 lib/list_debug.c    | 57 +++++++++++++++++++++--------------------------------
 3 files changed, 49 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 292d6a10b0c2..baff2e8fc8a8 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -121,4 +121,21 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 }
 
 #endif	/* CONFIG_GENERIC_BUG */
+
+/*
+ * Since detected data corruption should stop operation on the affected
+ * structures, this returns false if the corruption condition is found.
+ */
+#define CHECK_DATA_CORRUPTION(condition, fmt, ...)			 \
+	do {								 \
+		if (unlikely(condition)) {				 \
+			if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+				pr_err(fmt, ##__VA_ARGS__);		 \
+				BUG();					 \
+			} else						 \
+				WARN(1, fmt, ##__VA_ARGS__);		 \
+			return false;					 \
+		}							 \
+	} while (0)
+
 #endif	/* _LINUX_BUG_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 33bc56cf60d7..07a6fac930c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1960,6 +1960,16 @@ config TEST_STATIC_KEYS
 
 	  If unsure, say N.
 
+config BUG_ON_DATA_CORRUPTION
+	bool "Trigger a BUG when data corruption is detected"
+	select CONFIG_DEBUG_LIST
+	help
+	  Select this option if the kernel should BUG when it encounters
+	  data corruption in kernel memory structures when they get checked
+	  for validity.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 276565fca2a6..7f7bfa55eb6d 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,21 +20,16 @@
 bool __list_add_valid(struct list_head *new, struct list_head *prev,
 		      struct list_head *next)
 {
-	if (unlikely(next->prev != prev)) {
-		WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-			prev, next->prev, next);
-		return false;
-	}
-	if (unlikely(prev->next != next)) {
-		WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-			next, prev->next, prev);
-		return false;
-	}
-	if (unlikely(new == prev || new == next)) {
-		WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n",
-			new, prev, next);
-		return false;
-	}
+	CHECK_DATA_CORRUPTION(next->prev != prev,
+		"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+		prev, next->prev, next);
+	CHECK_DATA_CORRUPTION(prev->next != next,
+		"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+		next, prev->next, prev);
+	CHECK_DATA_CORRUPTION(new == prev || new == next,
+		"list_add double add: new=%p, prev=%p, next=%p.\n",
+		new, prev, next);
+
 	return true;
 }
 EXPORT_SYMBOL(__list_add_valid);
@@ -46,26 +41,18 @@ bool __list_del_entry_valid(struct list_head *entry)
 	prev = entry->prev;
 	next = entry->next;
 
-	if (unlikely(next == LIST_POISON1)) {
-		WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
-			entry, LIST_POISON1);
-		return false;
-	}
-	if (unlikely(prev == LIST_POISON2)) {
-		WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
-			entry, LIST_POISON2);
-		return false;
-	}
-	if (unlikely(prev->next != entry)) {
-		WARN(1, "list_del corruption. prev->next should be %p, but was %p\n",
-			entry, prev->next);
-		return false;
-	}
-	if (unlikely(next->prev != entry)) {
-		WARN(1, "list_del corruption. next->prev should be %p, but was %p\n",
-			entry, next->prev);
-		return false;
-	}
+	CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+		"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+		entry, LIST_POISON1);
+	CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+		"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+		entry, LIST_POISON2);
+	CHECK_DATA_CORRUPTION(prev->next != entry,
+		"list_del corruption. prev->next should be %p, but was %p\n",
+		entry, prev->next);
+	CHECK_DATA_CORRUPTION(next->prev != entry,
+		"list_del corruption. next->prev should be %p, but was %p\n",
+		entry, next->prev);
 	return true;
 
 }
-- 
cgit v1.2.3


From 556d299fcb4af8f2e8eacf311c4eee352c746788 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 31 Oct 2016 13:21:02 +0100
Subject: net: pim: add common pimhdr struct and helpers

Add the common pimhdr structure and helpers to access it, also cleanup the
format of the header file.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pim.h | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pim.h b/include/linux/pim.h
index e1d756f81348..354235a2691b 100644
--- a/include/linux/pim.h
+++ b/include/linux/pim.h
@@ -1,6 +1,7 @@
 #ifndef __LINUX_PIM_H
 #define __LINUX_PIM_H
 
+#include <linux/skbuff.h>
 #include <asm/byteorder.h>
 
 /* Message types - V1 */
@@ -13,20 +14,47 @@
 
 #define PIM_NULL_REGISTER	cpu_to_be32(0x40000000)
 
-static inline bool ipmr_pimsm_enabled(void)
-{
-	return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2);
-}
+/* RFC7761, sec 4.9:
+ * The PIM header common to all PIM messages is:
+ *   0                   1                   2                   3
+ *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |PIM Ver| Type  |   Reserved    |           Checksum            |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct pimhdr {
+	__u8	type;
+	__u8	reserved;
+	__be16	csum;
+};
 
 /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */
-struct pimreghdr
-{
+struct pimreghdr {
 	__u8	type;
 	__u8	reserved;
 	__be16	csum;
 	__be32	flags;
 };
 
-struct sk_buff;
-extern int pim_rcv_v1(struct sk_buff *);
+int pim_rcv_v1(struct sk_buff *skb);
+
+static inline bool ipmr_pimsm_enabled(void)
+{
+	return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2);
+}
+
+static inline struct pimhdr *pim_hdr(const struct sk_buff *skb)
+{
+	return (struct pimhdr *)skb_transport_header(skb);
+}
+
+static inline u8 pim_hdr_version(const struct pimhdr *pimhdr)
+{
+	return pimhdr->type >> 4;
+}
+
+static inline u8 pim_hdr_type(const struct pimhdr *pimhdr)
+{
+	return pimhdr->type & 0xf;
+}
 #endif
-- 
cgit v1.2.3


From 20bb6ce9879e19eee7539329eaa2408d12b00306 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 31 Oct 2016 13:21:03 +0100
Subject: net: pim: add a helper to check for IPv4 all pim routers address

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pim.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pim.h b/include/linux/pim.h
index 354235a2691b..1b6c0dbba94e 100644
--- a/include/linux/pim.h
+++ b/include/linux/pim.h
@@ -57,4 +57,10 @@ static inline u8 pim_hdr_type(const struct pimhdr *pimhdr)
 {
 	return pimhdr->type & 0xf;
 }
+
+/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */
+static inline bool pim_ipv4_all_pim_routers(__be32 addr)
+{
+	return addr == htonl(0xE000000D);
+}
 #endif
-- 
cgit v1.2.3


From 56245cae19f5ccb371fa63b09bb6b9ce7c0f1266 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 31 Oct 2016 13:21:04 +0100
Subject: net: pim: add all RFC7761 message types

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pim.h | 31 ++++++++++++++++++++++++++++++-
 net/ipv4/ipmr.c     |  2 +-
 net/ipv6/ip6mr.c    |  2 +-
 3 files changed, 32 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pim.h b/include/linux/pim.h
index 1b6c0dbba94e..0e81b2778ae0 100644
--- a/include/linux/pim.h
+++ b/include/linux/pim.h
@@ -10,7 +10,36 @@
 
 /* Message types - V2 */
 #define PIM_VERSION		2
-#define PIM_REGISTER		1
+
+/* RFC7761, sec 4.9:
+ *  Type
+ *        Types for specific PIM messages.  PIM Types are:
+ *
+ *  Message Type                          Destination
+ *  ---------------------------------------------------------------------
+ *  0 = Hello                             Multicast to ALL-PIM-ROUTERS
+ *  1 = Register                          Unicast to RP
+ *  2 = Register-Stop                     Unicast to source of Register
+ *                                        packet
+ *  3 = Join/Prune                        Multicast to ALL-PIM-ROUTERS
+ *  4 = Bootstrap                         Multicast to ALL-PIM-ROUTERS
+ *  5 = Assert                            Multicast to ALL-PIM-ROUTERS
+ *  6 = Graft (used in PIM-DM only)       Unicast to RPF'(S)
+ *  7 = Graft-Ack (used in PIM-DM only)   Unicast to source of Graft
+ *                                        packet
+ *  8 = Candidate-RP-Advertisement        Unicast to Domain's BSR
+ */
+enum {
+	PIM_TYPE_HELLO,
+	PIM_TYPE_REGISTER,
+	PIM_TYPE_REGISTER_STOP,
+	PIM_TYPE_JOIN_PRUNE,
+	PIM_TYPE_BOOTSTRAP,
+	PIM_TYPE_ASSERT,
+	PIM_TYPE_GRAFT,
+	PIM_TYPE_GRAFT_ACK,
+	PIM_TYPE_CANDIDATE_RP_ADV
+};
 
 #define PIM_NULL_REGISTER	cpu_to_be32(0x40000000)
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f006e13de56..51d71a70fbbe 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2053,7 +2053,7 @@ static int pim_rcv(struct sk_buff *skb)
 		goto drop;
 
 	pim = (struct pimreghdr *)skb_transport_header(skb);
-	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
+	if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
 	    (pim->flags & PIM_NULL_REGISTER) ||
 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7f4265b1649b..52101b37ad6e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb)
 		goto drop;
 
 	pim = (struct pimreghdr *)skb_transport_header(skb);
-	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 	    (pim->flags & PIM_NULL_REGISTER) ||
 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			     sizeof(*pim), IPPROTO_PIM,
-- 
cgit v1.2.3


From e950604782440c8635d289552bb5db58658fcbe9 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 7 Oct 2016 21:23:12 -0700
Subject: rpmsg: Introduce a driver override mechanism

Similar to other subsystems it's useful to provide a mechanism to force
a specific driver match on a device, so introduce this.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/rpmsg/rpmsg_core.c | 3 +++
 include/linux/rpmsg.h      | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index b6ea9ffa7381..087d4db896c8 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -315,6 +315,9 @@ static int rpmsg_dev_match(struct device *dev, struct device_driver *drv)
 	const struct rpmsg_device_id *ids = rpdrv->id_table;
 	unsigned int i;
 
+	if (rpdev->driver_override)
+		return !strcmp(rpdev->driver_override, drv->name);
+
 	if (ids)
 		for (i = 0; ids[i].name[0]; i++)
 			if (rpmsg_id_match(rpdev, &ids[i]))
diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 452d393cc8dd..7ad6c205f110 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -64,6 +64,7 @@ struct rpmsg_channel_info {
  * rpmsg_device - device that belong to the rpmsg bus
  * @dev: the device struct
  * @id: device id (used to match between rpmsg drivers and devices)
+ * @driver_override: driver name to force a match
  * @src: local address
  * @dst: destination address
  * @ept: the rpmsg endpoint of this channel
@@ -72,6 +73,7 @@ struct rpmsg_channel_info {
 struct rpmsg_device {
 	struct device dev;
 	struct rpmsg_device_id id;
+	char *driver_override;
 	u32 src;
 	u32 dst;
 	struct rpmsg_endpoint *ept;
-- 
cgit v1.2.3


From 2c8a57088045a58958372d405586c16e3e12f4e1 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 21 Oct 2016 10:25:37 -0700
Subject: rpmsg: Provide function stubs for API

Provide function stubs for the rpmsg API to allow clients to be compile
tested without having CONFIG_RPMSG enabled.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/rpmsg.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 113 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 7ad6c205f110..18f9e1ae4b7e 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -37,6 +37,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/err.h>
 #include <linux/mod_devicetable.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
@@ -134,6 +135,8 @@ struct rpmsg_driver {
 	int (*callback)(struct rpmsg_device *, void *, int, void *, u32);
 };
 
+#if IS_ENABLED(CONFIG_RPMSG)
+
 int register_rpmsg_device(struct rpmsg_device *dev);
 void unregister_rpmsg_device(struct rpmsg_device *dev);
 int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner);
@@ -143,6 +146,116 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *,
 					rpmsg_rx_cb_t cb, void *priv,
 					struct rpmsg_channel_info chinfo);
 
+int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
+int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
+			  void *data, int len);
+
+int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len);
+int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
+			     void *data, int len);
+
+#else
+
+static inline int register_rpmsg_device(struct rpmsg_device *dev)
+{
+	return -ENXIO;
+}
+
+static inline void unregister_rpmsg_device(struct rpmsg_device *dev)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+}
+
+static inline int __register_rpmsg_driver(struct rpmsg_driver *drv,
+					  struct module *owner)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+static inline void unregister_rpmsg_driver(struct rpmsg_driver *drv)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+}
+
+static inline void rpmsg_destroy_ept(struct rpmsg_endpoint *ept)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+}
+
+static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev,
+						      rpmsg_rx_cb_t cb,
+						      void *priv,
+						      struct rpmsg_channel_info chinfo)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return ERR_PTR(-ENXIO);
+}
+
+static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
+			       u32 dst)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+
+}
+
+static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src,
+					u32 dst, void *data, int len)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
+				  int len, u32 dst)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
+					   u32 dst, void *data, int len)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
+#endif /* IS_ENABLED(CONFIG_RPMSG) */
+
 /* use a macro to avoid include chaining to get THIS_MODULE */
 #define register_rpmsg_driver(drv) \
 	__register_rpmsg_driver(drv, THIS_MODULE)
@@ -159,14 +272,4 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *,
 	module_driver(__rpmsg_driver, register_rpmsg_driver, \
 			unregister_rpmsg_driver)
 
-int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
-int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
-int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
-			  void *data, int len);
-
-int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len);
-int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
-int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
-			     void *data, int len);
-
 #endif /* _LINUX_RPMSG_H */
-- 
cgit v1.2.3


From d71d9ae14a0942fae519d890a743b12679e3d153 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:03 -0600
Subject: blk-cgroup: use op_is_sync to check for synchronous requests

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index ddaf28d0988f..01b62e7bac74 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -599,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 
 	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
 
-	if (op & REQ_SYNC)
+	if (op_is_sync(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
-- 
cgit v1.2.3


From 6f6b29171a192e84b666c816e49d2175afbbb09f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:07 -0600
Subject: block: don't use REQ_SYNC in the READ_SYNC definition

Reads are synchronous per definition, don't add another flag for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3e878f12b25..5e0078fceed7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -196,7 +196,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define READ			REQ_OP_READ
 #define WRITE			REQ_OP_WRITE
 
-#define READ_SYNC		REQ_SYNC
+#define READ_SYNC		0
 #define WRITE_SYNC		(REQ_SYNC | REQ_NOIDLE)
 #define WRITE_ODIRECT		REQ_SYNC
 #define WRITE_FLUSH		(REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
-- 
cgit v1.2.3


From b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:08 -0600
Subject: block: treat REQ_FUA and REQ_PREFLUSH as synchronous
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of requiring everyone to specify the REQ_SYNC flag aѕ well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 8 +++++++-
 include/linux/fs.h        | 6 +++---
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3fa62cabe8d2..107d23d18096 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -216,9 +216,15 @@ static inline bool op_is_write(unsigned int op)
 	return (op & 1);
 }
 
+/*
+ * Reads are always treated as synchronous, as are requests with the FUA or
+ * PREFLUSH flag.  Other operations may be marked as synchronous using the
+ * REQ_SYNC flag.
+ */
 static inline bool op_is_sync(unsigned int op)
 {
-	return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC);
+	return (op & REQ_OP_MASK) == REQ_OP_READ ||
+		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
 typedef unsigned int blk_qc_t;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e0078fceed7..ccedccb28ec8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -199,9 +199,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define READ_SYNC		0
 #define WRITE_SYNC		(REQ_SYNC | REQ_NOIDLE)
 #define WRITE_ODIRECT		REQ_SYNC
-#define WRITE_FLUSH		(REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
-#define WRITE_FUA		(REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA		(REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
+#define WRITE_FLUSH		(REQ_NOIDLE | REQ_PREFLUSH)
+#define WRITE_FUA		(REQ_NOIDLE | REQ_FUA)
+#define WRITE_FLUSH_FUA		(REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
-- 
cgit v1.2.3


From a2b809672ee6fcb4d5756ea815725b3dbaea654e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:09 -0600
Subject: block: replace REQ_NOIDLE with REQ_IDLE

Noidle should be the default for writes as seen by all the compounds
definitions in fs.h using it.  In fact only direct I/O really should
be using NODILE, so turn the whole flag around to get the defaults
right, which will make our life much easier especially onces the
WRITE_* defines go away.

This assumes all the existing "raw" users of REQ_SYNC for writes
want noidle behavior, which seems to be spot on from a quick audit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/cfq-iosched.txt | 32 ++++++++++++++++----------------
 block/cfq-iosched.c                 | 11 ++++++++---
 drivers/block/drbd/drbd_actlog.c    |  2 +-
 include/linux/blk_types.h           |  4 ++--
 include/linux/fs.h                  | 10 +++++-----
 include/trace/events/f2fs.h         |  2 +-
 6 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
index 1e4f835a659d..895bd3813115 100644
--- a/Documentation/block/cfq-iosched.txt
+++ b/Documentation/block/cfq-iosched.txt
@@ -240,11 +240,11 @@ All cfq queues doing synchronous sequential IO go on to sync-idle tree.
 On this tree we idle on each queue individually.
 
 All synchronous non-sequential queues go on sync-noidle tree. Also any
-request which are marked with REQ_NOIDLE go on this service tree. On this
-tree we do not idle on individual queues instead idle on the whole group
-of queues or the tree. So if there are 4 queues waiting for IO to dispatch
-we will idle only once last queue has dispatched the IO and there is
-no more IO on this service tree.
+synchronous write request which is not marked with REQ_IDLE goes on this
+service tree. On this tree we do not idle on individual queues instead idle
+on the whole group of queues or the tree. So if there are 4 queues waiting
+for IO to dispatch we will idle only once last queue has dispatched the IO
+and there is no more IO on this service tree.
 
 All async writes go on async service tree. There is no idling on async
 queues.
@@ -257,17 +257,17 @@ tree idling provides isolation with buffered write queues on async tree.
 
 FAQ
 ===
-Q1. Why to idle at all on queues marked with REQ_NOIDLE.
+Q1. Why to idle at all on queues not marked with REQ_IDLE.
 
-A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
-    with REQ_NOIDLE. This helps in providing isolation with all the sync-idle
+A1. We only do tree idle (all queues on sync-noidle tree) on queues not marked
+    with REQ_IDLE. This helps in providing isolation with all the sync-idle
     queues. Otherwise in presence of many sequential readers, other
     synchronous IO might not get fair share of disk.
 
     For example, if there are 10 sequential readers doing IO and they get
-    100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
-    roughly after 1 second. If after completion of REQ_NOIDLE request we
-    do not idle, and after a couple of milli seconds a another REQ_NOIDLE
+    100ms each. If a !REQ_IDLE request comes in, it will be scheduled
+    roughly after 1 second. If after completion of !REQ_IDLE request we
+    do not idle, and after a couple of milli seconds a another !REQ_IDLE
     request comes in, again it will be scheduled after 1second. Repeat it
     and notice how a workload can lose its disk share and suffer due to
     multiple sequential readers.
@@ -276,16 +276,16 @@ A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
     context of fsync, and later some journaling data is written. Journaling
     data comes in only after fsync has finished its IO (atleast for ext4
     that seemed to be the case). Now if one decides not to idle on fsync
-    thread due to REQ_NOIDLE, then next journaling write will not get
+    thread due to !REQ_IDLE, then next journaling write will not get
     scheduled for another second. A process doing small fsync, will suffer
     badly in presence of multiple sequential readers.
 
-    Hence doing tree idling on threads using REQ_NOIDLE flag on requests
+    Hence doing tree idling on threads using !REQ_IDLE flag on requests
     provides isolation from multiple sequential readers and at the same
     time we do not idle on individual threads.
 
-Q2. When to specify REQ_NOIDLE
-A2. I would think whenever one is doing synchronous write and not expecting
+Q2. When to specify REQ_IDLE
+A2. I would think whenever one is doing synchronous write and expecting
     more writes to be dispatched from same context soon, should be able
-    to specify REQ_NOIDLE on writes and that probably should work well for
+    to specify REQ_IDLE on writes and that probably should work well for
     most of the cases.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f28db97c3fe0..dcbed8c9c82c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3914,6 +3914,12 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
 }
 
+static inline bool req_noidle(struct request *req)
+{
+	return req_op(req) == REQ_OP_WRITE &&
+		(req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC;
+}
+
 /*
  * Disable idle window if the process thinks too long or seeks so much that
  * it doesn't matter
@@ -3935,7 +3941,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfqq->queued[0] + cfqq->queued[1] >= 4)
 		cfq_mark_cfqq_deep(cfqq);
 
-	if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
+	if (cfqq->next_rq && req_noidle(cfqq->next_rq))
 		enable_idle = 0;
 	else if (!atomic_read(&cic->icq.ioc->active_ref) ||
 		 !cfqd->cfq_slice_idle ||
@@ -4220,8 +4226,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	const int sync = rq_is_sync(rq);
 	u64 now = ktime_get_ns();
 
-	cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
-		     !!(rq->cmd_flags & REQ_NOIDLE));
+	cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq));
 
 	cfq_update_hw_tag(cfqd);
 
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 2d3d50ab74bf..8d7bcfa49c12 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -148,7 +148,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 
 	if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags))
 		op_flags |= REQ_FUA | REQ_PREFLUSH;
-	op_flags |= REQ_SYNC | REQ_NOIDLE;
+	op_flags |= REQ_SYNC;
 
 	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 107d23d18096..63b750a3b165 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -175,7 +175,7 @@ enum req_flag_bits {
 	__REQ_META,		/* metadata io request */
 	__REQ_PRIO,		/* boost priority in cfq */
 	__REQ_NOMERGE,		/* don't touch this for merging */
-	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
+	__REQ_IDLE,		/* anticipate more IO after this one */
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
@@ -190,7 +190,7 @@ enum req_flag_bits {
 #define REQ_META		(1ULL << __REQ_META)
 #define REQ_PRIO		(1ULL << __REQ_PRIO)
 #define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
-#define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
+#define REQ_IDLE		(1ULL << __REQ_IDLE)
 #define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
 #define REQ_FUA			(1ULL << __REQ_FUA)
 #define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ccedccb28ec8..46a74209917f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -197,11 +197,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define WRITE			REQ_OP_WRITE
 
 #define READ_SYNC		0
-#define WRITE_SYNC		(REQ_SYNC | REQ_NOIDLE)
-#define WRITE_ODIRECT		REQ_SYNC
-#define WRITE_FLUSH		(REQ_NOIDLE | REQ_PREFLUSH)
-#define WRITE_FUA		(REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA		(REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
+#define WRITE_SYNC		REQ_SYNC
+#define WRITE_ODIRECT		(REQ_SYNC | REQ_IDLE)
+#define WRITE_FLUSH		REQ_PREFLUSH
+#define WRITE_FUA		REQ_FUA
+#define WRITE_FLUSH_FUA		(REQ_PREFLUSH | REQ_FUA)
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 903a09165bb1..a9d34424450d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -32,7 +32,7 @@ TRACE_DEFINE_ENUM(LFS);
 TRACE_DEFINE_ENUM(SSR);
 TRACE_DEFINE_ENUM(__REQ_RAHEAD);
 TRACE_DEFINE_ENUM(__REQ_SYNC);
-TRACE_DEFINE_ENUM(__REQ_NOIDLE);
+TRACE_DEFINE_ENUM(__REQ_IDLE);
 TRACE_DEFINE_ENUM(__REQ_PREFLUSH);
 TRACE_DEFINE_ENUM(__REQ_FUA);
 TRACE_DEFINE_ENUM(__REQ_PRIO);
-- 
cgit v1.2.3


From 70fd76140a6cb63262bd47b68d57b42e889c10ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:10 -0600
Subject: block,fs: use REQ_* flags directly

Remove the WRITE_* and READ_SYNC wrappers, and just use the flags
directly.  Where applicable this also drops usage of the
bio_set_op_attrs wrapper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c                   |  4 ++--
 drivers/block/drbd/drbd_receiver.c  |  2 +-
 drivers/block/xen-blkback/blkback.c | 10 ++++----
 drivers/md/bcache/btree.c           |  4 ++--
 drivers/md/bcache/debug.c           |  4 ++--
 drivers/md/bcache/request.c         |  2 +-
 drivers/md/bcache/super.c           |  4 ++--
 drivers/md/dm-bufio.c               |  2 +-
 drivers/md/dm-log.c                 |  2 +-
 drivers/md/dm-raid1.c               |  4 ++--
 drivers/md/dm-snap-persistent.c     |  4 ++--
 drivers/md/dm.c                     |  2 +-
 drivers/md/md.c                     |  4 ++--
 drivers/md/raid5-cache.c            |  4 ++--
 drivers/md/raid5.c                  |  2 +-
 drivers/nvme/target/io-cmd.c        |  4 ++--
 drivers/target/target_core_iblock.c |  8 +++----
 fs/btrfs/disk-io.c                  |  6 ++---
 fs/btrfs/extent_io.c                | 16 ++++++-------
 fs/btrfs/inode.c                    |  6 ++---
 fs/btrfs/scrub.c                    |  2 +-
 fs/btrfs/volumes.c                  |  2 +-
 fs/btrfs/volumes.h                  |  2 +-
 fs/buffer.c                         |  8 +++----
 fs/direct-io.c                      |  2 +-
 fs/ext4/mmp.c                       |  6 ++---
 fs/ext4/page-io.c                   |  2 +-
 fs/ext4/super.c                     |  2 +-
 fs/f2fs/checkpoint.c                |  4 ++--
 fs/f2fs/data.c                      | 16 ++++++-------
 fs/f2fs/gc.c                        |  6 ++---
 fs/f2fs/inline.c                    |  2 +-
 fs/f2fs/node.c                      |  4 ++--
 fs/f2fs/segment.c                   |  8 +++----
 fs/f2fs/super.c                     |  2 +-
 fs/gfs2/log.c                       |  4 ++--
 fs/gfs2/meta_io.c                   |  6 ++---
 fs/gfs2/ops_fstype.c                |  2 +-
 fs/hfsplus/super.c                  |  4 ++--
 fs/jbd2/checkpoint.c                |  2 +-
 fs/jbd2/commit.c                    |  9 +++----
 fs/jbd2/journal.c                   | 15 ++++++------
 fs/jbd2/revoke.c                    |  2 +-
 fs/jfs/jfs_logmgr.c                 |  4 ++--
 fs/mpage.c                          |  6 ++---
 fs/nilfs2/super.c                   |  2 +-
 fs/ocfs2/cluster/heartbeat.c        |  2 +-
 fs/reiserfs/journal.c               |  6 +++--
 fs/xfs/xfs_aops.c                   | 11 +++++----
 fs/xfs/xfs_buf.c                    |  2 +-
 include/linux/fs.h                  | 47 -------------------------------------
 include/trace/events/f2fs.h         | 10 ++++----
 kernel/power/swap.c                 | 19 +++++++--------
 53 files changed, 133 insertions(+), 182 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 95f1d4d357df..d35beca18481 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	}
 
 	flush_rq->cmd_type = REQ_TYPE_FS;
-	flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH;
+	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->rq_disk = first_rq->rq_disk;
 	flush_rq->end_io = flush_end_io;
@@ -486,7 +486,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 
 	bio = bio_alloc(gfp_mask, 0);
 	bio->bi_bdev = bdev;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 	ret = submit_bio_wait(bio);
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 942384f34e22..a89538cb3eaa 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
 	bio->bi_bdev = device->ldev->backing_bdev;
 	bio->bi_private = octx;
 	bio->bi_end_io = one_flush_endio;
-	bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH);
+	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
 
 	device->flush_jif = jiffies;
 	set_bit(FLUSH_PENDING, &device->flags);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4a80ee752597..726c32e35db9 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	case BLKIF_OP_WRITE:
 		ring->st_wr_req++;
 		operation = REQ_OP_WRITE;
-		operation_flags = WRITE_ODIRECT;
+		operation_flags = REQ_SYNC | REQ_IDLE;
 		break;
 	case BLKIF_OP_WRITE_BARRIER:
 		drain = true;
 	case BLKIF_OP_FLUSH_DISKCACHE:
 		ring->st_f_req++;
 		operation = REQ_OP_WRITE;
-		operation_flags = WRITE_FLUSH;
+		operation_flags = REQ_PREFLUSH;
 		break;
 	default:
 		operation = 0; /* make gcc happy */
@@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	nseg = req->operation == BLKIF_OP_INDIRECT ?
 	       req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
-	if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) ||
+	if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) ||
 	    unlikely((req->operation != BLKIF_OP_INDIRECT) &&
 		     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
 	    unlikely((req->operation == BLKIF_OP_INDIRECT) &&
@@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	}
 
 	/* Wait on all outstanding I/O's and once that has been completed
-	 * issue the WRITE_FLUSH.
+	 * issue the flush.
 	 */
 	if (drain)
 		xen_blk_drain_io(pending_req->ring);
@@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 
 	/* This will be hit if the operation was a flush or discard. */
 	if (!bio) {
-		BUG_ON(operation_flags != WRITE_FLUSH);
+		BUG_ON(operation_flags != REQ_PREFLUSH);
 
 		bio = bio_alloc(GFP_KERNEL, 0);
 		if (unlikely(bio == NULL))
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 81d3db40cd7b..6fdd8e252760 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -297,7 +297,7 @@ static void bch_btree_node_read(struct btree *b)
 	bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
 	bio->bi_end_io	= btree_node_read_endio;
 	bio->bi_private	= &cl;
-	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
+	bio->bi_opf = REQ_OP_READ | REQ_META;
 
 	bch_bio_map(bio, b->keys.set[0].data);
 
@@ -393,7 +393,7 @@ static void do_btree_node_write(struct btree *b)
 	b->bio->bi_end_io	= btree_node_write_endio;
 	b->bio->bi_private	= cl;
 	b->bio->bi_iter.bi_size	= roundup(set_bytes(i), block_bytes(b->c));
-	bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA);
+	b->bio->bi_opf		= REQ_OP_WRITE | REQ_META | REQ_FUA;
 	bch_bio_map(b->bio, i);
 
 	/*
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 333a1e5f6ae6..1c9130ae0073 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -52,7 +52,7 @@ void bch_btree_verify(struct btree *b)
 	bio->bi_bdev		= PTR_CACHE(b->c, &b->key, 0)->bdev;
 	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
 	bio->bi_iter.bi_size	= KEY_SIZE(&v->key) << 9;
-	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
+	bio->bi_opf		= REQ_OP_READ | REQ_META;
 	bch_bio_map(bio, sorted);
 
 	submit_bio_wait(bio);
@@ -113,7 +113,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 	check = bio_clone(bio, GFP_NOIO);
 	if (!check)
 		return;
-	bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC);
+	check->bi_opf = REQ_OP_READ;
 
 	if (bio_alloc_pages(check, GFP_NOIO))
 		goto out_put;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e8a2b693c928..0d99b5f4b3e6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -923,7 +923,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 			flush->bi_bdev	= bio->bi_bdev;
 			flush->bi_end_io = request_endio;
 			flush->bi_private = cl;
-			bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH);
+			flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 			closure_bio_submit(flush, cl);
 		}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 849ad441cd76..988edf928466 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -381,7 +381,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
 		return "bad uuid pointer";
 
 	bkey_copy(&c->uuid_bucket, k);
-	uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl);
+	uuid_io(c, REQ_OP_READ, 0, k, cl);
 
 	if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
 		struct uuid_entry_v0	*u0 = (void *) c->uuids;
@@ -600,7 +600,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
 			ca->prio_last_buckets[bucket_nr] = bucket;
 			bucket_nr++;
 
-			prio_io(ca, bucket, REQ_OP_READ, READ_SYNC);
+			prio_io(ca, bucket, REQ_OP_READ, 0);
 
 			if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8))
 				pr_warn("bad csum reading priorities");
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 125aedc3875f..b3ba142e59a4 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1316,7 +1316,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = WRITE_FLUSH,
+		.bi_op_flags = REQ_PREFLUSH,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = c->dm_io,
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 07fc1ad42ec5..33e71ea6cc14 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc)
 	};
 
 	lc->io_req.bi_op = REQ_OP_WRITE;
-	lc->io_req.bi_op_flags = WRITE_FLUSH;
+	lc->io_req.bi_op_flags = REQ_PREFLUSH;
 
 	return dm_io(&lc->io_req, 1, &null_location, NULL);
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index bdf1606f67bc..1a176d7c8b90 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti)
 	struct mirror *m;
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = WRITE_FLUSH,
+		.bi_op_flags = REQ_PREFLUSH,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = ms->io_client,
@@ -657,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
 	struct mirror *m;
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = bio->bi_opf & WRITE_FLUSH_FUA,
+		.bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH),
 		.mem.type = DM_IO_BIO,
 		.mem.ptr.bio = bio,
 		.notify.fn = write_callback,
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index b8cf956b577b..b93476c3ba3f 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -741,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	/*
 	 * Commit exceptions to disk.
 	 */
-	if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA))
+	if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA))
 		ps->valid = 0;
 
 	/*
@@ -818,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
 	for (i = 0; i < nr_merged; i++)
 		clear_exception(ps, ps->current_committed - 1 - i);
 
-	r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA);
+	r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
 	if (r < 0)
 		return r;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 147af9536d0c..b2abfa41af3e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1527,7 +1527,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	bio_init(&md->flush_bio);
 	md->flush_bio.bi_bdev = md->bdev;
-	bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH);
+	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 	dm_stats_init(&md->stats);
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index eac84d8ff724..b69ec7da4bae 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -394,7 +394,7 @@ static void submit_flushes(struct work_struct *ws)
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
 			bi->bi_bdev = rdev->bdev;
-			bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
+			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 			atomic_inc(&mddev->flush_pending);
 			submit_bio(bi);
 			rcu_read_lock();
@@ -743,7 +743,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
 	bio->bi_end_io = super_written;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA;
 
 	atomic_inc(&mddev->pending_writes);
 	submit_bio(bio);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 1b1ab4a1d132..28d015c6fffe 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -685,7 +685,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
 	bio_reset(&log->flush_bio);
 	log->flush_bio.bi_bdev = log->rdev->bdev;
 	log->flush_bio.bi_end_io = r5l_log_flush_endio;
-	bio_set_op_attrs(&log->flush_bio, REQ_OP_WRITE, WRITE_FLUSH);
+	log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 	submit_bio(&log->flush_bio);
 }
 
@@ -1053,7 +1053,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
 	mb->checksum = cpu_to_le32(crc);
 
 	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
-			  WRITE_FUA, false)) {
+			  REQ_FUA, false)) {
 		__free_page(page);
 		return -EIO;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 92ac251e91e6..70acdd379e44 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -913,7 +913,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
 			op = REQ_OP_WRITE;
 			if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
-				op_flags = WRITE_FUA;
+				op_flags = REQ_FUA;
 			if (test_bit(R5_Discard, &sh->dev[i].flags))
 				op = REQ_OP_DISCARD;
 		} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 4a96c2049b7b..c2784cfc5e29 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -58,7 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 
 	if (req->cmd->rw.opcode == nvme_cmd_write) {
 		op = REQ_OP_WRITE;
-		op_flags = WRITE_ODIRECT;
+		op_flags = REQ_SYNC | REQ_IDLE;
 		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
 			op_flags |= REQ_FUA;
 	} else {
@@ -109,7 +109,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
 	bio->bi_bdev = req->ns->bdev;
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 	submit_bio(bio);
 }
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 372d744315f3..d316ed537d59 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -388,7 +388,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd)
 	bio = bio_alloc(GFP_KERNEL, 0);
 	bio->bi_end_io = iblock_end_io_flush;
 	bio->bi_bdev = ib_dev->ibd_bd;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 	if (!immed)
 		bio->bi_private = cmd;
 	submit_bio(bio);
@@ -686,15 +686,15 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 		struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd);
 		/*
-		 * Force writethrough using WRITE_FUA if a volatile write cache
+		 * Force writethrough using REQ_FUA if a volatile write cache
 		 * is not enabled, or if initiator set the Force Unit Access bit.
 		 */
 		op = REQ_OP_WRITE;
 		if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) {
 			if (cmd->se_cmd_flags & SCF_FUA)
-				op_flags = WRITE_FUA;
+				op_flags = REQ_FUA;
 			else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
-				op_flags = WRITE_FUA;
+				op_flags = REQ_FUA;
 		}
 	} else {
 		op = REQ_OP_READ;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c8454a8e35f2..fe10afd51e02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3485,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * to go down lazy.
 		 */
 		if (i == 0)
-			ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh);
+			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
 		else
-			ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
 		if (ret)
 			errors++;
 	}
@@ -3551,7 +3551,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
 	bio->bi_end_io = btrfs_end_empty_barrier;
 	bio->bi_bdev = device->bdev;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 	init_completion(&device->flush_wait);
 	bio->bi_private = &device->flush_wait;
 	device->flush_bio = bio;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 66a755150056..ff87bff7bdb6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -127,7 +127,7 @@ struct extent_page_data {
 	 */
 	unsigned int extent_locked:1;
 
-	/* tells the submit_bio code to use a WRITE_SYNC */
+	/* tells the submit_bio code to use REQ_SYNC */
 	unsigned int sync_io:1;
 };
 
@@ -2047,7 +2047,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
 		return -EIO;
 	}
 	bio->bi_bdev = dev->bdev;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 	bio_add_page(bio, page, length, pg_offset);
 
 	if (btrfsic_submit_bio_wait(bio)) {
@@ -2388,7 +2388,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct bio *bio;
-	int read_mode;
+	int read_mode = 0;
 	int ret;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2404,9 +2404,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	}
 
 	if (failed_bio->bi_vcnt > 1)
-		read_mode = READ_SYNC | REQ_FAILFAST_DEV;
-	else
-		read_mode = READ_SYNC;
+		read_mode |= REQ_FAILFAST_DEV;
 
 	phy_offset >>= inode->i_sb->s_blocksize_bits;
 	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
@@ -3484,7 +3482,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned long nr_written = 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
+		write_flags = REQ_SYNC;
 
 	trace___extent_writepage(page, inode, wbc);
 
@@ -3729,7 +3727,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
 	unsigned long start, end;
-	int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
+	int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4076,7 +4074,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 		int ret;
 
 		bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
-				 epd->sync_io ? WRITE_SYNC : 0);
+				 epd->sync_io ? REQ_SYNC : 0);
 
 		ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9a377079af26..c8eb82a416b3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7917,7 +7917,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
 	struct io_failure_record *failrec;
 	struct bio *bio;
 	int isector;
-	int read_mode;
+	int read_mode = 0;
 	int ret;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -7936,9 +7936,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
 	if ((failed_bio->bi_vcnt > 1)
 		|| (failed_bio->bi_io_vec->bv_len
 			> BTRFS_I(inode)->root->sectorsize))
-		read_mode = READ_SYNC | REQ_FAILFAST_DEV;
-	else
-		read_mode = READ_SYNC;
+		read_mode |= REQ_FAILFAST_DEV;
 
 	isector = start - btrfs_io_bio(failed_bio)->logical;
 	isector >>= inode->i_sb->s_blocksize_bits;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fffb9ab8526e..ff3078234d94 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4440,7 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
 	bio->bi_bdev = dev->bdev;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 	ret = bio_add_page(bio, page, PAGE_SIZE, 0);
 	if (ret != PAGE_SIZE) {
 leave_with_eio:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index deda46cf1292..0d7d635d8bfb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6023,7 +6023,7 @@ static void btrfs_end_bio(struct bio *bio)
 				else
 					btrfs_dev_stat_inc(dev,
 						BTRFS_DEV_STAT_READ_ERRS);
-				if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH)
+				if (bio->bi_opf & REQ_PREFLUSH)
 					btrfs_dev_stat_inc(dev,
 						BTRFS_DEV_STAT_FLUSH_ERRS);
 				btrfs_dev_stat_print_on_error(dev);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 09ed29c67848..f137ffe6654c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -62,7 +62,7 @@ struct btrfs_device {
 	int running_pending;
 	/* regular prio bios */
 	struct btrfs_pending_bios pending_bios;
-	/* WRITE_SYNC bios */
+	/* sync bios */
 	struct btrfs_pending_bios pending_sync_bios;
 
 	struct block_device *bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index a29335867e30..bc7c2bb30a9b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -753,7 +753,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * still in flight on potentially older
 				 * contents.
 				 */
-				write_dirty_buffer(bh, WRITE_SYNC);
+				write_dirty_buffer(bh, REQ_SYNC);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -1684,7 +1684,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode *
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
  * causes the writes to be flagged as synchronous writes.
  */
 int __block_write_full_page(struct inode *inode, struct page *page,
@@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	unsigned int blocksize, bbits;
 	int nr_underway = 0;
-	int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+	int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
 
 	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -3210,7 +3210,7 @@ EXPORT_SYMBOL(__sync_dirty_buffer);
 
 int sync_dirty_buffer(struct buffer_head *bh)
 {
-	return __sync_dirty_buffer(bh, WRITE_SYNC);
+	return __sync_dirty_buffer(bh, REQ_SYNC);
 }
 EXPORT_SYMBOL(sync_dirty_buffer);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index fb9aa16a7727..a5138c564019 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1209,7 +1209,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	dio->inode = inode;
 	if (iov_iter_rw(iter) == WRITE) {
 		dio->op = REQ_OP_WRITE;
-		dio->op_flags = WRITE_ODIRECT;
+		dio->op_flags = REQ_SYNC | REQ_IDLE;
 	} else {
 		dio->op = REQ_OP_READ;
 	}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index d89754ef1aab..eb9835638680 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
 }
 
 /*
- * Write the MMP block using WRITE_SYNC to try to get the block on-disk
+ * Write the MMP block using REQ_SYNC to try to get the block on-disk
  * faster.
  */
 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
@@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
 	lock_buffer(bh);
 	bh->b_end_io = end_buffer_write_sync;
 	get_bh(bh);
-	submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh);
+	submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
 	wait_on_buffer(bh);
 	sb_end_write(sb);
 	if (unlikely(!buffer_uptodate(bh)))
@@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 	get_bh(*bh);
 	lock_buffer(*bh);
 	(*bh)->b_end_io = end_buffer_read_sync;
-	submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh);
+	submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh);
 	wait_on_buffer(*bh);
 	if (!buffer_uptodate(*bh)) {
 		ret = -EIO;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 0094923e5ebf..e0b3b54cdef3 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
 
 	if (bio) {
 		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
-				  WRITE_SYNC : 0;
+				  REQ_SYNC : 0;
 		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 		submit_bio(io->io_bio);
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6db81fbcbaa6..f31eb286af90 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4553,7 +4553,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	unlock_buffer(sbh);
 	if (sync) {
 		error = __sync_dirty_buffer(sbh,
-			test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
+			test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);
 		if (error)
 			return error;
 
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7e9b504bd8b2..d935c06a84f0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
 		.sbi = sbi,
 		.type = META,
 		.op = REQ_OP_READ,
-		.op_flags = READ_SYNC | REQ_META | REQ_PRIO,
+		.op_flags = REQ_META | REQ_PRIO,
 		.old_blkaddr = index,
 		.new_blkaddr = index,
 		.encrypted_page = NULL,
@@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 		.sbi = sbi,
 		.type = META,
 		.op = REQ_OP_READ,
-		.op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
+		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
 		.encrypted_page = NULL,
 	};
 	struct blk_plug plug;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ae194fd2fdb..b80bf10603d7 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -198,11 +198,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
 		io->fio.op = REQ_OP_WRITE;
-		if (test_opt(sbi, NOBARRIER))
-			io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO;
-		else
-			io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META |
-								REQ_PRIO;
+		io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+		if (!test_opt(sbi, NOBARRIER))
+			io->fio.op_flags |= REQ_FUA;
 	}
 	__submit_merged_bio(io);
 out:
@@ -483,7 +481,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
 		return page;
 	f2fs_put_page(page, 0);
 
-	page = get_read_data_page(inode, index, READ_SYNC, false);
+	page = get_read_data_page(inode, index, 0, false);
 	if (IS_ERR(page))
 		return page;
 
@@ -509,7 +507,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 repeat:
-	page = get_read_data_page(inode, index, READ_SYNC, for_write);
+	page = get_read_data_page(inode, index, 0, for_write);
 	if (IS_ERR(page))
 		return page;
 
@@ -1251,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page,
 		.sbi = sbi,
 		.type = DATA,
 		.op = REQ_OP_WRITE,
-		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0,
 		.page = page,
 		.encrypted_page = NULL,
 	};
@@ -1663,7 +1661,7 @@ repeat:
 			err = PTR_ERR(bio);
 			goto fail;
 		}
-		bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
+		bio->bi_opf = REQ_OP_READ;
 		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 			bio_put(bio);
 			err = -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 93985c64d8a8..9eb11b2244ea 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -550,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
 		.sbi = F2FS_I_SB(inode),
 		.type = DATA,
 		.op = REQ_OP_READ,
-		.op_flags = READ_SYNC,
+		.op_flags = 0,
 		.encrypted_page = NULL,
 	};
 	struct dnode_of_data dn;
@@ -625,7 +625,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
 	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
 
 	fio.op = REQ_OP_WRITE;
-	fio.op_flags = WRITE_SYNC;
+	fio.op_flags = REQ_SYNC;
 	fio.new_blkaddr = newaddr;
 	f2fs_submit_page_mbio(&fio);
 
@@ -663,7 +663,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
 			.sbi = F2FS_I_SB(inode),
 			.type = DATA,
 			.op = REQ_OP_WRITE,
-			.op_flags = WRITE_SYNC,
+			.op_flags = REQ_SYNC,
 			.page = page,
 			.encrypted_page = NULL,
 		};
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 5f1a67f756af..2e7f54c191b4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -111,7 +111,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
 		.sbi = F2FS_I_SB(dn->inode),
 		.type = DATA,
 		.op = REQ_OP_WRITE,
-		.op_flags = WRITE_SYNC | REQ_PRIO,
+		.op_flags = REQ_SYNC | REQ_PRIO,
 		.page = page,
 		.encrypted_page = NULL,
 	};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 01177ecdeab8..932f3f8bb57b 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1134,7 +1134,7 @@ repeat:
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	err = read_node_page(page, READ_SYNC);
+	err = read_node_page(page, 0);
 	if (err < 0) {
 		f2fs_put_page(page, 1);
 		return ERR_PTR(err);
@@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page,
 		.sbi = sbi,
 		.type = NODE,
 		.op = REQ_OP_WRITE,
-		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0,
 		.page = page,
 		.encrypted_page = NULL,
 	};
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fc886f008449..f1b4a1775ebe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -259,7 +259,7 @@ static int __commit_inmem_pages(struct inode *inode,
 		.sbi = sbi,
 		.type = DATA,
 		.op = REQ_OP_WRITE,
-		.op_flags = WRITE_SYNC | REQ_PRIO,
+		.op_flags = REQ_SYNC | REQ_PRIO,
 		.encrypted_page = NULL,
 	};
 	bool submit_bio = false;
@@ -420,7 +420,7 @@ repeat:
 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 
 		bio->bi_bdev = sbi->sb->s_bdev;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+		bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 		ret = submit_bio_wait(bio);
 
 		llist_for_each_entry_safe(cmd, next,
@@ -454,7 +454,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 
 		atomic_inc(&fcc->submit_flush);
 		bio->bi_bdev = sbi->sb->s_bdev;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+		bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 		ret = submit_bio_wait(bio);
 		atomic_dec(&fcc->submit_flush);
 		bio_put(bio);
@@ -1515,7 +1515,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
 		.sbi = sbi,
 		.type = META,
 		.op = REQ_OP_WRITE,
-		.op_flags = WRITE_SYNC | REQ_META | REQ_PRIO,
+		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
 		.old_blkaddr = page->index,
 		.new_blkaddr = page->index,
 		.page = page,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6132b4ce4e4c..2cac6bb86080 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1238,7 +1238,7 @@ static int __f2fs_commit_super(struct buffer_head *bh,
 	unlock_buffer(bh);
 
 	/* it's rare case, we can do fua all the time */
-	return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+	return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
 }
 
 static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e58ccef09c91..27c00a16def0 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	struct gfs2_log_header *lh;
 	unsigned int tail;
 	u32 hash;
-	int op_flags = WRITE_FLUSH_FUA | REQ_META;
+	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	lh = page_address(page);
@@ -682,7 +682,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
 		gfs2_ordered_wait(sdp);
 		log_flush_wait(sdp);
-		op_flags = WRITE_SYNC | REQ_META | REQ_PRIO;
+		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
 	}
 
 	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 373639a59782..e562b1191c9c 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -38,7 +38,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
 	int write_flags = REQ_META | REQ_PRIO |
-		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+		(wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
@@ -285,7 +285,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		}
 	}
 
-	gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
+	gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num);
 	if (!(flags & DIO_WAIT))
 		return 0;
 
@@ -453,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh);
+		ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh);
 
 	dblock++;
 	extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ff72ac6439c8..a34308df927f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -246,7 +246,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META);
+	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);
 	submit_bio(bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 11854dd84572..67aedf4c2e7c 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
 	error2 = hfsplus_submit_bio(sb,
 				   sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
 				   sbi->s_vhdr_buf, NULL, REQ_OP_WRITE,
-				   WRITE_SYNC);
+				   REQ_SYNC);
 	if (!error)
 		error = error2;
 	if (!write_backup)
@@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
 	error2 = hfsplus_submit_bio(sb,
 				  sbi->part_start + sbi->sect_count - 2,
 				  sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE,
-				  WRITE_SYNC);
+				  REQ_SYNC);
 	if (!error)
 		error2 = error;
 out:
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 684996c8a3a4..4055f51617ef 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -186,7 +186,7 @@ __flush_batch(journal_t *journal, int *batch_count)
 
 	blk_start_plug(&plug);
 	for (i = 0; i < *batch_count; i++)
-		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
+		write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
 	blk_finish_plug(&plug);
 
 	for (i = 0; i < *batch_count; i++) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 31f8ca046639..8c514367ba5a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -155,9 +155,10 @@ static int journal_submit_commit_record(journal_t *journal,
 
 	if (journal->j_flags & JBD2_BARRIER &&
 	    !jbd2_has_feature_async_commit(journal))
-		ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh);
+		ret = submit_bh(REQ_OP_WRITE,
+			REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh);
 	else
-		ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+		ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
 
 	*cbh = bh;
 	return ret;
@@ -402,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		jbd2_journal_update_sb_log_tail(journal,
 						journal->j_tail_sequence,
 						journal->j_tail,
-						WRITE_SYNC);
+						REQ_SYNC);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
@@ -717,7 +718,7 @@ start_journal_io:
 				clear_buffer_dirty(bh);
 				set_buffer_uptodate(bh);
 				bh->b_end_io = journal_end_buffer_io_sync;
-				submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+				submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
 			}
 			cond_resched();
 			stats.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 927da4956a89..8ed971eeab44 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -913,7 +913,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 	 * space and if we lose sb update during power failure we'd replay
 	 * old transaction with possibly newly overwritten data.
 	 */
-	ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+	ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA);
 	if (ret)
 		goto out;
 
@@ -1306,7 +1306,7 @@ static int journal_reset(journal_t *journal)
 		/* Lock here to make assertions happy... */
 		mutex_lock(&journal->j_checkpoint_mutex);
 		/*
-		 * Update log tail information. We use WRITE_FUA since new
+		 * Update log tail information. We use REQ_FUA since new
 		 * transaction will start reusing journal space and so we
 		 * must make sure information about current log tail is on
 		 * disk before that.
@@ -1314,7 +1314,7 @@ static int journal_reset(journal_t *journal)
 		jbd2_journal_update_sb_log_tail(journal,
 						journal->j_tail_sequence,
 						journal->j_tail,
-						WRITE_FUA);
+						REQ_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 	return jbd2_journal_start_thread(journal);
@@ -1454,7 +1454,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
 	read_unlock(&journal->j_state_lock);
 
-	jbd2_write_superblock(journal, WRITE_FUA);
+	jbd2_write_superblock(journal, REQ_FUA);
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
@@ -1720,7 +1720,8 @@ int jbd2_journal_destroy(journal_t *journal)
 				++journal->j_transaction_sequence;
 			write_unlock(&journal->j_state_lock);
 
-			jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
+			jbd2_mark_journal_empty(journal,
+					REQ_PREFLUSH | REQ_FUA);
 			mutex_unlock(&journal->j_checkpoint_mutex);
 		} else
 			err = -EIO;
@@ -1979,7 +1980,7 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
-	jbd2_mark_journal_empty(journal, WRITE_FUA);
+	jbd2_mark_journal_empty(journal, REQ_FUA);
 	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
 	J_ASSERT(!journal->j_running_transaction);
@@ -2025,7 +2026,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 	if (write) {
 		/* Lock to make assertions happy... */
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_mark_journal_empty(journal, WRITE_FUA);
+		jbd2_mark_journal_empty(journal, REQ_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 91171dc352cb..cfc38b552118 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -648,7 +648,7 @@ static void flush_descriptor(journal_t *journal,
 	set_buffer_jwrite(descriptor);
 	BUFFER_TRACE(descriptor, "write");
 	set_buffer_dirty(descriptor);
-	write_dirty_buffer(descriptor, WRITE_SYNC);
+	write_dirty_buffer(descriptor, REQ_SYNC);
 }
 #endif
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a21ea8b3e5fa..bb1da1feafeb 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2002,7 +2002,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
+	bio->bi_opf = REQ_OP_READ;
 	/*check if journaling to disk has been disabled*/
 	if (log->no_integrity) {
 		bio->bi_iter.bi_size = 0;
@@ -2146,7 +2146,7 @@ static void lbmStartIO(struct lbuf * bp)
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 
 	/* check if journaling to disk has been disabled */
 	if (log->no_integrity) {
diff --git a/fs/mpage.c b/fs/mpage.c
index d2413af0823a..f35e2819d0c6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
 	int ret = 0;
-	int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : 0);
+	int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -705,7 +705,7 @@ mpage_writepages(struct address_space *mapping,
 		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
 		if (mpd.bio) {
 			int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
-				  WRITE_SYNC : 0);
+				  REQ_SYNC : 0);
 			mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
 		}
 	}
@@ -726,7 +726,7 @@ int mpage_writepage(struct page *page, get_block_t get_block,
 	int ret = __mpage_writepage(page, wbc, &mpd);
 	if (mpd.bio) {
 		int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
-			  WRITE_SYNC : 0);
+			  REQ_SYNC : 0);
 		mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
 	}
 	return ret;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c95d369e90aa..12eeae62a2b1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,7 +189,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag)
 	set_buffer_dirty(nilfs->ns_sbh[0]);
 	if (nilfs_test_opt(nilfs, BARRIER)) {
 		err = __sync_dirty_buffer(nilfs->ns_sbh[0],
-					  WRITE_SYNC | WRITE_FLUSH_FUA);
+					  REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 	} else {
 		err = sync_dirty_buffer(nilfs->ns_sbh[0]);
 	}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 636abcbd4650..52eef16edb01 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -627,7 +627,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg,
 	slot = o2nm_this_node();
 
 	bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE,
-				 WRITE_SYNC);
+				 REQ_SYNC);
 	if (IS_ERR(bio)) {
 		status = PTR_ERR(bio);
 		mlog_errno(status);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bc2dde2423c2..aa40c242f1db 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1111,7 +1111,8 @@ static int flush_commit_list(struct super_block *s,
 		mark_buffer_dirty(jl->j_commit_bh) ;
 		depth = reiserfs_write_unlock_nested(s);
 		if (reiserfs_barrier_flush(s))
-			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
+			__sync_dirty_buffer(jl->j_commit_bh,
+					REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(jl->j_commit_bh);
 		reiserfs_write_lock_nested(s, depth);
@@ -1269,7 +1270,8 @@ static int _update_journal_header_block(struct super_block *sb,
 		depth = reiserfs_write_unlock_nested(sb);
 
 		if (reiserfs_barrier_flush(sb))
-			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
+			__sync_dirty_buffer(journal->j_header_bh,
+					REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(journal->j_header_bh);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3e57a56cf829..594e02c485b2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -495,8 +495,10 @@ xfs_submit_ioend(
 
 	ioend->io_bio->bi_private = ioend;
 	ioend->io_bio->bi_end_io = xfs_end_bio;
-	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-			 (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+	ioend->io_bio->bi_opf = REQ_OP_WRITE;
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		ioend->io_bio->bi_opf |= REQ_SYNC;
+
 	/*
 	 * If we are failing the IO now, just mark the ioend with an
 	 * error and finish it. This will run IO completion immediately
@@ -567,8 +569,9 @@ xfs_chain_bio(
 
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
-	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-			  (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+	ioend->io_bio->bi_opf = REQ_OP_WRITE;
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		ioend->io_bio->bi_opf |= REQ_SYNC;
 	submit_bio(ioend->io_bio);
 	ioend->io_bio = new;
 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b5b9bffe3520..33c435f3316c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1304,7 +1304,7 @@ _xfs_buf_ioapply(
 	if (bp->b_flags & XBF_WRITE) {
 		op = REQ_OP_WRITE;
 		if (bp->b_flags & XBF_SYNCIO)
-			op_flags = WRITE_SYNC;
+			op_flags = REQ_SYNC;
 		if (bp->b_flags & XBF_FUA)
 			op_flags |= REQ_FUA;
 		if (bp->b_flags & XBF_FLUSH)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 46a74209917f..7a1b78ab7c15 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -151,58 +151,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
  */
 #define CHECK_IOVEC_ONLY -1
 
-/*
- * The below are the various read and write flags that we support. Some of
- * them include behavioral modifiers that send information down to the
- * block layer and IO scheduler. They should be used along with a req_op.
- * Terminology:
- *
- *	The block layer uses device plugging to defer IO a little bit, in
- *	the hope that we will see more IO very shortly. This increases
- *	coalescing of adjacent IO and thus reduces the number of IOs we
- *	have to send to the device. It also allows for better queuing,
- *	if the IO isn't mergeable. If the caller is going to be waiting
- *	for the IO, then he must ensure that the device is unplugged so
- *	that the IO is dispatched to the driver.
- *
- *	All IO is handled async in Linux. This is fine for background
- *	writes, but for reads or writes that someone waits for completion
- *	on, we want to notify the block layer and IO scheduler so that they
- *	know about it. That allows them to make better scheduling
- *	decisions. So when the below references 'sync' and 'async', it
- *	is referencing this priority hint.
- *
- * With that in mind, the available types are:
- *
- * READ			A normal read operation. Device will be plugged.
- * READ_SYNC		A synchronous read. Device is not plugged, caller can
- *			immediately wait on this read without caring about
- *			unplugging.
- * WRITE		A normal async write. Device will be plugged.
- * WRITE_SYNC		Synchronous write. Identical to WRITE, but passes down
- *			the hint that someone will be waiting on this IO
- *			shortly. The write equivalent of READ_SYNC.
- * WRITE_ODIRECT	Special case write for O_DIRECT only.
- * WRITE_FLUSH		Like WRITE_SYNC but with preceding cache flush.
- * WRITE_FUA		Like WRITE_SYNC but data is guaranteed to be on
- *			non-volatile media on completion.
- * WRITE_FLUSH_FUA	Combination of WRITE_FLUSH and FUA. The IO is preceded
- *			by a cache flush and data is guaranteed to be on
- *			non-volatile media on completion.
- *
- */
 #define RW_MASK			REQ_OP_WRITE
 
 #define READ			REQ_OP_READ
 #define WRITE			REQ_OP_WRITE
 
-#define READ_SYNC		0
-#define WRITE_SYNC		REQ_SYNC
-#define WRITE_ODIRECT		(REQ_SYNC | REQ_IDLE)
-#define WRITE_FLUSH		REQ_PREFLUSH
-#define WRITE_FUA		REQ_FUA
-#define WRITE_FLUSH_FUA		(REQ_PREFLUSH | REQ_FUA)
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index a9d34424450d..5da2c829a718 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
-#define F2FS_BIO_FLAG_MASK(t)	(t & (REQ_RAHEAD | WRITE_FLUSH_FUA))
+#define F2FS_BIO_FLAG_MASK(t)	(t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA))
 #define F2FS_BIO_EXTRA_MASK(t)	(t & (REQ_META | REQ_PRIO))
 
 #define show_bio_type(op_flags)	show_bio_op_flags(op_flags), 		\
@@ -65,11 +65,9 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 	__print_symbolic(F2FS_BIO_FLAG_MASK(flags),			\
 		{ 0,			"WRITE" },			\
 		{ REQ_RAHEAD, 		"READAHEAD" },			\
-		{ READ_SYNC, 		"READ_SYNC" },			\
-		{ WRITE_SYNC, 		"WRITE_SYNC" },			\
-		{ WRITE_FLUSH,		"WRITE_FLUSH" },		\
-		{ WRITE_FUA, 		"WRITE_FUA" },			\
-		{ WRITE_FLUSH_FUA,	"WRITE_FLUSH_FUA" })
+		{ REQ_SYNC, 		"REQ_SYNC" },			\
+		{ REQ_PREFLUSH,		"REQ_PREFLUSH" },		\
+		{ REQ_FUA,		"REQ_FUA" })
 
 #define show_bio_extra(type)						\
 	__print_symbolic(F2FS_BIO_EXTRA_MASK(type),			\
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a3b1e617bcdc..32e0c232efba 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -307,7 +307,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
 {
 	int error;
 
-	hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block,
+	hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block,
 		      swsusp_header, NULL);
 	if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
 	    !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
@@ -317,7 +317,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
 		swsusp_header->flags = flags;
 		if (flags & SF_CRC32_MODE)
 			swsusp_header->crc32 = handle->crc32;
-		error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC,
+		error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC,
 				      swsusp_resume_block, swsusp_header, NULL);
 	} else {
 		printk(KERN_ERR "PM: Swap header not found!\n");
@@ -397,7 +397,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
 	} else {
 		src = buf;
 	}
-	return hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, offset, src, hb);
+	return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb);
 }
 
 static void release_swap_writer(struct swap_map_handle *handle)
@@ -1000,8 +1000,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 			return -ENOMEM;
 		}
 
-		error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset,
-				      tmp->map, NULL);
+		error = hib_submit_io(REQ_OP_READ, 0, offset, tmp->map, NULL);
 		if (error) {
 			release_swap_reader(handle);
 			return error;
@@ -1025,7 +1024,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
 	offset = handle->cur->entries[handle->k];
 	if (!offset)
 		return -EFAULT;
-	error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, buf, hb);
+	error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb);
 	if (error)
 		return error;
 	if (++handle->k >= MAP_PAGE_ENTRIES) {
@@ -1534,7 +1533,7 @@ int swsusp_check(void)
 	if (!IS_ERR(hib_resume_bdev)) {
 		set_blocksize(hib_resume_bdev, PAGE_SIZE);
 		clear_page(swsusp_header);
-		error = hib_submit_io(REQ_OP_READ, READ_SYNC,
+		error = hib_submit_io(REQ_OP_READ, 0,
 					swsusp_resume_block,
 					swsusp_header, NULL);
 		if (error)
@@ -1543,7 +1542,7 @@ int swsusp_check(void)
 		if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
 			memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
 			/* Reset swap signature now */
-			error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC,
+			error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC,
 						swsusp_resume_block,
 						swsusp_header, NULL);
 		} else {
@@ -1588,11 +1587,11 @@ int swsusp_unmark(void)
 {
 	int error;
 
-	hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block,
+	hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block,
 		      swsusp_header, NULL);
 	if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
 		memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
-		error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC,
+		error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC,
 					swsusp_resume_block,
 					swsusp_header, NULL);
 	} else {
-- 
cgit v1.2.3


From d38499530e5f170d30f32d3841fade204e63081d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:11 -0600
Subject: fs: decouple READ and WRITE from the block layer ops

Move READ and WRITE to kernel.h and don't define them in terms of block
layer ops; they are our generic data direction indicators these days
and have no more resemblance with the block layer ops.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h    |  6 ++++++
 include/linux/fs.h     | 13 -------------
 include/linux/kernel.h |  4 ++++
 include/linux/uio.h    |  2 +-
 4 files changed, 11 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 87ce64dafb93..fe9a17017608 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -62,6 +62,12 @@
 #define bio_sectors(bio)	((bio)->bi_iter.bi_size >> 9)
 #define bio_end_sector(bio)	((bio)->bi_iter.bi_sector + bio_sectors((bio)))
 
+/*
+ * Return the data direction, READ or WRITE.
+ */
+#define bio_data_dir(bio) \
+	(op_is_write(bio_op(bio)) ? WRITE : READ)
+
 /*
  * Check whether this bio carries any data or not. A NULL bio is allowed.
  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7a1b78ab7c15..0ad36e0c7fa7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -151,11 +151,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
  */
 #define CHECK_IOVEC_ONLY -1
 
-#define RW_MASK			REQ_OP_WRITE
-
-#define READ			REQ_OP_READ
-#define WRITE			REQ_OP_WRITE
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -2452,14 +2447,6 @@ extern void make_bad_inode(struct inode *);
 extern bool is_bad_inode(struct inode *);
 
 #ifdef CONFIG_BLOCK
-/*
- * return data direction, READ or WRITE
- */
-static inline int bio_data_dir(struct bio *bio)
-{
-	return op_is_write(bio_op(bio)) ? WRITE : READ;
-}
-
 extern void check_disk_size_change(struct gendisk *disk,
 				   struct block_device *bdev);
 extern int revalidate_disk(struct gendisk *);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc6ed52a39b9..01b6b460c34d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -50,6 +50,10 @@
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
 #define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
 
+/* generic data direction definitions */
+#define READ			0
+#define WRITE			1
+
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
 #define u64_to_user_ptr(x) (		\
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..d5aba1512b8b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i)
  *
  * The ?: is just for type safety.
  */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK)
+#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
 
 /*
  * Cap the iov_iter by given limit; note that the second argument is
-- 
cgit v1.2.3


From 1e3914d4cf4e14653b7917b0e965217465cb7a9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:12 -0600
Subject: block, fs: move submit_bio to bio.h

This is where all the other bio operations live, so users must include
bio.h anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 2 ++
 include/linux/fs.h  | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index fe9a17017608..5c604b4914bf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -404,6 +404,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
 
 }
 
+extern blk_qc_t submit_bio(struct bio *);
+
 extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ad36e0c7fa7..5b0a9b77534d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2717,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode)
 extern void inode_sb_list_add(struct inode *inode);
 
 #ifdef CONFIG_BLOCK
-extern blk_qc_t submit_bio(struct bio *);
 extern int bdev_read_only(struct block_device *);
 #endif
 extern int set_blocksize(struct block_device *, int);
-- 
cgit v1.2.3


From 2f8b544477e627a42e66902e948d87f86554aeca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:13 -0600
Subject: block,fs: untangle fs.h and blk_types.h

Nothing in fs.h should require blk_types.h to be included.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/9p/vfs_addr.c          | 1 +
 fs/cifs/connect.c         | 1 +
 fs/cifs/transport.c       | 1 +
 fs/gfs2/dir.c             | 1 +
 fs/isofs/compress.c       | 1 +
 fs/ntfs/logfile.c         | 1 +
 fs/ocfs2/buffer_head_io.c | 1 +
 fs/orangefs/inode.c       | 1 +
 fs/reiserfs/stree.c       | 1 +
 fs/squashfs/block.c       | 1 +
 fs/udf/dir.c              | 1 +
 fs/udf/directory.c        | 1 +
 fs/udf/inode.c            | 1 +
 fs/ufs/balloc.c           | 1 +
 include/linux/fs.h        | 2 +-
 include/linux/swap.h      | 1 +
 include/linux/writeback.h | 2 ++
 lib/iov_iter.c            | 1 +
 18 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6181ad79e1a5..5ca1fb0043f6 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -34,6 +34,7 @@
 #include <linux/idr.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
+#include <linux/bvec.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index aab5227979e2..db726e8311ca 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -41,6 +41,7 @@
 #include <keys/user-type.h>
 #include <net/ipv6.h>
 #include <linux/parser.h>
+#include <linux/bvec.h>
 
 #include "cifspdu.h"
 #include "cifsglob.h"
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 206a597b2293..5f02edc819af 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -28,6 +28,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/tcp.h>
+#include <linux/bvec.h>
 #include <linux/highmem.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 3cdde5f5d399..79113219be5f 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -62,6 +62,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/vmalloc.h>
+#include <linux/bio.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 44af14b2e916..9bb2fe35799d 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -18,6 +18,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/bio.h>
 
 #include <linux/vmalloc.h>
 #include <linux/zlib.h>
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 761f12f7f3ef..353379ff6057 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -27,6 +27,7 @@
 #include <linux/buffer_head.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/bio.h>
 
 #include "attrib.h"
 #include "aops.h"
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 8f040f88ade4..d9ebe11c8990 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/types.h>
 #include <linux/highmem.h>
+#include <linux/bio.h>
 
 #include <cluster/masklog.h>
 
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index ef3b4eb54cf2..551bc74ed2b8 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -8,6 +8,7 @@
  *  Linux VFS inode operations.
  */
 
+#include <linux/bvec.h>
 #include "protocol.h"
 #include "orangefs-kernel.h"
 #include "orangefs-bufmap.h"
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index a97e352d05d3..0037aea97d39 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -11,6 +11,7 @@
 #include <linux/time.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
+#include <linux/bio.h>
 #include "reiserfs.h"
 #include <linux/buffer_head.h>
 #include <linux/quotaops.h>
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index ce62a380314f..2751476e6b6e 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/buffer_head.h>
+#include <linux/bio.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index aaec13c95253..2d0e028067eb 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,6 +30,7 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/bio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 988d5352bdb8..7aa48bd7cbaf 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -16,6 +16,7 @@
 
 #include <linux/fs.h>
 #include <linux/string.h>
+#include <linux/bio.h>
 
 struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 					 struct udf_fileident_bh *fibh,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index aad46401ede5..0f3db71753aa 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,6 +38,7 @@
 #include <linux/crc-itu-t.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
+#include <linux/bio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 67e085d591d8..b035af54f538 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/bitops.h>
+#include <linux/bio.h>
 #include <asm/byteorder.h>
 
 #include "ufs_fs.h"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5b0a9b77534d..8533e9d59c29 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -28,7 +28,6 @@
 #include <linux/uidgid.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
-#include <linux/blk_types.h>
 #include <linux/workqueue.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/delayed_call.h>
@@ -38,6 +37,7 @@
 
 struct backing_dev_info;
 struct bdi_writeback;
+struct bio;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a56523cefb9b..3a6aebc23001 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/atomic.h>
 #include <linux/page-flags.h>
+#include <linux/blk_types.h>
 #include <asm/page.h>
 
 struct notifier_block;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 797100e10010..e4c38703bf4e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,6 +10,8 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 
+struct bio;
+
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
 /*
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f0c7f1481bae..efc953c47572 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1,4 +1,5 @@
 #include <linux/export.h>
+#include <linux/bvec.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
-- 
cgit v1.2.3


From 9f08217120568afdfb59973a89a675e649c0096d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:15 -0600
Subject: ceph: don't include blk_types.h in messenger.h

The file only needs the struct bvec_iter delcaration, which is available
from bvec.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/ceph/messenger.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 8dbd7879fdc6..67bcef2ecddb 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,7 +1,7 @@
 #ifndef __FS_CEPH_MESSENGER_H
 #define __FS_CEPH_MESSENGER_H
 
-#include <linux/blk_types.h>
+#include <linux/bvec.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
-- 
cgit v1.2.3


From be297968da22cf40c9c419df51e71ba8856a2ec2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:16 -0600
Subject: mm: only include blk_types in swap.h if CONFIG_SWAP is enabled

It's only needed for the CONFIG_SWAP-only use of bio_end_io_t.

Because CONFIG_SWAP implies CONFIG_BLOCK this will allow to drop some
ifdefs in blk_types.h.

Instead we'll need to add a few explicit includes that were implicit
before, though.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/staging/lustre/include/linux/lnet/types.h | 1 +
 drivers/staging/lustre/lustre/llite/rw.c          | 1 +
 fs/ntfs/aops.c                                    | 1 +
 fs/ntfs/mft.c                                     | 1 +
 fs/reiserfs/inode.c                               | 1 +
 fs/splice.c                                       | 1 +
 include/linux/swap.h                              | 4 +++-
 7 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h
index f8be0e2f7bf7..8ca1e9d0cfe2 100644
--- a/drivers/staging/lustre/include/linux/lnet/types.h
+++ b/drivers/staging/lustre/include/linux/lnet/types.h
@@ -34,6 +34,7 @@
 #define __LNET_TYPES_H__
 
 #include <linux/types.h>
+#include <linux/bvec.h>
 
 /** \addtogroup lnet
  * @{
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 50c0152ba022..76a6836cdf70 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -47,6 +47,7 @@
 #include <linux/pagemap.h>
 /* current_is_kswapd() */
 #include <linux/swap.h>
+#include <linux/bvec.h>
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index fe251f187ff8..d0cf6fee5c77 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -29,6 +29,7 @@
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
+#include <linux/bio.h>
 
 #include "aops.h"
 #include "attrib.h"
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index d3c009626032..b6f402194f02 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -23,6 +23,7 @@
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
+#include <linux/bio.h>
 
 #include "attrib.h"
 #include "aops.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 58b2dedb2a3a..cfeae9b0a2b7 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -19,6 +19,7 @@
 #include <linux/quotaops.h>
 #include <linux/swap.h>
 #include <linux/uio.h>
+#include <linux/bio.h>
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 153d4f3bd441..51492f26915a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -17,6 +17,7 @@
  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
  *
  */
+#include <linux/bvec.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3a6aebc23001..bfee1af1f54f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -11,7 +11,6 @@
 #include <linux/fs.h>
 #include <linux/atomic.h>
 #include <linux/page-flags.h>
-#include <linux/blk_types.h>
 #include <asm/page.h>
 
 struct notifier_block;
@@ -352,6 +351,9 @@ extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 
 #ifdef CONFIG_SWAP
+
+#include <linux/blk_types.h> /* for bio_end_io_t */
+
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-- 
cgit v1.2.3


From 7281b4526cefc898d180850b54d1369f38c6b202 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 07:40:17 -0600
Subject: block: remove the CONFIG_BLOCK ifdef in blk_types.h

Now that we have a separate header for struct bio_vec there is absolutely
no excuse for including this header from non-block I/O code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 63b750a3b165..bb921028e7c5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,7 +17,6 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
-#ifdef CONFIG_BLOCK
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -126,8 +125,6 @@ struct bio {
 #define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
 #define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
 
-#endif /* CONFIG_BLOCK */
-
 /*
  * Operations and flags common to the bio and request structures.
  * We use 8 bits for encoding the operation, and the remaining 24 for flags.
-- 
cgit v1.2.3


From 1d796d6a9641fbfcd90fcfaf6fb4894a13d0304f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 1 Nov 2016 09:52:57 -0600
Subject: block: add REQ_BACKGROUND

This adds a new request flag, REQ_BACKGROUND, that callers can use to
tell the block layer that this is background (non-urgent) IO.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/blk_types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index bb921028e7c5..562ac46cb790 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -177,6 +177,7 @@ enum req_flag_bits {
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
+	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -192,6 +193,7 @@ enum req_flag_bits {
 #define REQ_FUA			(1ULL << __REQ_FUA)
 #define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
+#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-- 
cgit v1.2.3


From 7637241e651ec36e409412869f986dd5f097735f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 1 Nov 2016 10:00:38 -0600
Subject: writeback: add wbc_to_write_flags()

Add wbc_to_write_flags(), which returns the write modifier flags to use,
based on a struct writeback_control. No functional changes in this
patch, but it prepares us for factoring other wbc fields for write type.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/buffer.c               | 2 +-
 fs/f2fs/data.c            | 2 +-
 fs/f2fs/node.c            | 2 +-
 fs/gfs2/meta_io.c         | 3 +--
 fs/mpage.c                | 2 +-
 fs/xfs/xfs_aops.c         | 8 ++------
 include/linux/writeback.h | 9 +++++++++
 mm/page_io.c              | 5 +----
 8 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index bc7c2bb30a9b..af5776da814a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	unsigned int blocksize, bbits;
 	int nr_underway = 0;
-	int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
+	int write_flags = wbc_to_write_flags(wbc);
 
 	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b80bf10603d7..9e5561fa4cb6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1249,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page,
 		.sbi = sbi,
 		.type = DATA,
 		.op = REQ_OP_WRITE,
-		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0,
+		.op_flags = wbc_to_write_flags(wbc),
 		.page = page,
 		.encrypted_page = NULL,
 	};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 932f3f8bb57b..d1e29deb4598 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page,
 		.sbi = sbi,
 		.type = NODE,
 		.op = REQ_OP_WRITE,
-		.op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0,
+		.op_flags = wbc_to_write_flags(wbc),
 		.page = page,
 		.encrypted_page = NULL,
 	};
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e562b1191c9c..49db8ef13fdf 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
-	int write_flags = REQ_META | REQ_PRIO |
-		(wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
+	int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
diff --git a/fs/mpage.c b/fs/mpage.c
index f35e2819d0c6..98fc11aa7e0b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
 	int ret = 0;
-	int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
+	int op_flags = wbc_to_write_flags(wbc);
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 594e02c485b2..6be5204a06d3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -495,9 +495,7 @@ xfs_submit_ioend(
 
 	ioend->io_bio->bi_private = ioend;
 	ioend->io_bio->bi_end_io = xfs_end_bio;
-	ioend->io_bio->bi_opf = REQ_OP_WRITE;
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		ioend->io_bio->bi_opf |= REQ_SYNC;
+	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 
 	/*
 	 * If we are failing the IO now, just mark the ioend with an
@@ -569,9 +567,7 @@ xfs_chain_bio(
 
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
-	ioend->io_bio->bi_opf = REQ_OP_WRITE;
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		ioend->io_bio->bi_opf |= REQ_SYNC;
+	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	submit_bio(ioend->io_bio);
 	ioend->io_bio = new;
 }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e4c38703bf4e..50c96ee8108f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,6 +9,7 @@
 #include <linux/fs.h>
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
+#include <linux/blk_types.h>
 
 struct bio;
 
@@ -102,6 +103,14 @@ struct writeback_control {
 #endif
 };
 
+static inline int wbc_to_write_flags(struct writeback_control *wbc)
+{
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		return REQ_SYNC;
+
+	return 0;
+}
+
 /*
  * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
  * and are measured against each other in.  There always is one global
diff --git a/mm/page_io.c b/mm/page_io.c
index a2651f58c86a..23f6d0d3470f 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -320,10 +320,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		ret = -ENOMEM;
 		goto out;
 	}
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC);
-	else
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	count_vm_event(PSWPOUT);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From 13edd5e7315a26b448c5f7f33fc7721b1e0c17ef Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 1 Nov 2016 10:01:35 -0600
Subject: writeback: mark background writeback as such

If we're doing background type writes, then use the appropriate
background write flags for that.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/writeback.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 50c96ee8108f..c78f9f0920b5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -107,6 +107,8 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc)
 {
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		return REQ_SYNC;
+	else if (wbc->for_kupdate || wbc->for_background)
+		return REQ_BACKGROUND;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2cefe4dbaadf83b236caab46705b4b5a4958e3b6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 31 Oct 2016 11:59:24 -0600
Subject: block: add bio_iov_iter_get_pages()

This is a helper that pins down a range from an iov_iter and adds it to
a bio without requiring a separate memory allocation for the page array.
It will be used for upcoming direct I/O implementations for block devices
and iomap based file systems.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[hch: ported to the iov_iter interface, renamed and added comments.
      All blame should be directed to me and all fame should go to Kent
      after this!]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h |  1 +
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index db85c5753a76..2cf6ebabc68c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -847,6 +847,55 @@ done:
 }
 EXPORT_SYMBOL(bio_add_page);
 
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+	struct page **pages = (struct page **)bv;
+	size_t offset, diff;
+	ssize_t size;
+
+	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (unlikely(size <= 0))
+		return size ? size : -EFAULT;
+	nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/*
+	 * Deep magic below:  We need to walk the pinned pages backwards
+	 * because we are abusing the space allocated for the bio_vecs
+	 * for the page array.  Because the bio_vecs are larger than the
+	 * page pointers by definition this will always work.  But it also
+	 * means we can't use bio_add_page, so any changes to it's semantics
+	 * need to be reflected here as well.
+	 */
+	bio->bi_iter.bi_size += size;
+	bio->bi_vcnt += nr_pages;
+
+	diff = (nr_pages * PAGE_SIZE - offset) - size;
+	while (nr_pages--) {
+		bv[nr_pages].bv_page = pages[nr_pages];
+		bv[nr_pages].bv_len = PAGE_SIZE;
+		bv[nr_pages].bv_offset = 0;
+	}
+
+	bv[0].bv_offset += offset;
+	bv[0].bv_len -= offset;
+	if (diff)
+		bv[bio->bi_vcnt - 1].bv_len -= diff;
+
+	iov_iter_advance(iter, size);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
+
 struct submit_bio_ret {
 	struct completion event;
 	int error;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5c604b4914bf..d367cd37a7f7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -427,6 +427,7 @@ void bio_chain(struct bio *, struct bio *);
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    const struct iov_iter *, gfp_t);
-- 
cgit v1.2.3


From fd00144301d64f1742541a3c5e64cd1c51f39c55 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 28 Oct 2016 17:19:37 -0700
Subject: blk-mq: Introduce blk_mq_queue_stopped()

The function blk_queue_stopped() allows to test whether or not a
traditional request queue has been stopped. Introduce a helper
function that allows block drivers to query easily whether or not
one or more hardware contexts of a blk-mq queue have been stopped.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 20 ++++++++++++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2864e191cc86..28bf667bfe09 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -944,6 +944,26 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queues);
 
+/**
+ * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
+ * @q: request queue.
+ *
+ * The caller is responsible for serializing this function against
+ * blk_mq_{start,stop}_hw_queue().
+ */
+bool blk_mq_queue_stopped(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (blk_mq_hctx_stopped(hctx))
+			return true;
+
+	return false;
+}
+EXPORT_SYMBOL(blk_mq_queue_stopped);
+
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	cancel_work(&hctx->run_work);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 535ab2e13d2e..aa930009fcd3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -223,6 +223,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs
 void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq, int error);
 
+bool blk_mq_queue_stopped(struct request_queue *q);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
-- 
cgit v1.2.3


From 9b7dd572cc439fa92e120290eb74d0295567c5a0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 28 Oct 2016 17:20:49 -0700
Subject: blk-mq: Remove blk_mq_cancel_requeue_work()

Since blk_mq_requeue_work() no longer restarts stopped queues
canceling requeue work is no longer needed to prevent that a
stopped queue would be restarted. Hence remove this function.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c           | 6 ------
 drivers/md/dm-rq.c       | 2 --
 drivers/nvme/host/core.c | 1 -
 include/linux/blk-mq.h   | 1 -
 4 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d95034ae64f6..a461823644fb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -526,12 +526,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
-void blk_mq_cancel_requeue_work(struct request_queue *q)
-{
-	cancel_delayed_work_sync(&q->requeue_work);
-}
-EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
-
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
 	kblockd_schedule_delayed_work(&q->requeue_work, 0);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a9e9e781bb77..060ccc5a4b1c 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -116,8 +116,6 @@ static void dm_mq_stop_queue(struct request_queue *q)
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	/* Avoid that requeuing could restart the queue. */
-	blk_mq_cancel_requeue_work(q);
 	blk_mq_stop_hw_queues(q);
 }
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 329381a28edf..a764c2aa00a1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2081,7 +2081,6 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
 		spin_unlock_irq(ns->queue->queue_lock);
 
-		blk_mq_cancel_requeue_work(ns->queue);
 		blk_mq_stop_hw_queues(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index aa930009fcd3..a85a20f80aaa 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -217,7 +217,6 @@ void __blk_mq_end_request(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
-void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_abort_requeue_list(struct request_queue *q);
-- 
cgit v1.2.3


From 6a83e74d214a47a1371cd2e6a783264fcba7d428 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Wed, 2 Nov 2016 10:09:51 -0600
Subject: blk-mq: Introduce blk_mq_quiesce_queue()

blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations
have finished. This function does *not* wait until all outstanding
requests have finished (this means invocation of request.end_io()).
The algorithm used by blk_mq_quiesce_queue() is as follows:
* Hold either an RCU read lock or an SRCU read lock around
  .queue_rq() calls. The former is used if .queue_rq() does not
  block and the latter if .queue_rq() may block.
* blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues()
  followed by synchronize_srcu() or synchronize_rcu(). The latter
  call waits for .queue_rq() invocations that started before
  blk_mq_quiesce_queue() was called.
* The blk_mq_hctx_stopped() calls that control whether or not
  .queue_rq() will be called are called with the (S)RCU read lock
  held. This is necessary to avoid race conditions against
  blk_mq_quiesce_queue().

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/Kconfig          |  1 +
 block/blk-mq.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/blk-mq.h |  3 +++
 include/linux/blkdev.h |  1 +
 4 files changed, 69 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 6b0ad08f0677..3a024440a669 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,6 +5,7 @@ menuconfig BLOCK
        bool "Enable the block layer" if EXPERT
        default y
        select SBITMAP
+       select SRCU
        help
 	 Provide block layer support for the kernel.
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a461823644fb..3dc323543293 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+	bool rcu = false;
+
+	blk_mq_stop_hw_queues(q);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu(&hctx->queue_rq_srcu);
+		else
+			rcu = true;
+	}
+	if (rcu)
+		synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
  * of IO. In particular, we'd like FIFO behaviour on handling existing
  * items on the hctx->dispatch list. Ignore that for now.
  */
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct request *rq;
@@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
-	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
-		cpu_online(hctx->next_cpu));
-
 	hctx->run++;
 
 	/*
@@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	int srcu_idx;
+
+	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+		cpu_online(hctx->next_cpu));
+
+	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+		rcu_read_lock();
+		blk_mq_process_rq_list(hctx);
+		rcu_read_unlock();
+	} else {
+		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		blk_mq_process_rq_list(hctx);
+		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+	}
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_mq_alloc_data data;
 	struct request *rq;
-	unsigned int request_count = 0;
+	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	blk_qc_t cookie;
@@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_bio_to_request(rq, bio);
 
 		/*
-		 * We do limited pluging. If the bio can be merged, do that.
+		 * We do limited plugging. If the bio can be merged, do that.
 		 * Otherwise the existing request in the plug list will be
 		 * issued. So the plug list will have one request at most
 		 */
@@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		if (!old_rq)
 			goto done;
-		blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+
+		if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
+			rcu_read_lock();
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			rcu_read_unlock();
+		} else {
+			srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
+			blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+			srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+		}
 		goto done;
 	}
 
@@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
 	sbitmap_free(&hctx->ctx_map);
@@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
 				   flush_start_tag + hctx_idx, node))
 		goto free_fq;
 
+	if (hctx->flags & BLK_MQ_F_BLOCKING)
+		init_srcu_struct(&hctx->queue_rq_srcu);
+
 	return 0;
 
  free_fq:
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a85a20f80aaa..ed20ac74c62a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -3,6 +3,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/sbitmap.h>
+#include <linux/srcu.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -35,6 +36,8 @@ struct blk_mq_hw_ctx {
 
 	struct blk_mq_tags	*tags;
 
+	struct srcu_struct	queue_rq_srcu;
+
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8396da2bb698..13d893a69b46 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -918,6 +918,7 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
+extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
-- 
cgit v1.2.3


From 2b053aca76b48e681be57b34ca3a8c2c10b275c5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 28 Oct 2016 17:21:41 -0700
Subject: blk-mq: Add a kick_requeue_list argument to blk_mq_requeue_request()

Most blk_mq_requeue_request() and blk_mq_add_to_requeue_list() calls
are followed by kicking the requeue list. Hence add an argument to
these two functions that allows to kick the requeue list. This was
proposed by Christoph Hellwig.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c            |  5 +----
 block/blk-mq.c               | 10 +++++++---
 drivers/block/xen-blkfront.c |  2 +-
 drivers/md/dm-rq.c           |  2 +-
 drivers/nvme/host/core.c     |  2 +-
 drivers/scsi/scsi_lib.c      |  4 +---
 include/linux/blk-mq.h       |  5 +++--
 7 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index d35beca18481..c486b7aa62ee 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -134,10 +134,7 @@ static void blk_flush_restore_request(struct request *rq)
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
 	if (rq->q->mq_ops) {
-		struct request_queue *q = rq->q;
-
-		blk_mq_add_to_requeue_list(rq, add_front);
-		blk_mq_kick_requeue_list(q);
+		blk_mq_add_to_requeue_list(rq, add_front, true);
 		return false;
 	} else {
 		if (add_front)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3dc323543293..8d3de5bd4d6f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -492,12 +492,12 @@ static void __blk_mq_requeue_request(struct request *rq)
 	}
 }
 
-void blk_mq_requeue_request(struct request *rq)
+void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 {
 	__blk_mq_requeue_request(rq);
 
 	BUG_ON(blk_queued_rq(rq));
-	blk_mq_add_to_requeue_list(rq, true);
+	blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
@@ -531,7 +531,8 @@ static void blk_mq_requeue_work(struct work_struct *work)
 	blk_mq_run_hw_queues(q, false);
 }
 
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+				bool kick_requeue_list)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
@@ -550,6 +551,9 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 		list_add_tail(&rq->queuelist, &q->requeue_list);
 	}
 	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	if (kick_requeue_list)
+		blk_mq_kick_requeue_list(q);
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 71ca36eab558..c000fdf048b2 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2043,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info)
 		/* Requeue pending requests (flush or discard) */
 		list_del_init(&req->queuelist);
 		BUG_ON(req->nr_phys_segments > segs);
-		blk_mq_requeue_request(req);
+		blk_mq_requeue_request(req, false);
 	}
 	blk_mq_start_stopped_hw_queues(info->rq, true);
 	blk_mq_kick_requeue_list(info->rq);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 060ccc5a4b1c..315257959fc0 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(dm_mq_kick_requeue_list);
 
 static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
 {
-	blk_mq_requeue_request(rq);
+	blk_mq_requeue_request(rq, false);
 	__dm_mq_kick_requeue_list(rq->q, msecs);
 }
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a764c2aa00a1..e8070e3cc7c7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -203,7 +203,7 @@ void nvme_requeue_req(struct request *req)
 {
 	unsigned long flags;
 
-	blk_mq_requeue_request(req);
+	blk_mq_requeue_request(req, false);
 	spin_lock_irqsave(req->q->queue_lock, flags);
 	if (!blk_queue_stopped(req->q))
 		blk_mq_kick_requeue_list(req->q);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2b78ff12bf3c..2e35132f8be1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -86,10 +86,8 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdev = cmd->device;
-	struct request_queue *q = cmd->request->q;
 
-	blk_mq_requeue_request(cmd->request);
-	blk_mq_kick_requeue_list(q);
+	blk_mq_requeue_request(cmd->request, true);
 	put_device(&sdev->sdev_gendev);
 }
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ed20ac74c62a..35a0af5ede6d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -218,8 +218,9 @@ void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, int error);
 void __blk_mq_end_request(struct request *rq, int error);
 
-void blk_mq_requeue_request(struct request *rq);
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_abort_requeue_list(struct request_queue *q);
-- 
cgit v1.2.3


From bc8ee596afe8f35b379f87575c46d800dd8e7e68 Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Tue, 1 Nov 2016 16:32:25 +0100
Subject: net: mii: add generic function to support ksetting support

The old ethtool api (get_setting and set_setting) has generic mii
functions mii_ethtool_sset and mii_ethtool_gset.

To support the new ethtool api ({get|set}_link_ksettings), we add
two generics mii function mii_ethtool_{get|set}_link_ksettings_get.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mii.c   | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mii.h |   4 ++
 2 files changed, 199 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 993570b1e2ae..0443546fc427 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -134,6 +134,101 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 	return 0;
 }
 
+/**
+ * mii_ethtool_get_link_ksettings - get settings that are specified in @cmd
+ * @mii: MII interface
+ * @cmd: requested ethtool_link_ksettings
+ *
+ * The @cmd parameter is expected to have been cleared before calling
+ * mii_ethtool_get_link_ksettings().
+ *
+ * Returns 0 for success, negative on error.
+ */
+int mii_ethtool_get_link_ksettings(struct mii_if_info *mii,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct net_device *dev = mii->dev;
+	u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0;
+	u32 nego, supported, advertising, lp_advertising;
+
+	supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+		     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+		     SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
+	if (mii->supports_gmii)
+		supported |= SUPPORTED_1000baseT_Half |
+			SUPPORTED_1000baseT_Full;
+
+	/* only supports twisted-pair */
+	cmd->base.port = PORT_MII;
+
+	/* this isn't fully supported at higher layers */
+	cmd->base.phy_address = mii->phy_id;
+	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
+
+	advertising = ADVERTISED_TP | ADVERTISED_MII;
+
+	bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+	bmsr = mii->mdio_read(dev, mii->phy_id, MII_BMSR);
+	if (mii->supports_gmii) {
+		ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
+		stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000);
+	}
+	if (bmcr & BMCR_ANENABLE) {
+		advertising |= ADVERTISED_Autoneg;
+		cmd->base.autoneg = AUTONEG_ENABLE;
+
+		advertising |= mii_get_an(mii, MII_ADVERTISE);
+		if (mii->supports_gmii)
+			advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
+
+		if (bmsr & BMSR_ANEGCOMPLETE) {
+			lp_advertising = mii_get_an(mii, MII_LPA);
+			lp_advertising |=
+					mii_stat1000_to_ethtool_lpa_t(stat1000);
+		} else {
+			lp_advertising = 0;
+		}
+
+		nego = advertising & lp_advertising;
+
+		if (nego & (ADVERTISED_1000baseT_Full |
+			    ADVERTISED_1000baseT_Half)) {
+			cmd->base.speed = SPEED_1000;
+			cmd->base.duplex = !!(nego & ADVERTISED_1000baseT_Full);
+		} else if (nego & (ADVERTISED_100baseT_Full |
+				   ADVERTISED_100baseT_Half)) {
+			cmd->base.speed = SPEED_100;
+			cmd->base.duplex = !!(nego & ADVERTISED_100baseT_Full);
+		} else {
+			cmd->base.speed = SPEED_10;
+			cmd->base.duplex = !!(nego & ADVERTISED_10baseT_Full);
+		}
+	} else {
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+		cmd->base.speed = ((bmcr & BMCR_SPEED1000 &&
+				    (bmcr & BMCR_SPEED100) == 0) ?
+				   SPEED_1000 :
+				   ((bmcr & BMCR_SPEED100) ?
+				    SPEED_100 : SPEED_10));
+		cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
+			DUPLEX_FULL : DUPLEX_HALF;
+	}
+
+	mii->full_duplex = cmd->base.duplex;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+						lp_advertising);
+
+	/* ignore maxtxpkt, maxrxpkt for now */
+
+	return 0;
+}
+
 /**
  * mii_ethtool_sset - set settings that are specified in @ecmd
  * @mii: MII interface
@@ -226,6 +321,104 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 	return 0;
 }
 
+/**
+ * mii_ethtool_set_link_ksettings - set settings that are specified in @cmd
+ * @mii: MII interfaces
+ * @cmd: requested ethtool_link_ksettings
+ *
+ * Returns 0 for success, negative on error.
+ */
+int mii_ethtool_set_link_ksettings(struct mii_if_info *mii,
+				   const struct ethtool_link_ksettings *cmd)
+{
+	struct net_device *dev = mii->dev;
+	u32 speed = cmd->base.speed;
+
+	if (speed != SPEED_10 &&
+	    speed != SPEED_100 &&
+	    speed != SPEED_1000)
+		return -EINVAL;
+	if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL)
+		return -EINVAL;
+	if (cmd->base.port != PORT_MII)
+		return -EINVAL;
+	if (cmd->base.phy_address != mii->phy_id)
+		return -EINVAL;
+	if (cmd->base.autoneg != AUTONEG_DISABLE &&
+	    cmd->base.autoneg != AUTONEG_ENABLE)
+		return -EINVAL;
+	if ((speed == SPEED_1000) && (!mii->supports_gmii))
+		return -EINVAL;
+
+	/* ignore supported, maxtxpkt, maxrxpkt */
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		u32 bmcr, advert, tmp;
+		u32 advert2 = 0, tmp2 = 0;
+		u32 advertising;
+
+		ethtool_convert_link_mode_to_legacy_u32(
+			&advertising, cmd->link_modes.advertising);
+
+		if ((advertising & (ADVERTISED_10baseT_Half |
+				    ADVERTISED_10baseT_Full |
+				    ADVERTISED_100baseT_Half |
+				    ADVERTISED_100baseT_Full |
+				    ADVERTISED_1000baseT_Half |
+				    ADVERTISED_1000baseT_Full)) == 0)
+			return -EINVAL;
+
+		/* advertise only what has been requested */
+		advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+		tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
+		if (mii->supports_gmii) {
+			advert2 = mii->mdio_read(dev, mii->phy_id,
+						 MII_CTRL1000);
+			tmp2 = advert2 &
+				~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
+		}
+		tmp |= ethtool_adv_to_mii_adv_t(advertising);
+
+		if (mii->supports_gmii)
+			tmp2 |= ethtool_adv_to_mii_ctrl1000_t(advertising);
+		if (advert != tmp) {
+			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
+			mii->advertising = tmp;
+		}
+		if ((mii->supports_gmii) && (advert2 != tmp2))
+			mii->mdio_write(dev, mii->phy_id, MII_CTRL1000, tmp2);
+
+		/* turn on autonegotiation, and force a renegotiate */
+		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+		bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+		mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
+
+		mii->force_media = 0;
+	} else {
+		u32 bmcr, tmp;
+
+		/* turn off auto negotiation, set speed and duplexity */
+		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+		tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 |
+			       BMCR_SPEED1000 | BMCR_FULLDPLX);
+		if (speed == SPEED_1000)
+			tmp |= BMCR_SPEED1000;
+		else if (speed == SPEED_100)
+			tmp |= BMCR_SPEED100;
+		if (cmd->base.duplex == DUPLEX_FULL) {
+			tmp |= BMCR_FULLDPLX;
+			mii->full_duplex = 1;
+		} else {
+			mii->full_duplex = 0;
+		}
+		if (bmcr != tmp)
+			mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
+
+		mii->force_media = 1;
+	}
+	return 0;
+}
+
 /**
  * mii_check_gmii_support - check if the MII supports Gb interfaces
  * @mii: the MII interface
@@ -466,7 +659,9 @@ MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(mii_link_ok);
 EXPORT_SYMBOL(mii_nway_restart);
 EXPORT_SYMBOL(mii_ethtool_gset);
+EXPORT_SYMBOL(mii_ethtool_get_link_ksettings);
 EXPORT_SYMBOL(mii_ethtool_sset);
+EXPORT_SYMBOL(mii_ethtool_set_link_ksettings);
 EXPORT_SYMBOL(mii_check_link);
 EXPORT_SYMBOL(mii_check_media);
 EXPORT_SYMBOL(mii_check_gmii_support);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 47492c9631b3..1629a0c32679 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -31,7 +31,11 @@ struct mii_if_info {
 extern int mii_link_ok (struct mii_if_info *mii);
 extern int mii_nway_restart (struct mii_if_info *mii);
 extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern int mii_ethtool_get_link_ksettings(
+	struct mii_if_info *mii, struct ethtool_link_ksettings *cmd);
 extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern int mii_ethtool_set_link_ksettings(
+	struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd);
 extern int mii_check_gmii_support(struct mii_if_info *mii);
 extern void mii_check_link (struct mii_if_info *mii);
 extern unsigned int mii_check_media (struct mii_if_info *mii,
-- 
cgit v1.2.3


From 527c02f66d263d2eeff237a2326c3278cfc03d3b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 27 May 2016 11:56:53 +0200
Subject: soc: renesas: Add R-Car RST driver

Add a driver for the Renesas R-Car Gen1 RESET/WDT and R-Car Gen2/Gen3
and RZ/G RST module.

For now this driver just provides an API to obtain the state of the mode
pins, as latched at reset time.  As this is typically called from the
probe function of a clock driver, which can run much earlier than any
initcall, calling rcar_rst_read_mode_pins() just forces an early
initialization of the driver.

Despite the current simple and almost identical handling for all
supported SoCs, the driver matches against SoC-specific compatible
values, as the features provided by the hardware module differ a lot
across the various SoC families and members.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Dirk Behme <dirk.behme@de.bosch.com>
---
 drivers/soc/renesas/Makefile         |  5 ++
 drivers/soc/renesas/rcar-rst.c       | 92 ++++++++++++++++++++++++++++++++++++
 include/linux/soc/renesas/rcar-rst.h |  6 +++
 3 files changed, 103 insertions(+)
 create mode 100644 drivers/soc/renesas/rcar-rst.c
 create mode 100644 include/linux/soc/renesas/rcar-rst.h

(limited to 'include/linux')

diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile
index 623039c3514c..86cc78cd1962 100644
--- a/drivers/soc/renesas/Makefile
+++ b/drivers/soc/renesas/Makefile
@@ -1,3 +1,8 @@
+obj-$(CONFIG_ARCH_RCAR_GEN1)	+= rcar-rst.o
+obj-$(CONFIG_ARCH_RCAR_GEN2)	+= rcar-rst.o
+obj-$(CONFIG_ARCH_R8A7795)	+= rcar-rst.o
+obj-$(CONFIG_ARCH_R8A7796)	+= rcar-rst.o
+
 obj-$(CONFIG_ARCH_R8A7779)	+= rcar-sysc.o r8a7779-sysc.o
 obj-$(CONFIG_ARCH_R8A7790)	+= rcar-sysc.o r8a7790-sysc.o
 obj-$(CONFIG_ARCH_R8A7791)	+= rcar-sysc.o r8a7791-sysc.o
diff --git a/drivers/soc/renesas/rcar-rst.c b/drivers/soc/renesas/rcar-rst.c
new file mode 100644
index 000000000000..a6d1c26d3167
--- /dev/null
+++ b/drivers/soc/renesas/rcar-rst.c
@@ -0,0 +1,92 @@
+/*
+ * R-Car Gen1 RESET/WDT, R-Car Gen2, Gen3, and RZ/G RST Driver
+ *
+ * Copyright (C) 2016 Glider bvba
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/soc/renesas/rcar-rst.h>
+
+struct rst_config {
+	unsigned int modemr;	/* Mode Monitoring Register Offset */
+};
+
+static const struct rst_config rcar_rst_gen1 __initconst = {
+	.modemr = 0x20,
+};
+
+static const struct rst_config rcar_rst_gen2 __initconst = {
+	.modemr = 0x60,
+};
+
+static const struct of_device_id rcar_rst_matches[] __initconst = {
+	/* RZ/G is handled like R-Car Gen2 */
+	{ .compatible = "renesas,r8a7743-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7745-rst", .data = &rcar_rst_gen2 },
+	/* R-Car Gen1 */
+	{ .compatible = "renesas,r8a7778-reset-wdt", .data = &rcar_rst_gen1 },
+	{ .compatible = "renesas,r8a7779-reset-wdt", .data = &rcar_rst_gen1 },
+	/* R-Car Gen2 */
+	{ .compatible = "renesas,r8a7790-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7791-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7792-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7793-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7794-rst", .data = &rcar_rst_gen2 },
+	/* R-Car Gen3 is handled like R-Car Gen2 */
+	{ .compatible = "renesas,r8a7795-rst", .data = &rcar_rst_gen2 },
+	{ .compatible = "renesas,r8a7796-rst", .data = &rcar_rst_gen2 },
+	{ /* sentinel */ }
+};
+
+static void __iomem *rcar_rst_base __initdata;
+static u32 saved_mode __initdata;
+
+static int __init rcar_rst_init(void)
+{
+	const struct of_device_id *match;
+	const struct rst_config *cfg;
+	struct device_node *np;
+	void __iomem *base;
+	int error = 0;
+
+	np = of_find_matching_node_and_match(NULL, rcar_rst_matches, &match);
+	if (!np)
+		return -ENODEV;
+
+	base = of_iomap(np, 0);
+	if (!base) {
+		pr_warn("%s: Cannot map regs\n", np->full_name);
+		error = -ENOMEM;
+		goto out_put;
+	}
+
+	rcar_rst_base = base;
+	cfg = match->data;
+	saved_mode = ioread32(base + cfg->modemr);
+
+	pr_debug("%s: MODE = 0x%08x\n", np->full_name, saved_mode);
+
+out_put:
+	of_node_put(np);
+	return error;
+}
+
+int __init rcar_rst_read_mode_pins(u32 *mode)
+{
+	int error;
+
+	if (!rcar_rst_base) {
+		error = rcar_rst_init();
+		if (error)
+			return error;
+	}
+
+	*mode = saved_mode;
+	return 0;
+}
diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h
new file mode 100644
index 000000000000..a18e0783946b
--- /dev/null
+++ b/include/linux/soc/renesas/rcar-rst.h
@@ -0,0 +1,6 @@
+#ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__
+#define __LINUX_SOC_RENESAS_RCAR_RST_H__
+
+int rcar_rst_read_mode_pins(u32 *mode);
+
+#endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */
-- 
cgit v1.2.3


From 7978a78c828ac8d5351b85480e60ada865b9bea9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 1 Jun 2016 15:23:22 +0200
Subject: clk: renesas: r8a7778: Remove obsolete r8a7778_clocks_init()

The R-Car M1A board code no longer calls r8a7778_clocks_init().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Dirk Behme <dirk.behme@de.bosch.com>
---
 drivers/clk/renesas/clk-r8a7778.c | 13 -------------
 include/linux/clk/renesas.h       |  1 -
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/renesas/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c
index 07ea411098a7..886a8380e912 100644
--- a/drivers/clk/renesas/clk-r8a7778.c
+++ b/drivers/clk/renesas/clk-r8a7778.c
@@ -143,16 +143,3 @@ static void __init r8a7778_cpg_clocks_init(struct device_node *np)
 
 CLK_OF_DECLARE(r8a7778_cpg_clks, "renesas,r8a7778-cpg-clocks",
 	       r8a7778_cpg_clocks_init);
-
-void __init r8a7778_clocks_init(u32 mode)
-{
-	BUG_ON(!(mode & BIT(19)));
-
-	cpg_mode_rates = (!!(mode & BIT(18)) << 2) |
-			 (!!(mode & BIT(12)) << 1) |
-			 (!!(mode & BIT(11)));
-	cpg_mode_divs = (!!(mode & BIT(2)) << 1) |
-			(!!(mode & BIT(1)));
-
-	of_clk_init(NULL);
-}
diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h
index ba6fa4148515..2b663bba1adc 100644
--- a/include/linux/clk/renesas.h
+++ b/include/linux/clk/renesas.h
@@ -20,7 +20,6 @@ struct device;
 struct device_node;
 struct generic_pm_domain;
 
-void r8a7778_clocks_init(u32 mode);
 void r8a7779_clocks_init(u32 mode);
 void rcar_gen2_clocks_init(u32 mode);
 
-- 
cgit v1.2.3


From b9fe9421d06653d735df07954730795d907e618d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 1 Jun 2016 15:24:58 +0200
Subject: clk: renesas: r8a7779: Remove obsolete r8a7779_clocks_init()

The R-Car H1 board code no longer calls r8a7779_clocks_init().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Dirk Behme <dirk.behme@de.bosch.com>
---
 drivers/clk/renesas/clk-r8a7779.c | 9 ---------
 include/linux/clk/renesas.h       | 1 -
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/renesas/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c
index ca7551bcb115..27fbfafaf2cd 100644
--- a/drivers/clk/renesas/clk-r8a7779.c
+++ b/drivers/clk/renesas/clk-r8a7779.c
@@ -89,8 +89,6 @@ static const unsigned int cpg_plla_mult[4] __initconst = { 42, 48, 56, 64 };
  * Initialization
  */
 
-static u32 cpg_mode __initdata;
-
 static struct clk * __init
 r8a7779_cpg_register_clock(struct device_node *np, struct r8a7779_cpg *cpg,
 			   const struct cpg_clk_config *config,
@@ -178,10 +176,3 @@ static void __init r8a7779_cpg_clocks_init(struct device_node *np)
 }
 CLK_OF_DECLARE(r8a7779_cpg_clks, "renesas,r8a7779-cpg-clocks",
 	       r8a7779_cpg_clocks_init);
-
-void __init r8a7779_clocks_init(u32 mode)
-{
-	cpg_mode = mode;
-
-	of_clk_init(NULL);
-}
diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h
index 2b663bba1adc..9e969941f3f6 100644
--- a/include/linux/clk/renesas.h
+++ b/include/linux/clk/renesas.h
@@ -20,7 +20,6 @@ struct device;
 struct device_node;
 struct generic_pm_domain;
 
-void r8a7779_clocks_init(u32 mode);
 void rcar_gen2_clocks_init(u32 mode);
 
 void cpg_mstp_add_clk_domain(struct device_node *np);
-- 
cgit v1.2.3


From 3e91d07bb592895982400002020d510fd7b6e85f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 1 Jun 2016 15:26:53 +0200
Subject: clk: renesas: rcar-gen2: Remove obsolete rcar_gen2_clocks_init()

The R-Car Gen2 board code no longer calls rcar_gen2_clocks_init().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Dirk Behme <dirk.behme@de.bosch.com>
---
 drivers/clk/renesas/clk-rcar-gen2.c | 7 -------
 include/linux/clk/renesas.h         | 2 --
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
index 3291fd430ad4..f39519edc645 100644
--- a/drivers/clk/renesas/clk-rcar-gen2.c
+++ b/drivers/clk/renesas/clk-rcar-gen2.c
@@ -445,10 +445,3 @@ static void __init rcar_gen2_cpg_clocks_init(struct device_node *np)
 }
 CLK_OF_DECLARE(rcar_gen2_cpg_clks, "renesas,rcar-gen2-cpg-clocks",
 	       rcar_gen2_cpg_clocks_init);
-
-void __init rcar_gen2_clocks_init(u32 mode)
-{
-	cpg_mode = mode;
-
-	of_clk_init(NULL);
-}
diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h
index 9e969941f3f6..9ebf1f8243bb 100644
--- a/include/linux/clk/renesas.h
+++ b/include/linux/clk/renesas.h
@@ -20,8 +20,6 @@ struct device;
 struct device_node;
 struct generic_pm_domain;
 
-void rcar_gen2_clocks_init(u32 mode);
-
 void cpg_mstp_add_clk_domain(struct device_node *np);
 #ifdef CONFIG_CLK_RENESAS_CPG_MSTP
 int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev);
-- 
cgit v1.2.3


From 1610a73c4175e7d63985316b52ac932b65a4dc90 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Nov 2016 10:56:12 +0100
Subject: netfilter: kill NF_HOOK_THRESH() and state->tresh

Patch c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh")
introduced br_nf_hook_thresh().

Replace NF_HOOK_THRESH() by br_nf_hook_thresh from
br_nf_forward_finish(), so we have no more callers for this macro.

As a result, state->thresh and explicit thresh parameter in the hook
state structure is not required anymore. And we can get rid of
skip-hook-under-thresh loop in nf_iterate() in the core path that is
only used by br_netfilter to search for the filter hook.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h             | 50 +++++++++--------------------------
 include/linux/netfilter_ingress.h     |  2 +-
 net/bridge/br_netfilter_hooks.c       |  8 +++---
 net/bridge/netfilter/ebtable_broute.c |  2 +-
 net/netfilter/core.c                  |  4 ---
 net/netfilter/nf_queue.c              |  2 --
 6 files changed, 19 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index abc7fdcb9eb1..e0d000f6c9bf 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -49,7 +49,6 @@ struct sock;
 
 struct nf_hook_state {
 	unsigned int hook;
-	int thresh;
 	u_int8_t pf;
 	struct net_device *in;
 	struct net_device *out;
@@ -84,7 +83,7 @@ struct nf_hook_entry {
 static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      struct nf_hook_entry *hook_entry,
 				      unsigned int hook,
-				      int thresh, u_int8_t pf,
+				      u_int8_t pf,
 				      struct net_device *indev,
 				      struct net_device *outdev,
 				      struct sock *sk,
@@ -92,7 +91,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	p->hook = hook;
-	p->thresh = thresh;
 	p->pf = pf;
 	p->in = indev;
 	p->out = outdev;
@@ -155,20 +153,16 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
 
 /**
- *	nf_hook_thresh - call a netfilter hook
+ *	nf_hook - call a netfilter hook
  *
  *	Returns 1 if the hook has allowed the packet to pass.  The function
  *	okfn must be invoked by the caller in this case.  Any other return
  *	value indicates the packet has been consumed by the hook.
  */
-static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
-				 struct net *net,
-				 struct sock *sk,
-				 struct sk_buff *skb,
-				 struct net_device *indev,
-				 struct net_device *outdev,
-				 int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-				 int thresh)
+static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
+			  struct sock *sk, struct sk_buff *skb,
+			  struct net_device *indev, struct net_device *outdev,
+			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct nf_hook_entry *hook_head;
 	int ret = 1;
@@ -185,8 +179,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 	if (hook_head) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook_head, hook, thresh,
-				   pf, indev, outdev, sk, net, okfn);
+		nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev,
+				   sk, net, okfn);
 
 		ret = nf_hook_slow(skb, &state);
 	}
@@ -195,14 +189,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 	return ret;
 }
 
-static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
-			  struct sock *sk, struct sk_buff *skb,
-			  struct net_device *indev, struct net_device *outdev,
-			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
-{
-	return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN);
-}
-                   
 /* Activate hook; either okfn or kfree_skb called, unless a hook
    returns NF_STOLEN (in which case, it's up to the hook to deal with
    the consequences).
@@ -220,19 +206,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
    coders :)
 */
 
-static inline int
-NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
-	       struct sk_buff *skb, struct net_device *in,
-	       struct net_device *out,
-	       int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-	       int thresh)
-{
-	int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh);
-	if (ret == 1)
-		ret = okfn(net, sk, skb);
-	return ret;
-}
-
 static inline int
 NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     struct sk_buff *skb, struct net_device *in, struct net_device *out,
@@ -242,7 +215,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	int ret;
 
 	if (!cond ||
-	    ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1))
+	    ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1))
 		ret = okfn(net, sk, skb);
 	return ret;
 }
@@ -252,7 +225,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
 	struct net_device *in, struct net_device *out,
 	int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
-	return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
+	int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+	if (ret == 1)
+		ret = okfn(net, sk, skb);
+	return ret;
 }
 
 /* Call setsockopt() */
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 33e37fb41d5d..fd44e4131710 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -26,7 +26,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	if (unlikely(!e))
 		return 0;
 
-	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2fe9345c1407..d0d66faebe90 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
 	}
 	nf_bridge_push_encap_header(skb);
 
-	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
-		       in, skb->dev, br_forward_finish, 1);
+	br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
+			  br_forward_finish);
 	return 0;
 }
 
@@ -1016,8 +1016,8 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
-	nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
-			   NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+	nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev,
+			   sk, net, okfn);
 
 	ret = nf_hook_slow(skb, &state);
 	rcu_read_unlock();
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ec94c6f1ae88..599679e3498d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
 	struct nf_hook_state state;
 	int ret;
 
-	nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
+	nf_hook_state_init(&state, NULL, NF_BR_BROUTING,
 			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 76014ad72ec5..cb0232c11bc8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -309,10 +309,6 @@ unsigned int nf_iterate(struct sk_buff *skb,
 	unsigned int verdict;
 
 	while (*entryp) {
-		if (state->thresh > (*entryp)->ops.priority) {
-			*entryp = rcu_dereference((*entryp)->next);
-			continue;
-		}
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8f08d759844a..0fb38966e5bf 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -200,8 +200,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 			verdict = NF_DROP;
 	}
 
-	entry->state.thresh = INT_MIN;
-
 	if (verdict == NF_ACCEPT) {
 		hook_entry = rcu_dereference(hook_entry->next);
 		if (hook_entry)
-- 
cgit v1.2.3


From 613dbd95723aee7abd16860745691b6c7bda20dc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Nov 2016 10:56:21 +0100
Subject: netfilter: x_tables: move hook state into xt_action_param structure

Place pointer to hook state in xt_action_param structure instead of
copying the fields that we need. After this change xt_action_param fits
into one cacheline.

This patch also adds a set of new wrapper functions to fetch relevant
hook state structure fields.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h         | 48 +++++++++++++++++++++++-------
 include/net/netfilter/nf_tables.h          | 11 +++----
 net/bridge/netfilter/ebt_arpreply.c        |  3 +-
 net/bridge/netfilter/ebt_log.c             | 11 +++----
 net/bridge/netfilter/ebt_nflog.c           |  6 ++--
 net/bridge/netfilter/ebt_redirect.c        |  6 ++--
 net/bridge/netfilter/ebtables.c            |  6 +---
 net/ipv4/netfilter/arp_tables.c            |  6 +---
 net/ipv4/netfilter/ip_tables.c             |  6 +---
 net/ipv4/netfilter/ipt_MASQUERADE.c        |  3 +-
 net/ipv4/netfilter/ipt_REJECT.c            |  4 +--
 net/ipv4/netfilter/ipt_SYNPROXY.c          |  4 +--
 net/ipv4/netfilter/ipt_rpfilter.c          |  2 +-
 net/ipv6/netfilter/ip6_tables.c            |  6 +---
 net/ipv6/netfilter/ip6t_MASQUERADE.c       |  2 +-
 net/ipv6/netfilter/ip6t_REJECT.c           | 23 ++++++++------
 net/ipv6/netfilter/ip6t_SYNPROXY.c         |  4 +--
 net/ipv6/netfilter/ip6t_rpfilter.c         |  3 +-
 net/netfilter/ipset/ip_set_core.c          |  6 ++--
 net/netfilter/ipset/ip_set_hash_netiface.c |  2 +-
 net/netfilter/xt_AUDIT.c                   | 10 +++----
 net/netfilter/xt_LOG.c                     |  6 ++--
 net/netfilter/xt_NETMAP.c                  | 20 ++++++-------
 net/netfilter/xt_NFLOG.c                   |  6 ++--
 net/netfilter/xt_NFQUEUE.c                 |  4 +--
 net/netfilter/xt_REDIRECT.c                |  4 +--
 net/netfilter/xt_TCPMSS.c                  |  4 +--
 net/netfilter/xt_TEE.c                     |  4 +--
 net/netfilter/xt_TPROXY.c                  | 16 +++++-----
 net/netfilter/xt_addrtype.c                | 10 +++----
 net/netfilter/xt_cluster.c                 |  2 +-
 net/netfilter/xt_connlimit.c               |  8 ++---
 net/netfilter/xt_conntrack.c               |  8 ++---
 net/netfilter/xt_devgroup.c                |  4 +--
 net/netfilter/xt_dscp.c                    |  2 +-
 net/netfilter/xt_ipvs.c                    |  4 +--
 net/netfilter/xt_nfacct.c                  |  2 +-
 net/netfilter/xt_osf.c                     | 10 +++----
 net/netfilter/xt_owner.c                   |  2 +-
 net/netfilter/xt_pkttype.c                 |  4 +--
 net/netfilter/xt_policy.c                  |  4 +--
 net/netfilter/xt_recent.c                  | 10 +++----
 net/netfilter/xt_set.c                     | 26 ++++++++--------
 net/netfilter/xt_socket.c                  |  4 +--
 net/sched/act_ipt.c                        | 12 ++++----
 net/sched/em_ipset.c                       | 17 ++++++-----
 46 files changed, 196 insertions(+), 169 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 2ad1a2b289b5..cd4eaf8df445 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -4,6 +4,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/static_key.h>
+#include <linux/netfilter.h>
 #include <uapi/linux/netfilter/x_tables.h>
 
 /* Test a struct->invflags and a boolean for inequality */
@@ -17,14 +18,9 @@
  * @target:	the target extension
  * @matchinfo:	per-match data
  * @targetinfo:	per-target data
- * @net		network namespace through which the action was invoked
- * @in:		input netdevice
- * @out:	output netdevice
+ * @state:	pointer to hook state this packet came from
  * @fragoff:	packet is a fragment, this is the data offset
  * @thoff:	position of transport header relative to skb->data
- * @hook:	hook number given packet came from
- * @family:	Actual NFPROTO_* through which the function is invoked
- * 		(helpful when match->family == NFPROTO_UNSPEC)
  *
  * Fields written to by extensions:
  *
@@ -38,15 +34,47 @@ struct xt_action_param {
 	union {
 		const void *matchinfo, *targinfo;
 	};
-	struct net *net;
-	const struct net_device *in, *out;
+	const struct nf_hook_state *state;
 	int fragoff;
 	unsigned int thoff;
-	unsigned int hooknum;
-	u_int8_t family;
 	bool hotdrop;
 };
 
+static inline struct net *xt_net(const struct xt_action_param *par)
+{
+	return par->state->net;
+}
+
+static inline struct net_device *xt_in(const struct xt_action_param *par)
+{
+	return par->state->in;
+}
+
+static inline const char *xt_inname(const struct xt_action_param *par)
+{
+	return par->state->in->name;
+}
+
+static inline struct net_device *xt_out(const struct xt_action_param *par)
+{
+	return par->state->out;
+}
+
+static inline const char *xt_outname(const struct xt_action_param *par)
+{
+	return par->state->out->name;
+}
+
+static inline unsigned int xt_hooknum(const struct xt_action_param *par)
+{
+	return par->state->hook;
+}
+
+static inline u_int8_t xt_family(const struct xt_action_param *par)
+{
+	return par->state->pf;
+}
+
 /**
  * struct xt_mtchk_param - parameters for match extensions'
  * checkentry functions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5031e072567b..44060344f958 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -30,11 +30,12 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 				   const struct nf_hook_state *state)
 {
 	pkt->skb = skb;
-	pkt->net = pkt->xt.net = state->net;
-	pkt->in = pkt->xt.in = state->in;
-	pkt->out = pkt->xt.out = state->out;
-	pkt->hook = pkt->xt.hooknum = state->hook;
-	pkt->pf = pkt->xt.family = state->pf;
+	pkt->net = state->net;
+	pkt->in = state->in;
+	pkt->out = state->out;
+	pkt->hook = state->hook;
+	pkt->pf = state->pf;
+	pkt->xt.state = state;
 }
 
 static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 070cf134a22f..5929309beaa1 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	if (diptr == NULL)
 		return EBT_DROP;
 
-	arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in,
+	arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr,
+		 (struct net_device *)xt_in(par),
 		 *diptr, shp, info->mac, shp);
 
 	return info->target;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 9a11086ba6ff..e88bd4827ac1 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 
 	li.type = NF_LOG_TYPE_LOG;
 	li.u.log.level = info->loglevel;
@@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	 * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
 	 */
 	if (info->bitmask & EBT_LOG_NFLOG)
-		nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
-			      par->in, par->out, &li, "%s", info->prefix);
+		nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+			      xt_in(par), xt_out(par), &li, "%s",
+			      info->prefix);
 	else
-		ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in,
-			       par->out, &li, info->prefix);
+		ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+			       xt_in(par), xt_out(par), &li, info->prefix);
 	return EBT_CONTINUE;
 }
 
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 54816150608e..c1dc48686200 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -23,16 +23,16 @@ static unsigned int
 ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nflog_info *info = par->targinfo;
+	struct net *net = xt_net(par);
 	struct nf_loginfo li;
-	struct net *net = par->net;
 
 	li.type = NF_LOG_TYPE_ULOG;
 	li.u.ulog.copy_len = info->len;
 	li.u.ulog.group = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
-		      par->out, &li, "%s", info->prefix);
+	nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par),
+		      xt_out(par), &li, "%s", info->prefix);
 	return EBT_CONTINUE;
 }
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 2e7c4f974340..8d2a85e0594e 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	if (!skb_make_writable(skb, 0))
 		return EBT_DROP;
 
-	if (par->hooknum != NF_BR_BROUTING)
+	if (xt_hooknum(par) != NF_BR_BROUTING)
 		/* rcu_read_lock()ed by nf_hook_thresh */
 		ether_addr_copy(eth_hdr(skb)->h_dest,
-				br_port_get_rcu(par->in)->br->dev->dev_addr);
+				br_port_get_rcu(xt_in(par))->br->dev->dev_addr);
 	else
-		ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr);
+		ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr);
 	skb->pkt_type = PACKET_HOST;
 	return info->target;
 }
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f5c11bbe27db..1ab6014cf0f8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
 	const struct ebt_table_info *private;
 	struct xt_action_param acpar;
 
-	acpar.family  = NFPROTO_BRIDGE;
-	acpar.net     = state->net;
-	acpar.in      = state->in;
-	acpar.out     = state->out;
+	acpar.state   = state;
 	acpar.hotdrop = false;
-	acpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b31df597fd37..e76ab23a2deb 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	 */
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	acpar.net     = state->net;
-	acpar.in      = state->in;
-	acpar.out     = state->out;
-	acpar.hooknum = hook;
-	acpar.family  = NFPROTO_ARP;
+	acpar.state   = state;
 	acpar.hotdrop = false;
 
 	arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7c00ce90adb8..de4fa03f46f3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	acpar.thoff   = ip_hdrlen(skb);
 	acpar.hotdrop = false;
-	acpar.net     = state->net;
-	acpar.in      = state->in;
-	acpar.out     = state->out;
-	acpar.family  = NFPROTO_IPV4;
-	acpar.hooknum = hook;
+	acpar.state   = state;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	local_bh_disable();
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index da7f02a0b868..34cfb9b0bc0a 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -55,7 +55,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	range.min_proto = mr->range[0].min;
 	range.max_proto = mr->range[0].max;
 
-	return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out);
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+				      xt_out(par));
 }
 
 static struct xt_target masquerade_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1d16c0f28df0..8bd0d7b26632 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,7 +34,7 @@ static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
-	int hook = par->hooknum;
+	int hook = xt_hooknum(par);
 
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
 		break;
 	case IPT_TCP_RESET:
-		nf_send_reset(par->net, skb, hook);
+		nf_send_reset(xt_net(par), skb, hook);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index db5b87509446..361411688221 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -263,12 +263,12 @@ static unsigned int
 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_synproxy_info *info = par->targinfo;
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	struct synproxy_net *snet = synproxy_pernet(net);
 	struct synproxy_options opts = {};
 	struct tcphdr *th, _th;
 
-	if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+	if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
 		return NF_DROP;
 
 	th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 78cc64eddfc1..59b49945b481 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -95,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	flow.flowi4_tos = RT_TOS(iph->tos);
 	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
 
-	return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
+	return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
 }
 
 static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 55aacea24396..7eac01d5d621 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb,
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
 	acpar.hotdrop = false;
-	acpar.net     = state->net;
-	acpar.in      = state->in;
-	acpar.out     = state->out;
-	acpar.family  = NFPROTO_IPV6;
-	acpar.hooknum = hook;
+	acpar.state   = state;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 7f9f45d829d2..2b1a15846f9a 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -24,7 +24,7 @@
 static unsigned int
 masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out);
+	return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
 }
 
 static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index db29bbf41b59..fa51a205918d 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,35 +39,40 @@ static unsigned int
 reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_reject_info *reject = par->targinfo;
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED,
+				 xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR,
+				 xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH,
+				 xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH,
+				 xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		nf_send_reset6(net, skb, par->hooknum);
+		nf_send_reset6(net, skb, xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_POLICY_FAIL:
-		nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par));
 		break;
 	case IP6T_ICMP6_REJECT_ROUTE:
-		nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE,
+				 xt_hooknum(par));
 		break;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 06bed74cf5ee..99a1216287c8 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -277,12 +277,12 @@ static unsigned int
 synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_synproxy_info *info = par->targinfo;
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	struct synproxy_net *snet = synproxy_pernet(net);
 	struct synproxy_options opts = {};
 	struct tcphdr *th, _th;
 
-	if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+	if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
 		return NF_DROP;
 
 	th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 1ee1b25df096..d5263dc364a9 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (unlikely(saddrtype == IPV6_ADDR_ANY))
 		return true ^ invert; /* not routable: forward path will drop it */
 
-	return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
+	return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par),
+					info->flags) ^ invert;
 }
 
 static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index a748b0c2c981..3f1b945a24d5 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -541,7 +541,7 @@ int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(par->net, index);
+	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 	int ret = 0;
 
 	BUG_ON(!set);
@@ -579,7 +579,7 @@ int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(par->net, index);
+	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 	int ret;
 
 	BUG_ON(!set);
@@ -601,7 +601,7 @@ int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(par->net, index);
+	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 	int ret = 0;
 
 	BUG_ON(!set);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index f0f688db6213..aa1a776613b9 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
 	e.ip &= ip_set_netmask(e.cidr);
 
-#define IFACE(dir)	(par->dir ? par->dir->name : "")
+#define IFACE(dir)	(par->state->dir ? par->state->dir->name : "")
 #define SRCDIR		(opt->flags & IPSET_DIM_TWO_SRC)
 
 	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 4973cbddc446..19247a17e511 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		goto errout;
 
 	audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
-			 info->type, par->hooknum, skb->len,
-			 par->in ? par->in->name : "?",
-			 par->out ? par->out->name : "?");
+			 info->type, xt_hooknum(par), skb->len,
+			 xt_in(par) ? xt_inname(par) : "?",
+			 xt_out(par) ? xt_outname(par) : "?");
 
 	if (skb->mark)
 		audit_log_format(ab, " mark=%#x", skb->mark);
@@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
 				 ntohs(eth_hdr(skb)->h_proto));
 
-		if (par->family == NFPROTO_BRIDGE) {
+		if (xt_family(par) == NFPROTO_BRIDGE) {
 			switch (eth_hdr(skb)->h_proto) {
 			case htons(ETH_P_IP):
 				audit_ip4(ab, skb);
@@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		}
 	}
 
-	switch (par->family) {
+	switch (xt_family(par)) {
 	case NFPROTO_IPV4:
 		audit_ip4(ab, skb);
 		break;
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 1763ab82bcd7..c3b2017ebe41 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -32,15 +32,15 @@ static unsigned int
 log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_log_info *loginfo = par->targinfo;
+	struct net *net = xt_net(par);
 	struct nf_loginfo li;
-	struct net *net = par->net;
 
 	li.type = NF_LOG_TYPE_LOG;
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out,
-		      &li, "%s", loginfo->prefix);
+	nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+		      xt_out(par), &li, "%s", loginfo->prefix);
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index b253e07cb1c5..94d0b5411192 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 		netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
 				   range->max_addr.ip6[i]);
 
-	if (par->hooknum == NF_INET_PRE_ROUTING ||
-	    par->hooknum == NF_INET_LOCAL_OUT)
+	if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+	    xt_hooknum(par) == NF_INET_LOCAL_OUT)
 		new_addr.in6 = ipv6_hdr(skb)->daddr;
 	else
 		new_addr.in6 = ipv6_hdr(skb)->saddr;
@@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	newrange.min_proto	= range->min_proto;
 	newrange.max_proto	= range->max_proto;
 
-	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
 }
 
 static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -72,16 +72,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
-		     par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT ||
-		     par->hooknum == NF_INET_LOCAL_IN);
+	NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+		     xt_hooknum(par) == NF_INET_POST_ROUTING ||
+		     xt_hooknum(par) == NF_INET_LOCAL_OUT ||
+		     xt_hooknum(par) == NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
-	if (par->hooknum == NF_INET_PRE_ROUTING ||
-	    par->hooknum == NF_INET_LOCAL_OUT)
+	if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+	    xt_hooknum(par) == NF_INET_LOCAL_OUT)
 		new_ip = ip_hdr(skb)->daddr & ~netmask;
 	else
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -96,7 +96,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	newrange.max_proto   = mr->range[0].max;
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
 }
 
 static int netmap_tg4_check(const struct xt_tgchk_param *par)
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 8668a5c18dc3..c7f8958cea4a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -25,8 +25,8 @@ static unsigned int
 nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_nflog_info *info = par->targinfo;
+	struct net *net = xt_net(par);
 	struct nf_loginfo li;
-	struct net *net = par->net;
 
 	li.type		     = NF_LOG_TYPE_ULOG;
 	li.u.ulog.copy_len   = info->len;
@@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	if (info->flags & XT_NFLOG_F_COPY_LEN)
 		li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
 
-	nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
-			  par->out, &li, info->prefix);
+	nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
+			  xt_in(par), xt_out(par), &li, info->prefix);
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 8f1779ff7e30..a360b99a958a 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
 	if (info->queues_total > 1) {
 		queue = nfqueue_hash(skb, queue, info->queues_total,
-				     par->family, jhash_initval);
+				     xt_family(par), jhash_initval);
 	}
 	return NF_QUEUE_NR(queue);
 }
@@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
 			queue = info->queuenum + cpu % info->queues_total;
 		} else {
 			queue = nfqueue_hash(skb, queue, info->queues_total,
-					     par->family, jhash_initval);
+					     xt_family(par), jhash_initval);
 		}
 	}
 
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 03f0b370e178..651dce65a30b 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -31,7 +31,7 @@
 static unsigned int
 redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum);
+	return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
 }
 
 static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -62,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
 static unsigned int
 redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum);
+	return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par));
 }
 
 static struct xt_target redirect_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 872db2d0e2a9..27241a767f17 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 		return -1;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
-		struct net *net = par->net;
+		struct net *net = xt_net(par);
 		unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
 		unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
 
@@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	 * length IPv6 header of 60, ergo the default MSS value is 1220
 	 * Since no MSS was provided, we must use the default values
 	 */
-	if (par->family == NFPROTO_IPV4)
+	if (xt_family(par) == NFPROTO_IPV4)
 		newmss = min(newmss, (u16)536);
 	else
 		newmss = min(newmss, (u16)1220);
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 0471db4032c5..1c57ace75ae6 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct xt_tee_tginfo *info = par->targinfo;
 	int oif = info->priv ? info->priv->oif : 0;
 
-	nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
+	nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif);
 
 	return XT_CONTINUE;
 }
@@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct xt_tee_tginfo *info = par->targinfo;
 	int oif = info->priv ? info->priv->oif : 0;
 
-	nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
+	nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif);
 
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 663c4c3c9072..dbd72cc40e42 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tproxy_target_info *tgi = par->targinfo;
 
-	return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
+	return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
+			  tgi->mark_mask, tgi->mark_value);
 }
 
 static unsigned int
@@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
 
-	return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
+	return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
+			  tgi->mark_mask, tgi->mark_value);
 }
 
 #ifdef XT_TPROXY_HAVE_IPV6
@@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 		 * to a listener socket if there's one */
 		struct sock *sk2;
 
-		sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+		sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
 					    &iph->saddr,
 					    tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
 					    hp->source,
@@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	 * addresses, this happens if the redirect already happened
 	 * and the current packet belongs to an already established
 	 * connection */
-	sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+	sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
 				   &iph->saddr, &iph->daddr,
 				   hp->source, hp->dest,
-				   par->in, NFT_LOOKUP_ESTABLISHED);
+				   xt_in(par), NFT_LOOKUP_ESTABLISHED);
 
 	laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
 	lport = tgi->lport ? tgi->lport : hp->dest;
@@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	else if (!sk)
 		/* no there's no established connection, check if
 		 * there's a listener on the redirected addr/port */
-		sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+		sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
 					   tproto, &iph->saddr, laddr,
 					   hp->source, lport,
-					   par->in, NFT_LOOKUP_LISTENER);
+					   xt_in(par), NFT_LOOKUP_LISTENER);
 
 	/* NOTE: assign_sock consumes our sk reference */
 	if (sk && tproxy_sk_is_transparent(sk)) {
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 11d6091991a4..e329dabde35f 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 static bool
 addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	const struct xt_addrtype_info *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool ret = true;
@@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 static bool
 addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	const struct xt_addrtype_info_v1 *info = par->matchinfo;
 	const struct iphdr *iph;
 	const struct net_device *dev = NULL;
 	bool ret = true;
 
 	if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
-		dev = par->in;
+		dev = xt_in(par);
 	else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
-		dev = par->out;
+		dev = xt_out(par);
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-	if (par->family == NFPROTO_IPV6)
+	if (xt_family(par) == NFPROTO_IPV6)
 		return addrtype_mt6(net, dev, skb, info);
 #endif
 	iph = ip_hdr(skb);
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 96fa26b20b67..9a9884a39c0e 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	 * know, matches should not alter packets, but we are doing this here
 	 * because we would need to add a PKTTYPE target for this sole purpose.
 	 */
-	if (!xt_cluster_is_multicast_addr(skb, par->family) &&
+	if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) &&
 	    skb->pkt_type == PACKET_MULTICAST) {
 	    	pskb->pkt_type = PACKET_HOST;
 	}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b6dc322593a3..bb3845339efd 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
 static bool
 connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	const struct xt_connlimit_info *info = par->matchinfo;
 	union nf_inet_addr addr;
 	struct nf_conntrack_tuple tuple;
@@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 		zone = nf_ct_zone(ct);
 	} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				      par->family, net, &tuple)) {
+				      xt_family(par), net, &tuple)) {
 		goto hotdrop;
 	}
 
-	if (par->family == NFPROTO_IPV6) {
+	if (xt_family(par) == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
 		       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
@@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	connections = count_them(net, info->data, tuple_ptr, &addr,
-	                         &info->mask, par->family, zone);
+	                         &info->mask, xt_family(par), zone);
 	if (connections == 0)
 		/* kmalloc failed, drop it entirely */
 		goto hotdrop;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index a3b8f697cfc5..2dea15ebc55b 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-		if (conntrack_mt_origsrc(ct, info, par->family) ^
+		if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGDST)
-		if (conntrack_mt_origdst(ct, info, par->family) ^
+		if (conntrack_mt_origdst(ct, info, xt_family(par)) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLSRC)
-		if (conntrack_mt_replsrc(ct, info, par->family) ^
+		if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLDST)
-		if (conntrack_mt_repldst(ct, info, par->family) ^
+		if (conntrack_mt_repldst(ct, info, xt_family(par)) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
index d9202cdd25c9..96ebe1cdefec 100644
--- a/net/netfilter/xt_devgroup.c
+++ b/net/netfilter/xt_devgroup.c
@@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_devgroup_info *info = par->matchinfo;
 
 	if (info->flags & XT_DEVGROUP_MATCH_SRC &&
-	    (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
+	    (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^
 	     ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
 		return false;
 
 	if (info->flags & XT_DEVGROUP_MATCH_DST &&
-	    (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
+	    (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^
 	     ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
 		return false;
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 64670fc5d0e1..236ac8008909 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
-	if (par->family == NFPROTO_IPV4)
+	if (xt_family(par) == NFPROTO_IPV4)
 		return ((ip_hdr(skb)->tos & info->tos_mask) ==
 		       info->tos_value) ^ !!info->invert;
 	else
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 71a9d95e0a81..0fdc89064488 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,9 +48,9 @@ static bool
 ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_ipvs_mtinfo *data = par->matchinfo;
-	struct netns_ipvs *ipvs = net_ipvs(par->net);
+	struct netns_ipvs *ipvs = net_ipvs(xt_net(par));
 	/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
-	const u_int8_t family = par->family;
+	const u_int8_t family = xt_family(par);
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index cf327593852a..cc0518fe598e 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
+	overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
 
 	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69b5810..c05fefcec238 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 	unsigned char opts[MAX_IPOPTLEN];
 	const struct xt_osf_finger *kf;
 	const struct xt_osf_user_finger *f;
-	struct net *net = p->net;
+	struct net *net = xt_net(p);
 
 	if (!info)
 		return false;
@@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 		fcount++;
 
 		if (info->flags & XT_OSF_LOG)
-			nf_log_packet(net, p->family, p->hooknum, skb,
-				      p->in, p->out, NULL,
+			nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
+				      xt_in(p), xt_out(p), NULL,
 				      "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
 				      f->genre, f->version, f->subtype,
 				      &ip->saddr, ntohs(tcp->source),
@@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 	rcu_read_unlock();
 
 	if (!fcount && (info->flags & XT_OSF_LOG))
-		nf_log_packet(net, p->family, p->hooknum, skb, p->in,
-			      p->out, NULL,
+		nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
+			      xt_out(p), NULL,
 			"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
 				&ip->saddr, ntohs(tcp->source),
 				&ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index a20e731b5b6c..16477df45b3b 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
 	struct sock *sk = skb_to_full_sk(skb);
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 
 	if (sk == NULL || sk->sk_socket == NULL)
 		return (info->match ^ info->invert) == 0;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 5b645cb598fc..57efb703ff18 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
-	else if (par->family == NFPROTO_IPV4 &&
+	else if (xt_family(par) == NFPROTO_IPV4 &&
 	    ipv4_is_multicast(ip_hdr(skb)->daddr))
 		type = PACKET_MULTICAST;
-	else if (par->family == NFPROTO_IPV6 &&
+	else if (xt_family(par) == NFPROTO_IPV6 &&
 	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
 		type = PACKET_MULTICAST;
 	else
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index f23e97bb42d7..2b4ab189bba7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	int ret;
 
 	if (info->flags & XT_POLICY_MATCH_IN)
-		ret = match_policy_in(skb, info, par->family);
+		ret = match_policy_in(skb, info, xt_family(par));
 	else
-		ret = match_policy_out(skb, info, par->family);
+		ret = match_policy_out(skb, info, xt_family(par));
 
 	if (ret < 0)
 		ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index e3b7a09b103e..bf250000e084 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t)
 static bool
 recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct net *net = par->net;
+	struct net *net = xt_net(par);
 	struct recent_net *recent_net = recent_pernet(net);
 	const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
 	struct recent_table *t;
@@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	u_int8_t ttl;
 	bool ret = info->invert;
 
-	if (par->family == NFPROTO_IPV4) {
+	if (xt_family(par) == NFPROTO_IPV4) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (info->side == XT_RECENT_DEST)
@@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	/* use TTL as seen before forwarding */
-	if (par->out != NULL && skb->sk == NULL)
+	if (xt_out(par) != NULL && skb->sk == NULL)
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
@@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
 
-	e = recent_entry_lookup(t, &addr_mask, par->family,
+	e = recent_entry_lookup(t, &addr_mask, xt_family(par),
 				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
 	if (e == NULL) {
 		if (!(info->check_set & XT_RECENT_SET))
 			goto out;
-		e = recent_entry_init(t, &addr_mask, par->family, ttl);
+		e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
 		if (e == NULL)
 			par->hotdrop = true;
 		ret = !ret;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 5669e5b453f4..1bfede7be418 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v0 *info = par->matchinfo;
 
-	ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
+	ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
 		info->match_set.u.compat.flags, 0, UINT_MAX);
 
 	return match_set(info->match_set.index, skb, par, &opt,
@@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v1 *info = par->matchinfo;
 
-	ADT_OPT(opt, par->family, info->match_set.dim,
+	ADT_OPT(opt, xt_family(par), info->match_set.dim,
 		info->match_set.flags, 0, UINT_MAX);
 
 	if (opt.flags & IPSET_RETURN_NOMATCH)
@@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_set_info_match_v3 *info = par->matchinfo;
 	int ret;
 
-	ADT_OPT(opt, par->family, info->match_set.dim,
+	ADT_OPT(opt, xt_family(par), info->match_set.dim,
 		info->match_set.flags, info->flags, UINT_MAX);
 
 	if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_set_info_match_v4 *info = par->matchinfo;
 	int ret;
 
-	ADT_OPT(opt, par->family, info->match_set.dim,
+	ADT_OPT(opt, xt_family(par), info->match_set.dim,
 		info->match_set.flags, info->flags, UINT_MAX);
 
 	if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v0 *info = par->targinfo;
 
-	ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
+	ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
 		info->add_set.u.compat.flags, 0, UINT_MAX);
-	ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
+	ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
 		info->del_set.u.compat.flags, 0, UINT_MAX);
 
 	if (info->add_set.index != IPSET_INVALID_ID)
@@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v1 *info = par->targinfo;
 
-	ADT_OPT(add_opt, par->family, info->add_set.dim,
+	ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
 		info->add_set.flags, 0, UINT_MAX);
-	ADT_OPT(del_opt, par->family, info->del_set.dim,
+	ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
 		info->del_set.flags, 0, UINT_MAX);
 
 	if (info->add_set.index != IPSET_INVALID_ID)
@@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v2 *info = par->targinfo;
 
-	ADT_OPT(add_opt, par->family, info->add_set.dim,
+	ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
 		info->add_set.flags, info->flags, info->timeout);
-	ADT_OPT(del_opt, par->family, info->del_set.dim,
+	ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
 		info->del_set.flags, 0, UINT_MAX);
 
 	/* Normalize to fit into jiffies */
@@ -429,11 +429,11 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct xt_set_info_target_v3 *info = par->targinfo;
 	int ret;
 
-	ADT_OPT(add_opt, par->family, info->add_set.dim,
+	ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
 		info->add_set.flags, info->flags, info->timeout);
-	ADT_OPT(del_opt, par->family, info->del_set.dim,
+	ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
 		info->del_set.flags, 0, UINT_MAX);
-	ADT_OPT(map_opt, par->family, info->map_set.dim,
+	ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
 		info->map_set.flags, 0, UINT_MAX);
 
 	/* Normalize to fit into jiffies */
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 018c369c9f0d..2198914707f5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -57,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	struct sock *sk = skb->sk;
 
 	if (!sk)
-		sk = nf_sk_lookup_slow_v4(par->net, skb, par->in);
+		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
@@ -114,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sock *sk = skb->sk;
 
 	if (!sk)
-		sk = nf_sk_lookup_slow_v6(par->net, skb, par->in);
+		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 378c1c976058..ce7ea6c1c50d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
 	int ret = 0, result = 0;
 	struct tcf_ipt *ipt = to_ipt(a);
 	struct xt_action_param par;
+	struct nf_hook_state state = {
+		.net	= dev_net(skb->dev),
+		.in	= skb->dev,
+		.hook	= ipt->tcfi_hook,
+		.pf	= NFPROTO_IPV4,
+	};
 
 	if (skb_unclone(skb, GFP_ATOMIC))
 		return TC_ACT_UNSPEC;
@@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
 	 * worry later - danger - this API seems to have changed
 	 * from earlier kernels
 	 */
-	par.net	     = dev_net(skb->dev);
-	par.in       = skb->dev;
-	par.out      = NULL;
-	par.hooknum  = ipt->tcfi_hook;
+	par.state    = &state;
 	par.target   = ipt->tcfi_t->u.kernel.target;
 	par.targinfo = ipt->tcfi_t->data;
-	par.family   = NFPROTO_IPV4;
 	ret = par.target->target(skb, &par);
 
 	switch (ret) {
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index c66ca9400ab4..c1b23e3060b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 	struct xt_action_param acpar;
 	const struct xt_set_info *set = (const void *) em->data;
 	struct net_device *dev, *indev = NULL;
+	struct nf_hook_state state = {
+		.net	= em->net,
+	};
 	int ret, network_offset;
 
 	switch (tc_skb_protocol(skb)) {
 	case htons(ETH_P_IP):
-		acpar.family = NFPROTO_IPV4;
+		state.pf = NFPROTO_IPV4;
 		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
 			return 0;
 		acpar.thoff = ip_hdrlen(skb);
 		break;
 	case htons(ETH_P_IPV6):
-		acpar.family = NFPROTO_IPV6;
+		state.pf = NFPROTO_IPV6;
 		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
 			return 0;
 		/* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
@@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 		return 0;
 	}
 
-	acpar.hooknum = 0;
-
-	opt.family = acpar.family;
+	opt.family = state.pf;
 	opt.dim = set->dim;
 	opt.flags = set->flags;
 	opt.cmdflags = 0;
@@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 	if (skb->skb_iif)
 		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
-	acpar.net     = em->net;
-	acpar.in      = indev ? indev : dev;
-	acpar.out     = dev;
+	state.in      = indev ? indev : dev;
+	state.out     = dev;
+	acpar.state   = &state;
 
 	ret = ip_set_test(set->index, skb, &acpar, &opt);
 
-- 
cgit v1.2.3


From 01886bd91f1ba418ce669dfe97a06ca9504e482a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Nov 2016 10:56:35 +0100
Subject: netfilter: remove hook_entries field from nf_hook_state

This field is only useful for nf_queue, so store it in the
nf_queue_entry structure instead, away from the core path. Pass
hook_head to nf_hook_slow().

Since we always have a valid entry on the first iteration in
nf_iterate(), we can use 'do { ... } while (entry)' loop instead.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h             | 10 ++++------
 include/linux/netfilter_ingress.h     |  4 ++--
 include/net/netfilter/nf_queue.h      |  1 +
 net/bridge/br_netfilter_hooks.c       |  4 ++--
 net/bridge/netfilter/ebtable_broute.c |  2 +-
 net/netfilter/core.c                  |  9 ++++-----
 net/netfilter/nf_queue.c              | 13 +++++--------
 net/netfilter/nfnetlink_queue.c       |  2 +-
 8 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index e0d000f6c9bf..69230140215b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -54,7 +54,6 @@ struct nf_hook_state {
 	struct net_device *out;
 	struct sock *sk;
 	struct net *net;
-	struct nf_hook_entry __rcu *hook_entries;
 	int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
@@ -81,7 +80,6 @@ struct nf_hook_entry {
 };
 
 static inline void nf_hook_state_init(struct nf_hook_state *p,
-				      struct nf_hook_entry *hook_entry,
 				      unsigned int hook,
 				      u_int8_t pf,
 				      struct net_device *indev,
@@ -96,7 +94,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 	p->out = outdev;
 	p->sk = sk;
 	p->net = net;
-	RCU_INIT_POINTER(p->hook_entries, hook_entry);
 	p->okfn = okfn;
 }
 
@@ -150,7 +147,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #endif
 
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+		 struct nf_hook_entry *entry);
 
 /**
  *	nf_hook - call a netfilter hook
@@ -179,10 +177,10 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 	if (hook_head) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev,
+		nf_hook_state_init(&state, hook, pf, indev, outdev,
 				   sk, net, okfn);
 
-		ret = nf_hook_slow(skb, &state);
+		ret = nf_hook_slow(skb, &state, hook_head);
 	}
 	rcu_read_unlock();
 
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index fd44e4131710..2dc3b49b804a 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	if (unlikely(!e))
 		return 0;
 
-	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS,
+	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
-	return nf_hook_slow(skb, &state);
+	return nf_hook_slow(skb, &state, e);
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 2280cfe86c56..09948d10e38e 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -12,6 +12,7 @@ struct nf_queue_entry {
 	unsigned int		id;
 
 	struct nf_hook_state	state;
+	struct nf_hook_entry	*hook;
 	u16			size; /* sizeof(entry) + saved route keys */
 
 	/* extra space to store route keys */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7e3645fa6339..8155bd2a5138 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1018,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
-	nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev,
+	nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
 			   sk, net, okfn);
 
-	ret = nf_hook_slow(skb, &state);
+	ret = nf_hook_slow(skb, &state, elem);
 	rcu_read_unlock();
 	if (ret == 1)
 		ret = okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 599679e3498d..8fe36dc3aab2 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
 	struct nf_hook_state state;
 	int ret;
 
-	nf_hook_state_init(&state, NULL, NF_BR_BROUTING,
+	nf_hook_state_init(&state, NF_BR_BROUTING,
 			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 64623374bc5f..ebece48b8392 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -308,7 +308,7 @@ unsigned int nf_iterate(struct sk_buff *skb,
 {
 	unsigned int verdict;
 
-	while (*entryp) {
+	do {
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
@@ -317,20 +317,19 @@ repeat:
 			goto repeat;
 		}
 		*entryp = rcu_dereference((*entryp)->next);
-	}
+	} while (*entryp);
 	return NF_ACCEPT;
 }
 
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+		 struct nf_hook_entry *entry)
 {
-	struct nf_hook_entry *entry;
 	unsigned int verdict;
 	int ret;
 
-	entry = rcu_dereference(state->hook_entries);
 next_hook:
 	verdict = nf_iterate(skb, state, &entry);
 	switch (verdict & NF_VERDICT_MASK) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0fb38966e5bf..2e39e38ae1c7 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
 }
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      unsigned int queuenum)
+		      struct nf_hook_entry *hook_entry, unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.state	= *state,
+		.hook	= hook_entry,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	struct nf_hook_entry *entry = *entryp;
 	int ret;
 
-	RCU_INIT_POINTER(state->hook_entries, entry);
-	ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
 	if (ret < 0) {
 		if (ret == -ESRCH &&
 		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
@@ -179,15 +179,12 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct nf_hook_entry *hook_entry;
+	struct nf_hook_entry *hook_entry = entry->hook;
+	struct nf_hook_ops *elem = &hook_entry->ops;
 	struct sk_buff *skb = entry->skb;
 	const struct nf_afinfo *afinfo;
-	struct nf_hook_ops *elem;
 	int err;
 
-	hook_entry = rcu_dereference(entry->state.hook_entries);
-	elem = &hook_entry->ops;
-
 	nf_queue_entry_release_refs(entry);
 
 	/* Continue traversal iff userspace said ok... */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5379f788a372..1e33115b399f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = {
 
 static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
 {
-	return rcu_access_pointer(entry->state.hook_entries) ==
+	return rcu_access_pointer(entry->hook) ==
 		(struct nf_hook_entry *)entry_ptr;
 }
 
-- 
cgit v1.2.3


From 0cc0aa614b4c24b21b2492c0a1753035ee8c6edb Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 2 Nov 2016 11:02:17 -0400
Subject: ipv6: add IPV6_RECVFRAGSIZE cmsg

When reading a datagram or raw packet that arrived fragmented, expose
the maximum fragment size if recorded to allow applications to
estimate receive path MTU.

At this point, the field is only recorded when ipv6 connection
tracking is enabled. A follow-up patch will record this field also
in the ipv6 input path.

Tested using the test for IP_RECVFRAGSIZE plus

  ip netns exec to ip addr add dev veth1 fc07::1/64
  ip netns exec from ip addr add dev veth0 fc07::2/64

  ip netns exec to ./recv_cmsg_recvfragsize -6 -u -p 6000 &
  ip netns exec from nc -q 1 -u fc07::1 6000 < payload

Both with and without enabling connection tracking

  ip6tables -A INPUT -m state --state NEW -p udp -j LOG

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     | 5 +++--
 include/uapi/linux/in6.h | 1 +
 net/ipv6/datagram.c      | 5 +++++
 net/ipv6/ipv6_sockglue.c | 8 ++++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ca1ad9ebbc92..1afb6e8d35c3 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -229,8 +229,9 @@ struct ipv6_pinfo {
                                 rxflow:1,
 				rxtclass:1,
 				rxpmtu:1,
-				rxorigdstaddr:1;
-				/* 2 bits hole */
+				rxorigdstaddr:1,
+				recvfragsize:1;
+				/* 1 bits hole */
 		} bits;
 		__u16		all;
 	} rxopt;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index b39ea4f2e701..46444f8fbee4 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -283,6 +283,7 @@ struct in6_flowlabel_req {
 #define IPV6_RECVORIGDSTADDR    IPV6_ORIGDSTADDR
 #define IPV6_TRANSPARENT        75
 #define IPV6_UNICAST_IF         76
+#define IPV6_RECVFRAGSIZE	77
 
 /*
  * Multicast Routing:
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 37874e2f30ed..620c79a0130a 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -715,6 +715,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 			put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
 		}
 	}
+	if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
+		int val = opt->frag_max_size;
+
+		put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
+	}
 }
 
 void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 636ec56f5f50..6c126780fcf2 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -868,6 +868,10 @@ pref_skip_coa:
 		np->autoflowlabel = valbool;
 		retv = 0;
 		break;
+	case IPV6_RECVFRAGSIZE:
+		np->rxopt.bits.recvfragsize = valbool;
+		retv = 0;
+		break;
 	}
 
 	release_sock(sk);
@@ -1310,6 +1314,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->autoflowlabel;
 		break;
 
+	case IPV6_RECVFRAGSIZE:
+		val = np->rxopt.bits.recvfragsize;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 50d24c34403c62ad29e8b6db559d491bae20b4b7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 3 Nov 2016 17:03:53 -0700
Subject: block: immediately dispatch big size request

Currently block plug holds up to 16 non-mergeable requests. This makes
sense if the request size is small, eg, reduce lock contention. But if
request size is big enough, we don't need to worry about lock
contention. Holding such request makes no sense and it lows the disk
utilization.

In practice, this improves 10% throughput for my raid5 sequential write
workload.

The size (128k) is arbitrary right now, but it makes sure lock
contention is small. This probably could be more intelligent, eg, check
average request size holded. Since this is mainly for sequential IO,
probably not worthy.

V2: check the last request instead of the first request, so as long as
there is one big size request we flush the plug.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 4 +++-
 include/linux/blkdev.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0bfaa54d3e9f..2deca48a4a05 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1746,7 +1746,9 @@ get_rq:
 		if (!request_count)
 			trace_block_plug(q);
 		else {
-			if (request_count >= BLK_MAX_REQUEST_COUNT) {
+			struct request *last = list_entry_rq(plug->list.prev);
+			if (request_count >= BLK_MAX_REQUEST_COUNT ||
+			    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) {
 				blk_flush_plug_list(plug, false);
 				trace_block_plug(q);
 			}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 13d893a69b46..9189a2d5c392 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1173,6 +1173,7 @@ struct blk_plug {
 	struct list_head cb_list; /* md requires an unplug callback */
 };
 #define BLK_MAX_REQUEST_COUNT 16
+#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
-- 
cgit v1.2.3


From 1b5b42216469b05ef4b5916cb40b127dfab1da88 Mon Sep 17 00:00:00 2001
From: Axel Haslam <ahaslam@baylibre.com>
Date: Thu, 3 Nov 2016 12:11:42 +0100
Subject: regulator: core: Add new API to poll for error conditions

Regulator consumers can receive event notifications when
errors are reported to the driver, but currently, there is
no way for a regulator consumer to know when the error is over.

To allow a regulator consumer to poll for error conditions
add a new API: regulator_get_error_flags.

Signed-off-by: Axel Haslam <ahaslam@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c           | 33 +++++++++++++++++++++++++++++++++
 include/linux/regulator/consumer.h | 26 ++++++++++++++++++++++++++
 include/linux/regulator/driver.h   |  4 ++++
 3 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 67426c0477d3..08260c215895 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3359,6 +3359,39 @@ unsigned int regulator_get_mode(struct regulator *regulator)
 }
 EXPORT_SYMBOL_GPL(regulator_get_mode);
 
+static int _regulator_get_error_flags(struct regulator_dev *rdev,
+					unsigned int *flags)
+{
+	int ret;
+
+	mutex_lock(&rdev->mutex);
+
+	/* sanity check */
+	if (!rdev->desc->ops->get_error_flags) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = rdev->desc->ops->get_error_flags(rdev, flags);
+out:
+	mutex_unlock(&rdev->mutex);
+	return ret;
+}
+
+/**
+ * regulator_get_error_flags - get regulator error information
+ * @regulator: regulator source
+ * @flags: pointer to store error flags
+ *
+ * Get the current regulator error information.
+ */
+int regulator_get_error_flags(struct regulator *regulator,
+				unsigned int *flags)
+{
+	return _regulator_get_error_flags(regulator->rdev, flags);
+}
+EXPORT_SYMBOL_GPL(regulator_get_error_flags);
+
 /**
  * regulator_set_load - set regulator load
  * @regulator: regulator source
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 692108222271..528eb1f5273e 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -120,6 +120,25 @@ struct regmap;
 #define REGULATOR_EVENT_PRE_DISABLE		0x400
 #define REGULATOR_EVENT_ABORT_DISABLE		0x800
 
+/*
+ * Regulator errors that can be queried using regulator_get_error_flags
+ *
+ * UNDER_VOLTAGE  Regulator output is under voltage.
+ * OVER_CURRENT   Regulator output current is too high.
+ * REGULATION_OUT Regulator output is out of regulation.
+ * FAIL           Regulator output has failed.
+ * OVER_TEMP      Regulator over temp.
+ *
+ * NOTE: These errors can be OR'ed together.
+ */
+
+#define REGULATOR_ERROR_UNDER_VOLTAGE		BIT(1)
+#define REGULATOR_ERROR_OVER_CURRENT		BIT(2)
+#define REGULATOR_ERROR_REGULATION_OUT		BIT(3)
+#define REGULATOR_ERROR_FAIL			BIT(4)
+#define REGULATOR_ERROR_OVER_TEMP		BIT(5)
+
+
 /**
  * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event
  *
@@ -237,6 +256,8 @@ int regulator_get_current_limit(struct regulator *regulator);
 
 int regulator_set_mode(struct regulator *regulator, unsigned int mode);
 unsigned int regulator_get_mode(struct regulator *regulator);
+int regulator_get_error_flags(struct regulator *regulator,
+				unsigned int *flags);
 int regulator_set_load(struct regulator *regulator, int load_uA);
 
 int regulator_allow_bypass(struct regulator *regulator, bool allow);
@@ -477,6 +498,11 @@ static inline unsigned int regulator_get_mode(struct regulator *regulator)
 	return REGULATOR_MODE_NORMAL;
 }
 
+static inline int regulator_get_error_flags(struct regulator *regulator)
+{
+	return -EINVAL;
+}
+
 static inline int regulator_set_load(struct regulator *regulator, int load_uA)
 {
 	return REGULATOR_MODE_NORMAL;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 37b532410528..dac8e7b16bc6 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -100,6 +100,7 @@ struct regulator_linear_range {
  *
  * @set_mode: Set the configured operating mode for the regulator.
  * @get_mode: Get the configured operating mode for the regulator.
+ * @get_error_flags: Get the current error(s) for the regulator.
  * @get_status: Return actual (not as-configured) status of regulator, as a
  *	REGULATOR_STATUS value (or negative errno)
  * @get_optimum_mode: Get the most efficient operating mode for the regulator
@@ -169,6 +170,9 @@ struct regulator_ops {
 	int (*set_mode) (struct regulator_dev *, unsigned int mode);
 	unsigned int (*get_mode) (struct regulator_dev *);
 
+	/* retrieve current error flags on the regulator */
+	int (*get_error_flags)(struct regulator_dev *, unsigned int *flags);
+
 	/* Time taken to enable or set voltage on the regulator */
 	int (*enable_time) (struct regulator_dev *);
 	int (*set_ramp_delay) (struct regulator_dev *, int ramp_delay);
-- 
cgit v1.2.3


From 68f929ff2654bced015ccb9b5555667f46f88dfa Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 3 Nov 2016 17:12:06 +0000
Subject: debugfs: constify argument to debugfs_real_fops()

seq_file users can only access const version of file pointer,
because the ->file member of struct seq_operations is marked
as such.  Make parameter to debugfs_real_fops() const.

CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Nicolai Stange <nicstange@gmail.com>
CC: Christian Lamparter <chunkeey@gmail.com>
CC: LKML <linux-kernel@vger.kernel.org>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/debugfs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 4d3f0d1aec73..bf1907d96097 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu;
  * Must only be called under the protection established by
  * debugfs_use_file_start().
  */
-static inline const struct file_operations *debugfs_real_fops(struct file *filp)
+static inline const struct file_operations *
+debugfs_real_fops(const struct file *filp)
 	__must_hold(&debugfs_srcu)
 {
 	/*
-- 
cgit v1.2.3


From 29f3ad7d8380364c86556eedf4eedd3b3d4921dc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 4 Nov 2016 18:08:11 +0100
Subject: fs: Provide function to unmap metadata for a range of blocks

Provide function equivalent to unmap_underlying_metadata() for a range
of blocks. We somewhat optimize the function to use pagevec lookups
instead of looking up buffer heads one by one and use page lock to pin
buffer heads instead of mapping's private_lock to improve scalability.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c                 | 76 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/buffer_head.h |  2 ++
 2 files changed, 78 insertions(+)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index af5776da814a..f8beca55240a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -43,6 +43,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/pagevec.h>
 #include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -1636,6 +1637,81 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 }
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
+/**
+ * clean_bdev_aliases: clean a range of buffers in block device
+ * @bdev: Block device to clean buffers in
+ * @block: Start of a range of blocks to clean
+ * @len: Number of blocks to clean
+ *
+ * We are taking a range of blocks for data and we don't want writeback of any
+ * buffer-cache aliases starting from return from this function and until the
+ * moment when something will explicitly mark the buffer dirty (hopefully that
+ * will not happen until we will free that block ;-) We don't even need to mark
+ * it not-uptodate - nobody can expect anything from a newly allocated buffer
+ * anyway. We used to use unmap_buffer() for such invalidation, but that was
+ * wrong. We definitely don't want to mark the alias unmapped, for example - it
+ * would confuse anyone who might pick it with bread() afterwards...
+ *
+ * Also..  Note that bforget() doesn't lock the buffer.  So there can be
+ * writeout I/O going on against recently-freed buffers.  We don't wait on that
+ * I/O in bforget() - it's more efficient to wait on the I/O only if we really
+ * need to.  That happens here.
+ */
+void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
+{
+	struct inode *bd_inode = bdev->bd_inode;
+	struct address_space *bd_mapping = bd_inode->i_mapping;
+	struct pagevec pvec;
+	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+	pgoff_t end;
+	int i;
+	struct buffer_head *bh;
+	struct buffer_head *head;
+
+	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+	pagevec_init(&pvec, 0);
+	while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			index = page->index;
+			if (index > end)
+				break;
+			if (!page_has_buffers(page))
+				continue;
+			/*
+			 * We use page lock instead of bd_mapping->private_lock
+			 * to pin buffers here since we can afford to sleep and
+			 * it scales better than a global spinlock lock.
+			 */
+			lock_page(page);
+			/* Recheck when the page is locked which pins bhs */
+			if (!page_has_buffers(page))
+				goto unlock_page;
+			head = page_buffers(page);
+			bh = head;
+			do {
+				if (!buffer_mapped(bh))
+					goto next;
+				if (bh->b_blocknr >= block + len)
+					break;
+				clear_buffer_dirty(bh);
+				wait_on_buffer(bh);
+				clear_buffer_req(bh);
+next:
+				bh = bh->b_this_page;
+			} while (bh != head);
+unlock_page:
+			unlock_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+		index++;
+	}
+}
+EXPORT_SYMBOL(clean_bdev_aliases);
+
 /*
  * Size is a power-of-two in the range 512..PAGE_SIZE,
  * and the case we care about most is PAGE_SIZE.
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ebbacd14d450..9c9c73ce7d4f 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -169,6 +169,8 @@ void invalidate_inode_buffers(struct inode *);
 int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
 void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
+void clean_bdev_aliases(struct block_device *bdev, sector_t block,
+			sector_t len);
 
 void mark_buffer_async_write(struct buffer_head *bh);
 void __wait_on_buffer(struct buffer_head *);
-- 
cgit v1.2.3


From e64855c6cfaa0a80c1b71c5f647cb792dc436668 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 4 Nov 2016 18:08:15 +0100
Subject: fs: Add helper to clean bdev aliases under a bh and use it

Add a helper function that clears buffer heads from a block device
aliasing passed bh. Use this helper function from filesystems instead of
the original unmap_underlying_metadata() to save some boiler plate code
and also have a better name for the functionalily since it is not
unmapping anything for a *long* time.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c                 | 8 +++-----
 fs/ext4/inode.c             | 3 +--
 fs/ext4/page-io.c           | 2 +-
 fs/mpage.c                  | 3 +--
 fs/ntfs/aops.c              | 2 +-
 fs/ntfs/file.c              | 5 ++---
 fs/ocfs2/aops.c             | 2 +-
 fs/ufs/balloc.c             | 3 +--
 fs/ufs/inode.c              | 3 +--
 include/linux/buffer_head.h | 4 ++++
 10 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index f8beca55240a..912d70169fca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1821,8 +1821,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 			if (buffer_new(bh)) {
 				/* blockdev mappings never come here */
 				clear_buffer_new(bh);
-				unmap_underlying_metadata(bh->b_bdev,
-							bh->b_blocknr);
+				clean_bdev_bh_alias(bh);
 			}
 		}
 		bh = bh->b_this_page;
@@ -2068,8 +2067,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
 			}
 
 			if (buffer_new(bh)) {
-				unmap_underlying_metadata(bh->b_bdev,
-							bh->b_blocknr);
+				clean_bdev_bh_alias(bh);
 				if (PageUptodate(page)) {
 					clear_buffer_new(bh);
 					set_buffer_uptodate(bh);
@@ -2709,7 +2707,7 @@ int nobh_write_begin(struct address_space *mapping,
 		if (!buffer_mapped(bh))
 			is_mapped_to_disk = 0;
 		if (buffer_new(bh))
-			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			clean_bdev_bh_alias(bh);
 		if (PageUptodate(page)) {
 			set_buffer_uptodate(bh);
 			continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c7cc4ae4b8e..2f8127601bef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1123,8 +1123,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				unmap_underlying_metadata(bh->b_bdev,
-							  bh->b_blocknr);
+				clean_bdev_bh_alias(bh);
 				if (PageUptodate(page)) {
 					clear_buffer_new(bh);
 					set_buffer_uptodate(bh);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index e0b3b54cdef3..f28fd6483e04 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 		}
 		if (buffer_new(bh)) {
 			clear_buffer_new(bh);
-			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			clean_bdev_bh_alias(bh);
 		}
 		set_buffer_async_write(bh);
 		nr_to_submit++;
diff --git a/fs/mpage.c b/fs/mpage.c
index 98fc11aa7e0b..28af984a3d96 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -555,8 +555,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
 			goto confused;
 		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-						map_bh.b_blocknr);
+			clean_bdev_bh_alias(&map_bh);
 		if (buffer_boundary(&map_bh)) {
 			boundary_block = map_bh.b_blocknr;
 			boundary_bdev = map_bh.b_bdev;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index d0cf6fee5c77..cc91856b5e2d 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -765,7 +765,7 @@ lock_retry_remap:
 			}
 			// TODO: Instantiate the hole.
 			// clear_buffer_new(bh);
-			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			// clean_bdev_bh_alias(bh);
 			ntfs_error(vol->sb, "Writing into sparse regions is "
 					"not supported yet. Sorry.");
 			err = -EOPNOTSUPP;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index bf72a2c58b75..99510d811a8c 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -740,8 +740,7 @@ map_buffer_cached:
 					set_buffer_uptodate(bh);
 				if (unlikely(was_hole)) {
 					/* We allocated the buffer. */
-					unmap_underlying_metadata(bh->b_bdev,
-							bh->b_blocknr);
+					clean_bdev_bh_alias(bh);
 					if (bh_end <= pos || bh_pos >= end)
 						mark_buffer_dirty(bh);
 					else
@@ -784,7 +783,7 @@ map_buffer_cached:
 				continue;
 			}
 			/* We allocated the buffer. */
-			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			clean_bdev_bh_alias(bh);
 			/*
 			 * If the buffer is fully outside the write, zero it,
 			 * set it uptodate, and mark it dirty so it gets
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c5c5b9748ea3..e8f65eefffca 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -630,7 +630,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
 
 		if (!buffer_mapped(bh)) {
 			map_bh(bh, inode->i_sb, *p_blkno);
-			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			clean_bdev_bh_alias(bh);
 		}
 
 		if (PageUptodate(page)) {
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b035af54f538..a0376a2c1c29 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -307,8 +307,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
 			     (unsigned long long)(pos + newb), pos);
 
 			bh->b_blocknr = newb + pos;
-			unmap_underlying_metadata(bh->b_bdev,
-						  bh->b_blocknr);
+			clean_bdev_bh_alias(bh);
 			mark_buffer_dirty(bh);
 			++j;
 			bh = bh->b_this_page;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 190d64be22ed..45ceb94e89e4 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1070,8 +1070,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
 
        if (buffer_new(bh)) {
 	       clear_buffer_new(bh);
-	       unmap_underlying_metadata(bh->b_bdev,
-					 bh->b_blocknr);
+	       clean_bdev_bh_alias(bh);
 	       /*
 		* we do not zeroize fragment, because of
 		* if it maped to hole, it already contains zeroes
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 9c9c73ce7d4f..d1ab91fc6d43 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,10 @@ int sync_mapping_buffers(struct address_space *mapping);
 void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
 void clean_bdev_aliases(struct block_device *bdev, sector_t block,
 			sector_t len);
+static inline void clean_bdev_bh_alias(struct buffer_head *bh)
+{
+	clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
+}
 
 void mark_buffer_async_write(struct buffer_head *bh);
 void __wait_on_buffer(struct buffer_head *);
-- 
cgit v1.2.3


From ce98321bf7d274a470642ef99e1d82512673ce7c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 4 Nov 2016 18:08:16 +0100
Subject: fs: Remove unmap_underlying_metadata

Nobody is using this function anymore. Remove it.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c                 | 32 --------------------------------
 include/linux/buffer_head.h |  1 -
 2 files changed, 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 912d70169fca..1104ce8b4536 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1605,38 +1605,6 @@ void create_empty_buffers(struct page *page,
 }
 EXPORT_SYMBOL(create_empty_buffers);
 
-/*
- * We are taking a block for data and we don't want any output from any
- * buffer-cache aliases starting from return from that function and
- * until the moment when something will explicitly mark the buffer
- * dirty (hopefully that will not happen until we will free that block ;-)
- * We don't even need to mark it not-uptodate - nobody can expect
- * anything from a newly allocated buffer anyway. We used to used
- * unmap_buffer() for such invalidation, but that was wrong. We definitely
- * don't want to mark the alias unmapped, for example - it would confuse
- * anyone who might pick it with bread() afterwards...
- *
- * Also..  Note that bforget() doesn't lock the buffer.  So there can
- * be writeout I/O going on against recently-freed buffers.  We don't
- * wait on that I/O in bforget() - it's more efficient to wait on the I/O
- * only if we really need to.  That happens here.
- */
-void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
-{
-	struct buffer_head *old_bh;
-
-	might_sleep();
-
-	old_bh = __find_get_block_slow(bdev, block);
-	if (old_bh) {
-		clear_buffer_dirty(old_bh);
-		wait_on_buffer(old_bh);
-		clear_buffer_req(old_bh);
-		__brelse(old_bh);
-	}
-}
-EXPORT_SYMBOL(unmap_underlying_metadata);
-
 /**
  * clean_bdev_aliases: clean a range of buffers in block device
  * @bdev: Block device to clean buffers in
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d1ab91fc6d43..d67ab83823ad 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -168,7 +168,6 @@ int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
 int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
-void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
 void clean_bdev_aliases(struct block_device *bdev, sector_t block,
 			sector_t len);
 static inline void clean_bdev_bh_alias(struct buffer_head *bh)
-- 
cgit v1.2.3


From c9329d8638cfa1a86faf4fb8bd4922a3d9c6c437 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Wed, 5 Oct 2016 14:34:40 +0530
Subject: mfd: ti_am335x_tscadc: store physical address

store the physical address of the device in its priv to use it
for DMA addressing in the client drivers.

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Acked-for-MFD-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/mfd/ti_am335x_tscadc.c       | 1 +
 include/linux/mfd/ti_am335x_tscadc.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index c8f027b4ea4c..0f3fab47fe48 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -183,6 +183,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 		tscadc->irq = err;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tscadc->tscadc_phys_base = res->start;
 	tscadc->tscadc_base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(tscadc->tscadc_base))
 		return PTR_ERR(tscadc->tscadc_base);
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 7f55b8b41032..e45a208d9944 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -155,6 +155,7 @@ struct ti_tscadc_dev {
 	struct device *dev;
 	struct regmap *regmap;
 	void __iomem *tscadc_base;
+	phys_addr_t tscadc_phys_base;
 	int irq;
 	int used_cells;	/* 1-2 */
 	int tsc_wires;
-- 
cgit v1.2.3


From f438b9da75eb80eb6c4095a5b75324cc9a7f0570 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Wed, 5 Oct 2016 14:34:41 +0530
Subject: drivers: iio: ti_am335x_adc: add dma support

This patch adds the required pieces to ti_am335x_adc driver for
DMA support

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/ti_am335x_adc.c      | 148 ++++++++++++++++++++++++++++++++++-
 include/linux/mfd/ti_am335x_tscadc.h |   7 ++
 2 files changed, 152 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index c3cfacca2541..ad9dec30bb30 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -30,10 +30,28 @@
 #include <linux/iio/buffer.h>
 #include <linux/iio/kfifo_buf.h>
 
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+
+#define DMA_BUFFER_SIZE		SZ_2K
+
+struct tiadc_dma {
+	struct dma_slave_config	conf;
+	struct dma_chan		*chan;
+	dma_addr_t		addr;
+	dma_cookie_t		cookie;
+	u8			*buf;
+	int			current_period;
+	int			period_size;
+	u8			fifo_thresh;
+};
+
 struct tiadc_device {
 	struct ti_tscadc_dev *mfd_tscadc;
+	struct tiadc_dma dma;
 	struct mutex fifo1_lock; /* to protect fifo access */
 	int channels;
+	int total_ch_enabled;
 	u8 channel_line[8];
 	u8 channel_step[8];
 	int buffer_en_ch_steps;
@@ -198,6 +216,67 @@ static irqreturn_t tiadc_worker_h(int irq, void *private)
 	return IRQ_HANDLED;
 }
 
+static void tiadc_dma_rx_complete(void *param)
+{
+	struct iio_dev *indio_dev = param;
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct tiadc_dma *dma = &adc_dev->dma;
+	u8 *data;
+	int i;
+
+	data = dma->buf + dma->current_period * dma->period_size;
+	dma->current_period = 1 - dma->current_period; /* swap the buffer ID */
+
+	for (i = 0; i < dma->period_size; i += indio_dev->scan_bytes) {
+		iio_push_to_buffers(indio_dev, data);
+		data += indio_dev->scan_bytes;
+	}
+}
+
+static int tiadc_start_dma(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct tiadc_dma *dma = &adc_dev->dma;
+	struct dma_async_tx_descriptor *desc;
+
+	dma->current_period = 0; /* We start to fill period 0 */
+	/*
+	 * Make the fifo thresh as the multiple of total number of
+	 * channels enabled, so make sure that cyclic DMA period
+	 * length is also a multiple of total number of channels
+	 * enabled. This ensures that no invalid data is reported
+	 * to the stack via iio_push_to_buffers().
+	 */
+	dma->fifo_thresh = rounddown(FIFO1_THRESHOLD + 1,
+				     adc_dev->total_ch_enabled) - 1;
+	/* Make sure that period length is multiple of fifo thresh level */
+	dma->period_size = rounddown(DMA_BUFFER_SIZE / 2,
+				    (dma->fifo_thresh + 1) * sizeof(u16));
+
+	dma->conf.src_maxburst = dma->fifo_thresh + 1;
+	dmaengine_slave_config(dma->chan, &dma->conf);
+
+	desc = dmaengine_prep_dma_cyclic(dma->chan, dma->addr,
+					 dma->period_size * 2,
+					 dma->period_size, DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT);
+	if (!desc)
+		return -EBUSY;
+
+	desc->callback = tiadc_dma_rx_complete;
+	desc->callback_param = indio_dev;
+
+	dma->cookie = dmaengine_submit(desc);
+
+	dma_async_issue_pending(dma->chan);
+
+	tiadc_writel(adc_dev, REG_FIFO1THR, dma->fifo_thresh);
+	tiadc_writel(adc_dev, REG_DMA1REQ, dma->fifo_thresh);
+	tiadc_writel(adc_dev, REG_DMAENABLE_SET, DMA_FIFO1);
+
+	return 0;
+}
+
 static int tiadc_buffer_preenable(struct iio_dev *indio_dev)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
@@ -218,20 +297,30 @@ static int tiadc_buffer_preenable(struct iio_dev *indio_dev)
 static int tiadc_buffer_postenable(struct iio_dev *indio_dev)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct tiadc_dma *dma = &adc_dev->dma;
+	unsigned int irq_enable;
 	unsigned int enb = 0;
 	u8 bit;
 
 	tiadc_step_config(indio_dev);
-	for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels)
+	for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels) {
 		enb |= (get_adc_step_bit(adc_dev, bit) << 1);
+		adc_dev->total_ch_enabled++;
+	}
 	adc_dev->buffer_en_ch_steps = enb;
 
+	if (dma->chan)
+		tiadc_start_dma(indio_dev);
+
 	am335x_tsc_se_set_cache(adc_dev->mfd_tscadc, enb);
 
 	tiadc_writel(adc_dev,  REG_IRQSTATUS, IRQENB_FIFO1THRES
 				| IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW);
-	tiadc_writel(adc_dev,  REG_IRQENABLE, IRQENB_FIFO1THRES
-				| IRQENB_FIFO1OVRRUN);
+
+	irq_enable = IRQENB_FIFO1OVRRUN;
+	if (!dma->chan)
+		irq_enable |= IRQENB_FIFO1THRES;
+	tiadc_writel(adc_dev,  REG_IRQENABLE, irq_enable);
 
 	return 0;
 }
@@ -239,12 +328,18 @@ static int tiadc_buffer_postenable(struct iio_dev *indio_dev)
 static int tiadc_buffer_predisable(struct iio_dev *indio_dev)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct tiadc_dma *dma = &adc_dev->dma;
 	int fifo1count, i, read;
 
 	tiadc_writel(adc_dev, REG_IRQCLR, (IRQENB_FIFO1THRES |
 				IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW));
 	am335x_tsc_se_clr(adc_dev->mfd_tscadc, adc_dev->buffer_en_ch_steps);
 	adc_dev->buffer_en_ch_steps = 0;
+	adc_dev->total_ch_enabled = 0;
+	if (dma->chan) {
+		tiadc_writel(adc_dev, REG_DMAENABLE_CLEAR, 0x2);
+		dmaengine_terminate_async(dma->chan);
+	}
 
 	/* Flush FIFO of leftover data in the time it takes to disable adc */
 	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
@@ -430,6 +525,41 @@ static const struct iio_info tiadc_info = {
 	.driver_module = THIS_MODULE,
 };
 
+static int tiadc_request_dma(struct platform_device *pdev,
+			     struct tiadc_device *adc_dev)
+{
+	struct tiadc_dma	*dma = &adc_dev->dma;
+	dma_cap_mask_t		mask;
+
+	/* Default slave configuration parameters */
+	dma->conf.direction = DMA_DEV_TO_MEM;
+	dma->conf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	dma->conf.src_addr = adc_dev->mfd_tscadc->tscadc_phys_base + REG_FIFO1;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_CYCLIC, mask);
+
+	/* Get a channel for RX */
+	dma->chan = dma_request_chan(adc_dev->mfd_tscadc->dev, "fifo1");
+	if (IS_ERR(dma->chan)) {
+		int ret = PTR_ERR(dma->chan);
+
+		dma->chan = NULL;
+		return ret;
+	}
+
+	/* RX buffer */
+	dma->buf = dma_alloc_coherent(dma->chan->device->dev, DMA_BUFFER_SIZE,
+				      &dma->addr, GFP_KERNEL);
+	if (!dma->buf)
+		goto err;
+
+	return 0;
+err:
+	dma_release_channel(dma->chan);
+	return -ENOMEM;
+}
+
 static int tiadc_parse_dt(struct platform_device *pdev,
 			  struct tiadc_device *adc_dev)
 {
@@ -512,8 +642,14 @@ static int tiadc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, indio_dev);
 
+	err = tiadc_request_dma(pdev, adc_dev);
+	if (err && err == -EPROBE_DEFER)
+		goto err_dma;
+
 	return 0;
 
+err_dma:
+	iio_device_unregister(indio_dev);
 err_buffer_unregister:
 	tiadc_iio_buffered_hardware_remove(indio_dev);
 err_free_channels:
@@ -525,8 +661,14 @@ static int tiadc_remove(struct platform_device *pdev)
 {
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct tiadc_dma *dma = &adc_dev->dma;
 	u32 step_en;
 
+	if (dma->chan) {
+		dma_free_coherent(dma->chan->device->dev, DMA_BUFFER_SIZE,
+				  dma->buf, dma->addr);
+		dma_release_channel(dma->chan);
+	}
 	iio_device_unregister(indio_dev);
 	tiadc_iio_buffered_hardware_remove(indio_dev);
 	tiadc_channels_remove(indio_dev);
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index e45a208d9944..b9a53e013bff 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -23,6 +23,8 @@
 #define REG_IRQENABLE		0x02C
 #define REG_IRQCLR		0x030
 #define REG_IRQWAKEUP		0x034
+#define REG_DMAENABLE_SET	0x038
+#define REG_DMAENABLE_CLEAR	0x03c
 #define REG_CTRL		0x040
 #define REG_ADCFSM		0x044
 #define REG_CLKDIV		0x04C
@@ -36,6 +38,7 @@
 #define REG_FIFO0THR		0xE8
 #define REG_FIFO1CNT		0xF0
 #define REG_FIFO1THR		0xF4
+#define REG_DMA1REQ		0xF8
 #define REG_FIFO0		0x100
 #define REG_FIFO1		0x200
 
@@ -126,6 +129,10 @@
 #define FIFOREAD_DATA_MASK (0xfff << 0)
 #define FIFOREAD_CHNLID_MASK (0xf << 16)
 
+/* DMA ENABLE/CLEAR Register */
+#define DMA_FIFO0		BIT(0)
+#define DMA_FIFO1		BIT(1)
+
 /* Sequencer Status */
 #define SEQ_STATUS BIT(5)
 #define CHARGE_STEP		0x11
-- 
cgit v1.2.3


From d278d4a8892f13b6a9eb6102b356402f0e062324 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 30 Mar 2016 10:21:08 -0600
Subject: block: add code to track actual device queue depth

For blk-mq, ->nr_requests does track queue depth, at least at init
time. But for the older queue paths, it's simply a soft setting.
On top of that, it's generally larger than the hardware setting
on purpose, to allow backup of requests for merging.

Fill a hole in struct request with a 'queue_depth' member, that
drivers can call to more closely inform the block layer of the
real queue depth.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 block/blk-settings.c   | 12 ++++++++++++
 drivers/scsi/scsi.c    |  3 +++
 include/linux/blkdev.h | 11 +++++++++++
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 55369a65dea2..9cf053759363 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -836,6 +836,18 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 
+/**
+ * blk_set_queue_depth - tell the block layer about the device queue depth
+ * @q:		the request queue for the device
+ * @depth:		queue depth
+ *
+ */
+void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
+{
+	q->queue_depth = depth;
+}
+EXPORT_SYMBOL(blk_set_queue_depth);
+
 /**
  * blk_queue_write_cache - configure queue's write cache
  * @q:		the request queue for the device
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1deb6adc411f..75455d4dab68 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
 		wmb();
 	}
 
+	if (sdev->request_queue)
+		blk_set_queue_depth(sdev->request_queue, depth);
+
 	return sdev->queue_depth;
 }
 EXPORT_SYMBOL(scsi_change_queue_depth);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9189a2d5c392..d364be6e6959 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -405,6 +405,8 @@ struct request_queue {
 	struct blk_mq_ctx __percpu	*queue_ctx;
 	unsigned int		nr_queues;
 
+	unsigned int		queue_depth;
+
 	/* hw dispatch queues */
 	struct blk_mq_hw_ctx	**queue_hw_ctx;
 	unsigned int		nr_hw_queues;
@@ -777,6 +779,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 	return false;
 }
 
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+	if (q->queue_depth)
+		return q->queue_depth;
+
+	return q->nr_requests;
+}
+
 /*
  * q->prep_rq_fn return values
  */
@@ -1094,6 +1104,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
-- 
cgit v1.2.3


From fa32ff6576623616c1751562edaed8c164ca5199 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Sun, 6 Nov 2016 13:14:05 -0800
Subject: Drivers: hv: ring_buffer: count on wrap around mappings in
 get_next_pkt_raw() (v2)

With wrap around mappings in place we can always provide drivers with
direct links to packets on the ring buffer, even when they wrap around.
Do the required updates to get_next_pkt_raw()/put_pkt_raw()

The first version of this commit was reverted (65a532f3d50a) to deal with
cross-tree merge issues which are (hopefully) resolved now.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6824556d37ed..42ae6a5a2538 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1526,31 +1526,23 @@ static inline struct vmpacket_descriptor *
 get_next_pkt_raw(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *ring_info = &channel->inbound;
-	u32 read_loc = ring_info->priv_read_index;
+	u32 priv_read_loc = ring_info->priv_read_index;
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
-	struct vmpacket_descriptor *cur_desc;
-	u32 packetlen;
 	u32 dsize = ring_info->ring_datasize;
-	u32 delta = read_loc - ring_info->ring_buffer->read_index;
+	/*
+	 * delta is the difference between what is available to read and
+	 * what was already consumed in place. We commit read index after
+	 * the whole batch is processed.
+	 */
+	u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
+		priv_read_loc - ring_info->ring_buffer->read_index :
+		(dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
 	u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
 
 	if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
 		return NULL;
 
-	if ((read_loc + sizeof(*cur_desc)) > dsize)
-		return NULL;
-
-	cur_desc = ring_buffer + read_loc;
-	packetlen = cur_desc->len8 << 3;
-
-	/*
-	 * If the packet under consideration is wrapping around,
-	 * return failure.
-	 */
-	if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1))
-		return NULL;
-
-	return cur_desc;
+	return ring_buffer + priv_read_loc;
 }
 
 /*
@@ -1562,16 +1554,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
 				struct vmpacket_descriptor *desc)
 {
 	struct hv_ring_buffer_info *ring_info = &channel->inbound;
-	u32 read_loc = ring_info->priv_read_index;
 	u32 packetlen = desc->len8 << 3;
 	u32 dsize = ring_info->ring_datasize;
 
-	if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize)
-		BUG();
 	/*
 	 * Include the packet trailer.
 	 */
 	ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+	ring_info->priv_read_index %= dsize;
 }
 
 /*
-- 
cgit v1.2.3


From 1f6ee4e7d83586c8b10bd4f2f4346353d04ce884 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 6 Nov 2016 13:14:17 -0800
Subject: Drivers: hv: vmbus: On write cleanup the logic to interrupt the host

Signal the host when we determine the host is to be signaled.
The currrent code determines the need to signal in the ringbuffer
code and actually issues the signal elsewhere. This can result
in the host viewing this interrupt as spurious since the host may also
poll the channel. Make the necessary adjustments.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 99 +++++------------------------------------------
 drivers/hv/hyperv_vmbus.h |  6 +--
 drivers/hv/ring_buffer.c  | 30 +++++++++-----
 include/linux/hyperv.h    |  1 +
 4 files changed, 35 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 5e482d7f60cb..8a8148f7b842 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -39,7 +39,7 @@
  * vmbus_setevent- Trigger an event notification on the specified
  * channel.
  */
-static void vmbus_setevent(struct vmbus_channel *channel)
+void vmbus_setevent(struct vmbus_channel *channel)
 {
 	struct hv_monitor_page *monitorpage;
 
@@ -65,6 +65,7 @@ static void vmbus_setevent(struct vmbus_channel *channel)
 		vmbus_set_event(channel);
 	}
 }
+EXPORT_SYMBOL_GPL(vmbus_setevent);
 
 /*
  * vmbus_open - Open the specified channel.
@@ -635,8 +636,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
 	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
 	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
-	int ret;
-	bool signal = false;
 	bool lock = channel->acquire_ring_lock;
 	int num_vecs = ((bufferlen != 0) ? 3 : 1);
 
@@ -656,41 +655,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
 	bufferlist[2].iov_base = &aligned_data;
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
-	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
-				  &signal, lock, channel->signal_policy);
-
-	/*
-	 * Signalling the host is conditional on many factors:
-	 * 1. The ring state changed from being empty to non-empty.
-	 *    This is tracked by the variable "signal".
-	 * 2. The variable kick_q tracks if more data will be placed
-	 *    on the ring. We will not signal if more data is
-	 *    to be placed.
-	 *
-	 * Based on the channel signal state, we will decide
-	 * which signaling policy will be applied.
-	 *
-	 * If we cannot write to the ring-buffer; signal the host
-	 * even if we may not have written anything. This is a rare
-	 * enough condition that it should not matter.
-	 * NOTE: in this case, the hvsock channel is an exception, because
-	 * it looks the host side's hvsock implementation has a throttling
-	 * mechanism which can hurt the performance otherwise.
-	 *
-	 * KYS: Oct. 30, 2016:
-	 * It looks like Windows hosts have logic to deal with DOS attacks that
-	 * can be triggered if it receives interrupts when it is not expecting
-	 * the interrupt. The host expects interrupts only when the ring
-	 * transitions from empty to non-empty (or full to non full on the guest
-	 * to host ring).
-	 * So, base the signaling decision solely on the ring state until the
-	 * host logic is fixed.
-	 */
-
-	if (((ret == 0) && signal))
-		vmbus_setevent(channel);
+	return hv_ringbuffer_write(channel, bufferlist, num_vecs,
+				   lock, kick_q);
 
-	return ret;
 }
 EXPORT_SYMBOL(vmbus_sendpacket_ctl);
 
@@ -731,7 +698,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
 				     u32 flags,
 				     bool kick_q)
 {
-	int ret;
 	int i;
 	struct vmbus_channel_packet_page_buffer desc;
 	u32 descsize;
@@ -739,7 +705,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
 	u32 packetlen_aligned;
 	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
-	bool signal = false;
 	bool lock = channel->acquire_ring_lock;
 
 	if (pagecount > MAX_PAGE_BUFFER_COUNT)
@@ -777,38 +742,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
 	bufferlist[2].iov_base = &aligned_data;
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
-	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock, channel->signal_policy);
-
-	/*
-	 * Signalling the host is conditional on many factors:
-	 * 1. The ring state changed from being empty to non-empty.
-	 *    This is tracked by the variable "signal".
-	 * 2. The variable kick_q tracks if more data will be placed
-	 *    on the ring. We will not signal if more data is
-	 *    to be placed.
-	 *
-	 * Based on the channel signal state, we will decide
-	 * which signaling policy will be applied.
-	 *
-	 * If we cannot write to the ring-buffer; signal the host
-	 * even if we may not have written anything. This is a rare
-	 * enough condition that it should not matter.
-	 *
-	 * KYS: Oct. 30, 2016:
-	 * It looks like Windows hosts have logic to deal with DOS attacks that
-	 * can be triggered if it receives interrupts when it is not expecting
-	 * the interrupt. The host expects interrupts only when the ring
-	 * transitions from empty to non-empty (or full to non full on the guest
-	 * to host ring).
-	 * So, base the signaling decision solely on the ring state until the
-	 * host logic is fixed.
-	 */
-
-	if (((ret == 0) && signal))
-		vmbus_setevent(channel);
-
-	return ret;
+	return hv_ringbuffer_write(channel, bufferlist, 3,
+				   lock, kick_q);
 }
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
 
@@ -839,12 +774,10 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
 			      u32 desc_size,
 			      void *buffer, u32 bufferlen, u64 requestid)
 {
-	int ret;
 	u32 packetlen;
 	u32 packetlen_aligned;
 	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
-	bool signal = false;
 	bool lock = channel->acquire_ring_lock;
 
 	packetlen = desc_size + bufferlen;
@@ -865,13 +798,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
 	bufferlist[2].iov_base = &aligned_data;
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
-	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock, channel->signal_policy);
-
-	if (ret == 0 && signal)
-		vmbus_setevent(channel);
-
-	return ret;
+	return hv_ringbuffer_write(channel, bufferlist, 3,
+				   lock, true);
 }
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
 
@@ -883,14 +811,12 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
 				struct hv_multipage_buffer *multi_pagebuffer,
 				void *buffer, u32 bufferlen, u64 requestid)
 {
-	int ret;
 	struct vmbus_channel_packet_multipage_buffer desc;
 	u32 descsize;
 	u32 packetlen;
 	u32 packetlen_aligned;
 	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
-	bool signal = false;
 	bool lock = channel->acquire_ring_lock;
 	u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
 					 multi_pagebuffer->len);
@@ -930,13 +856,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
 	bufferlist[2].iov_base = &aligned_data;
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
-	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock, channel->signal_policy);
-
-	if (ret == 0 && signal)
-		vmbus_setevent(channel);
-
-	return ret;
+	return hv_ringbuffer_write(channel, bufferlist, 3,
+				   lock, true);
 }
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index bdab7e7f2a15..2d42ebe2af6c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -527,10 +527,10 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
-int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
+int hv_ringbuffer_write(struct vmbus_channel *channel,
 		    struct kvec *kv_list,
-		    u32 kv_count, bool *signal, bool lock,
-		    enum hv_signal_policy policy);
+		    u32 kv_count, bool lock,
+		    bool kick_q);
 
 int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 		       void *buffer, u32 buflen, u32 *buffer_actual_len,
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 5d11d93eedf4..4af71306d0ff 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -66,14 +66,25 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi)
  *	   once the ring buffer is empty, it will clear the
  *	   interrupt_mask and re-check to see if new data has
  *	   arrived.
+ *
+ * KYS: Oct. 30, 2016:
+ * It looks like Windows hosts have logic to deal with DOS attacks that
+ * can be triggered if it receives interrupts when it is not expecting
+ * the interrupt. The host expects interrupts only when the ring
+ * transitions from empty to non-empty (or full to non full on the guest
+ * to host ring).
+ * So, base the signaling decision solely on the ring state until the
+ * host logic is fixed.
  */
 
-static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
-			      enum hv_signal_policy policy)
+static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel,
+			       bool kick_q)
 {
+	struct hv_ring_buffer_info *rbi = &channel->outbound;
+
 	virt_mb();
 	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
-		return false;
+		return;
 
 	/* check interrupt_mask before read_index */
 	virt_rmb();
@@ -82,9 +93,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
 	 * ring transitions from being empty to non-empty.
 	 */
 	if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
-		return true;
+		vmbus_setevent(channel);
 
-	return false;
+	return;
 }
 
 /* Get the next write location for the specified ring buffer. */
@@ -273,9 +284,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 }
 
 /* Write to the ring buffer. */
-int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
-		    struct kvec *kv_list, u32 kv_count, bool *signal, bool lock,
-		    enum hv_signal_policy policy)
+int hv_ringbuffer_write(struct vmbus_channel *channel,
+		    struct kvec *kv_list, u32 kv_count, bool lock,
+		    bool kick_q)
 {
 	int i = 0;
 	u32 bytes_avail_towrite;
@@ -285,6 +296,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
 	u32 old_write;
 	u64 prev_indices = 0;
 	unsigned long flags = 0;
+	struct hv_ring_buffer_info *outring_info = &channel->outbound;
 
 	for (i = 0; i < kv_count; i++)
 		totalbytes_towrite += kv_list[i].iov_len;
@@ -337,7 +349,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
 	if (lock)
 		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
 
-	*signal = hv_need_to_signal(old_write, outring_info, policy);
+	hv_signal_on_write(old_write, channel, kick_q);
 	return 0;
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42ae6a5a2538..8cf78ed96747 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1454,6 +1454,7 @@ void hv_event_tasklet_enable(struct vmbus_channel *channel);
 
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 
+void vmbus_setevent(struct vmbus_channel *channel);
 /*
  * Negotiated version with the Host.
  */
-- 
cgit v1.2.3


From 3372592a140db69fd63837e81f048ab4abf8111e Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 6 Nov 2016 13:14:18 -0800
Subject: Drivers: hv: vmbus: On the read path cleanup the logic to interrupt
 the host

Signal the host when we determine the host is to be signaled -
on th read path. The currrent code determines the need to signal in the
ringbuffer code and actually issues the signal elsewhere. This can result
in the host viewing this interrupt as spurious since the host may also
poll the channel. Make the necessary adjustments.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 11 ++---------
 drivers/hv/hyperv_vmbus.h |  4 ++--
 drivers/hv/ring_buffer.c  |  7 ++++---
 include/linux/hyperv.h    | 12 ++++++------
 4 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 8a8148f7b842..5fb4c6d9209b 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -879,16 +879,9 @@ __vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
 		   u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
 		   bool raw)
 {
-	int ret;
-	bool signal = false;
-
-	ret = hv_ringbuffer_read(&channel->inbound, buffer, bufferlen,
-				 buffer_actual_len, requestid, &signal, raw);
+	return hv_ringbuffer_read(channel, buffer, bufferlen,
+				  buffer_actual_len, requestid, raw);
 
-	if (signal)
-		vmbus_setevent(channel);
-
-	return ret;
 }
 
 int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 2d42ebe2af6c..0675b395ce5c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -532,9 +532,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 		    u32 kv_count, bool lock,
 		    bool kick_q);
 
-int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
+int hv_ringbuffer_read(struct vmbus_channel *channel,
 		       void *buffer, u32 buflen, u32 *buffer_actual_len,
-		       u64 *requestid, bool *signal, bool raw);
+		       u64 *requestid, bool raw);
 
 void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
 			    struct hv_ring_buffer_debug_info *debug_info);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 4af71306d0ff..cd49cb17eb7f 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -353,9 +353,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	return 0;
 }
 
-int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
+int hv_ringbuffer_read(struct vmbus_channel *channel,
 		       void *buffer, u32 buflen, u32 *buffer_actual_len,
-		       u64 *requestid, bool *signal, bool raw)
+		       u64 *requestid, bool raw)
 {
 	u32 bytes_avail_toread;
 	u32 next_read_location = 0;
@@ -364,6 +364,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 	u32 offset;
 	u32 packetlen;
 	int ret = 0;
+	struct hv_ring_buffer_info *inring_info = &channel->inbound;
 
 	if (buflen <= 0)
 		return -EINVAL;
@@ -421,7 +422,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 	/* Update the read index */
 	hv_set_next_read_location(inring_info, next_read_location);
 
-	*signal = hv_need_to_signal_on_read(inring_info);
+	hv_signal_on_read(channel);
 
 	return ret;
 }
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8cf78ed96747..fdb0a87323f3 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1487,10 +1487,11 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
  *    there is room for the producer to send the pending packet.
  */
 
-static inline  bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
+static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 {
 	u32 cur_write_sz;
 	u32 pending_sz;
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
 	/*
 	 * Issue a full memory barrier before making the signaling decision.
@@ -1508,14 +1509,14 @@ static inline  bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
 	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
 	/* If the other end is not blocked on write don't bother. */
 	if (pending_sz == 0)
-		return false;
+		return;
 
 	cur_write_sz = hv_get_bytes_to_write(rbi);
 
 	if (cur_write_sz >= pending_sz)
-		return true;
+		vmbus_setevent(channel);
 
-	return false;
+	return;
 }
 
 /*
@@ -1587,8 +1588,7 @@ static inline void commit_rd_index(struct vmbus_channel *channel)
 	virt_rmb();
 	ring_info->ring_buffer->read_index = ring_info->priv_read_index;
 
-	if (hv_need_to_signal_on_read(ring_info))
-		vmbus_set_event(channel);
+	hv_signal_on_read(channel);
 }
 
 
-- 
cgit v1.2.3


From e9f66ae23c209eec617130126a23bf547bf7a6d8 Mon Sep 17 00:00:00 2001
From: Sergio Prado <sergio.prado@e-labworks.com>
Date: Thu, 20 Oct 2016 19:42:44 -0200
Subject: mtd: s3c2410: make ecc mode configurable via platform data

Removing CONFIG_MTD_NAND_S3C2410_HWECC option and adding a ecc_mode
field in the drivers's platform data structure so it can be selectable
via platform data.

Also setting this field to NAND_ECC_SOFT in all boards using this
driver since none of them had CONFIG_MTD_NAND_S3C2410_HWECC enabled.

Signed-off-by: Sergio Prado <sergio.prado@e-labworks.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 arch/arm/mach-s3c24xx/common-smdk.c            |   1 +
 arch/arm/mach-s3c24xx/mach-anubis.c            |   1 +
 arch/arm/mach-s3c24xx/mach-at2440evb.c         |   1 +
 arch/arm/mach-s3c24xx/mach-bast.c              |   1 +
 arch/arm/mach-s3c24xx/mach-gta02.c             |   1 +
 arch/arm/mach-s3c24xx/mach-jive.c              |   1 +
 arch/arm/mach-s3c24xx/mach-mini2440.c          |   1 +
 arch/arm/mach-s3c24xx/mach-osiris.c            |   1 +
 arch/arm/mach-s3c24xx/mach-qt2410.c            |   1 +
 arch/arm/mach-s3c24xx/mach-rx1950.c            |   1 +
 arch/arm/mach-s3c24xx/mach-rx3715.c            |   1 +
 arch/arm/mach-s3c24xx/mach-vstms.c             |   1 +
 arch/arm/mach-s3c64xx/mach-hmt.c               |   1 +
 arch/arm/mach-s3c64xx/mach-mini6410.c          |   1 +
 arch/arm/mach-s3c64xx/mach-real6410.c          |   1 +
 drivers/mtd/nand/Kconfig                       |   9 --
 drivers/mtd/nand/s3c2410.c                     | 123 +++++++++++++------------
 include/linux/platform_data/mtd-nand-s3c2410.h |   6 +-
 18 files changed, 82 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c24xx/common-smdk.c b/arch/arm/mach-s3c24xx/common-smdk.c
index e9fbcc91c5c0..9e0bc46e90ec 100644
--- a/arch/arm/mach-s3c24xx/common-smdk.c
+++ b/arch/arm/mach-s3c24xx/common-smdk.c
@@ -171,6 +171,7 @@ static struct s3c2410_platform_nand smdk_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(smdk_nand_sets),
 	.sets		= smdk_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* devices we initialise */
diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c
index d03df0df01fa..029ef1b58925 100644
--- a/arch/arm/mach-s3c24xx/mach-anubis.c
+++ b/arch/arm/mach-s3c24xx/mach-anubis.c
@@ -223,6 +223,7 @@ static struct s3c2410_platform_nand __initdata anubis_nand_info = {
 	.nr_sets	= ARRAY_SIZE(anubis_nand_sets),
 	.sets		= anubis_nand_sets,
 	.select_chip	= anubis_nand_select,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* IDE channels */
diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c
index 9ae170fef2a7..7b28eb623fc1 100644
--- a/arch/arm/mach-s3c24xx/mach-at2440evb.c
+++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c
@@ -114,6 +114,7 @@ static struct s3c2410_platform_nand __initdata at2440evb_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(at2440evb_nand_sets),
 	.sets		= at2440evb_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c
index ed07cf392d4b..5185036765db 100644
--- a/arch/arm/mach-s3c24xx/mach-bast.c
+++ b/arch/arm/mach-s3c24xx/mach-bast.c
@@ -299,6 +299,7 @@ static struct s3c2410_platform_nand __initdata bast_nand_info = {
 	.nr_sets	= ARRAY_SIZE(bast_nand_sets),
 	.sets		= bast_nand_sets,
 	.select_chip	= bast_nand_select,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* DM9000 */
diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c
index 27ae6877550f..b0ed401da3a3 100644
--- a/arch/arm/mach-s3c24xx/mach-gta02.c
+++ b/arch/arm/mach-s3c24xx/mach-gta02.c
@@ -443,6 +443,7 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = {
 	.twrph1		= 15,
 	.nr_sets	= ARRAY_SIZE(gta02_nand_sets),
 	.sets		= gta02_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index 7d99fe8f6157..895aca225952 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -232,6 +232,7 @@ static struct s3c2410_platform_nand __initdata jive_nand_info = {
 	.twrph1		= 40,
 	.sets		= jive_nand_sets,
 	.nr_sets	= ARRAY_SIZE(jive_nand_sets),
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static int __init jive_mtdset(char *options)
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index ec60bd4a1646..71af8d2fd320 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -287,6 +287,7 @@ static struct s3c2410_platform_nand mini2440_nand_info __initdata = {
 	.nr_sets	= ARRAY_SIZE(mini2440_nand_sets),
 	.sets		= mini2440_nand_sets,
 	.ignore_unset_ecc = 1,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c
index 2f6fdc326835..70b0eb7d3134 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris.c
@@ -238,6 +238,7 @@ static struct s3c2410_platform_nand __initdata osiris_nand_info = {
 	.nr_sets	= ARRAY_SIZE(osiris_nand_sets),
 	.sets		= osiris_nand_sets,
 	.select_chip	= osiris_nand_select,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* PCMCIA control and configuration */
diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c
index 984516e8307a..868c82087403 100644
--- a/arch/arm/mach-s3c24xx/mach-qt2410.c
+++ b/arch/arm/mach-s3c24xx/mach-qt2410.c
@@ -284,6 +284,7 @@ static struct s3c2410_platform_nand __initdata qt2410_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(qt2410_nand_sets),
 	.sets		= qt2410_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 /* UDC */
diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c
index 25a139bb9826..e86ad6a68a0b 100644
--- a/arch/arm/mach-s3c24xx/mach-rx1950.c
+++ b/arch/arm/mach-s3c24xx/mach-rx1950.c
@@ -611,6 +611,7 @@ static struct s3c2410_platform_nand rx1950_nand_info = {
 	.twrph1 = 15,
 	.nr_sets = ARRAY_SIZE(rx1950_nand_sets),
 	.sets = rx1950_nand_sets,
+	.ecc_mode = NAND_ECC_SOFT,
 };
 
 static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c
index cf55196f89ca..a39fb9780dd3 100644
--- a/arch/arm/mach-s3c24xx/mach-rx3715.c
+++ b/arch/arm/mach-s3c24xx/mach-rx3715.c
@@ -164,6 +164,7 @@ static struct s3c2410_platform_nand __initdata rx3715_nand_info = {
 	.twrph1		= 15,
 	.nr_sets	= ARRAY_SIZE(rx3715_nand_sets),
 	.sets		= rx3715_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static struct platform_device *rx3715_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index b4460d5f7011..f5e6322145fa 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -117,6 +117,7 @@ static struct s3c2410_platform_nand __initdata vstms_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(vstms_nand_sets),
 	.sets		= vstms_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static struct platform_device *vstms_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c
index bc7dc1fcbf7d..59b5531f1987 100644
--- a/arch/arm/mach-s3c64xx/mach-hmt.c
+++ b/arch/arm/mach-s3c64xx/mach-hmt.c
@@ -204,6 +204,7 @@ static struct s3c2410_platform_nand hmt_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(hmt_nand_sets),
 	.sets		= hmt_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static struct gpio_led hmt_leds[] = {
diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index ae999fb3fe6d..a3e3e25728b4 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -142,6 +142,7 @@ static struct s3c2410_platform_nand mini6410_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(mini6410_nand_sets),
 	.sets		= mini6410_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static struct s3c_fb_pd_win mini6410_lcd_type0_fb_win = {
diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c
index 4e240ffa7ac7..d6b3ffd7704b 100644
--- a/arch/arm/mach-s3c64xx/mach-real6410.c
+++ b/arch/arm/mach-s3c64xx/mach-real6410.c
@@ -194,6 +194,7 @@ static struct s3c2410_platform_nand real6410_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(real6410_nand_sets),
 	.sets		= real6410_nand_sets,
+	.ecc_mode       = NAND_ECC_SOFT,
 };
 
 static struct platform_device *real6410_devices[] __initdata = {
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index c023125989cf..60757af314d3 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -179,15 +179,6 @@ config MTD_NAND_S3C2410_DEBUG
 	help
 	  Enable debugging of the S3C NAND driver
 
-config MTD_NAND_S3C2410_HWECC
-	bool "Samsung S3C NAND Hardware ECC"
-	depends on MTD_NAND_S3C2410
-	help
-	  Enable the use of the controller's internal ECC generator when
-	  using NAND. Early versions of the chips have had problems with
-	  incorrect ECC generation, and if using these, the default of
-	  software ECC is preferable.
-
 config MTD_NAND_NDFC
 	tristate "NDFC NanD Flash Controller"
 	depends on 4xx
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index d459c19d78de..bb4ac3179d17 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -497,7 +497,6 @@ static int s3c2412_nand_devready(struct mtd_info *mtd)
 
 /* ECC handling functions */
 
-#ifdef CONFIG_MTD_NAND_S3C2410_HWECC
 static int s3c2410_nand_correct_data(struct mtd_info *mtd, u_char *dat,
 				     u_char *read_ecc, u_char *calc_ecc)
 {
@@ -649,7 +648,6 @@ static int s3c2440_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 
 	return 0;
 }
-#endif
 
 /* over-ride the standard functions for a little more speed. We can
  * use read/write block to move the data buffers to/from the controller
@@ -858,50 +856,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 	nmtd->info	   = info;
 	nmtd->set	   = set;
 
-#ifdef CONFIG_MTD_NAND_S3C2410_HWECC
-	chip->ecc.calculate = s3c2410_nand_calculate_ecc;
-	chip->ecc.correct   = s3c2410_nand_correct_data;
-	chip->ecc.mode	    = NAND_ECC_HW;
-	chip->ecc.strength  = 1;
-
-	switch (info->cpu_type) {
-	case TYPE_S3C2410:
-		chip->ecc.hwctl	    = s3c2410_nand_enable_hwecc;
-		chip->ecc.calculate = s3c2410_nand_calculate_ecc;
-		break;
-
-	case TYPE_S3C2412:
-		chip->ecc.hwctl     = s3c2412_nand_enable_hwecc;
-		chip->ecc.calculate = s3c2412_nand_calculate_ecc;
-		break;
-
-	case TYPE_S3C2440:
-		chip->ecc.hwctl     = s3c2440_nand_enable_hwecc;
-		chip->ecc.calculate = s3c2440_nand_calculate_ecc;
-		break;
-	}
-#else
-	chip->ecc.mode	    = NAND_ECC_SOFT;
-	chip->ecc.algo	= NAND_ECC_HAMMING;
-#endif
-
-	if (set->disable_ecc)
-		chip->ecc.mode	= NAND_ECC_NONE;
-
-	switch (chip->ecc.mode) {
-	case NAND_ECC_NONE:
-		dev_info(info->device, "NAND ECC disabled\n");
-		break;
-	case NAND_ECC_SOFT:
-		dev_info(info->device, "NAND soft ECC\n");
-		break;
-	case NAND_ECC_HW:
-		dev_info(info->device, "NAND hardware ECC\n");
-		break;
-	default:
-		dev_info(info->device, "NAND ECC UNKNOWN\n");
-		break;
-	}
+	chip->ecc.mode = info->platform->ecc_mode;
 
 	/* If you use u-boot BBT creation code, specifying this flag will
 	 * let the kernel fish out the BBT from the NAND, and also skip the
@@ -923,28 +878,74 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
  *
  * The internal state is currently limited to the ECC state information.
 */
-static void s3c2410_nand_update_chip(struct s3c2410_nand_info *info,
-				     struct s3c2410_nand_mtd *nmtd)
+static int s3c2410_nand_update_chip(struct s3c2410_nand_info *info,
+				    struct s3c2410_nand_mtd *nmtd)
 {
 	struct nand_chip *chip = &nmtd->chip;
 
-	dev_dbg(info->device, "chip %p => page shift %d\n",
-		chip, chip->page_shift);
+	switch (chip->ecc.mode) {
 
-	if (chip->ecc.mode != NAND_ECC_HW)
-		return;
+	case NAND_ECC_NONE:
+		dev_info(info->device, "ECC disabled\n");
+		break;
+
+	case NAND_ECC_SOFT:
+		/*
+		 * This driver expects Hamming based ECC when ecc_mode is set
+		 * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to
+		 * avoid adding an extra ecc_algo field to
+		 * s3c2410_platform_nand.
+		 */
+		chip->ecc.algo = NAND_ECC_HAMMING;
+		dev_info(info->device, "soft ECC\n");
+		break;
+
+	case NAND_ECC_HW:
+		chip->ecc.calculate = s3c2410_nand_calculate_ecc;
+		chip->ecc.correct   = s3c2410_nand_correct_data;
+		chip->ecc.strength  = 1;
+
+		switch (info->cpu_type) {
+		case TYPE_S3C2410:
+			chip->ecc.hwctl	    = s3c2410_nand_enable_hwecc;
+			chip->ecc.calculate = s3c2410_nand_calculate_ecc;
+			break;
+
+		case TYPE_S3C2412:
+			chip->ecc.hwctl     = s3c2412_nand_enable_hwecc;
+			chip->ecc.calculate = s3c2412_nand_calculate_ecc;
+			break;
+
+		case TYPE_S3C2440:
+			chip->ecc.hwctl     = s3c2440_nand_enable_hwecc;
+			chip->ecc.calculate = s3c2440_nand_calculate_ecc;
+			break;
+		}
+
+		dev_dbg(info->device, "chip %p => page shift %d\n",
+			chip, chip->page_shift);
 
 		/* change the behaviour depending on whether we are using
 		 * the large or small page nand device */
+		if (chip->page_shift > 10) {
+			chip->ecc.size	    = 256;
+			chip->ecc.bytes	    = 3;
+		} else {
+			chip->ecc.size	    = 512;
+			chip->ecc.bytes	    = 3;
+			mtd_set_ooblayout(nand_to_mtd(chip),
+					  &s3c2410_ooblayout_ops);
+		}
 
-	if (chip->page_shift > 10) {
-		chip->ecc.size	    = 256;
-		chip->ecc.bytes	    = 3;
-	} else {
-		chip->ecc.size	    = 512;
-		chip->ecc.bytes	    = 3;
-		mtd_set_ooblayout(nand_to_mtd(chip), &s3c2410_ooblayout_ops);
+		dev_info(info->device, "hardware ECC\n");
+		break;
+
+	default:
+		dev_err(info->device, "invalid ECC mode!\n");
+		return -EINVAL;
 	}
+
+	return 0;
 }
 
 /* s3c24xx_nand_probe
@@ -1046,7 +1047,9 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 						 NULL);
 
 		if (nmtd->scan_res == 0) {
-			s3c2410_nand_update_chip(info, nmtd);
+			err = s3c2410_nand_update_chip(info, nmtd);
+			if (err < 0)
+				goto exit_error;
 			nand_scan_tail(mtd);
 			s3c2410_nand_add_partition(info, nmtd, sets);
 		}
diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h
index c55e42ee57fa..729af13d1773 100644
--- a/include/linux/platform_data/mtd-nand-s3c2410.h
+++ b/include/linux/platform_data/mtd-nand-s3c2410.h
@@ -12,9 +12,10 @@
 #ifndef __MTD_NAND_S3C2410_H
 #define __MTD_NAND_S3C2410_H
 
+#include <linux/mtd/nand.h>
+
 /**
  * struct s3c2410_nand_set - define a set of one or more nand chips
- * @disable_ecc:	Entirely disable ECC - Dangerous
  * @flash_bbt: 		Openmoko u-boot can create a Bad Block Table
  *			Setting this flag will allow the kernel to
  *			look for it at boot time and also skip the NAND
@@ -31,7 +32,6 @@
  * a warning at boot time.
  */
 struct s3c2410_nand_set {
-	unsigned int		disable_ecc:1;
 	unsigned int		flash_bbt:1;
 
 	unsigned int		options;
@@ -51,6 +51,8 @@ struct s3c2410_platform_nand {
 
 	unsigned int	ignore_unset_ecc:1;
 
+	nand_ecc_modes_t	ecc_mode;
+
 	int			nr_sets;
 	struct s3c2410_nand_set *sets;
 
-- 
cgit v1.2.3


From 1c825ad1b8cfe12ccc145dcdba360c52c0272c04 Mon Sep 17 00:00:00 2001
From: Sergio Prado <sergio.prado@e-labworks.com>
Date: Wed, 26 Oct 2016 21:59:55 -0200
Subject: mtd: s3c2410: parse the device configuration from OF node

Allows configuring Samsung's s3c2410 memory controller using a
devicetree.

Signed-off-by: Sergio Prado <sergio.prado@e-labworks.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/s3c2410.c                     | 163 ++++++++++++++++++++++---
 include/linux/platform_data/mtd-nand-s3c2410.h |   1 +
 2 files changed, 147 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index bb4ac3179d17..f0b030d44f71 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -39,6 +39,8 @@
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -185,6 +187,22 @@ struct s3c2410_nand_info {
 #endif
 };
 
+struct s3c24XX_nand_devtype_data {
+	enum s3c_cpu_type type;
+};
+
+static const struct s3c24XX_nand_devtype_data s3c2410_nand_devtype_data = {
+	.type = TYPE_S3C2410,
+};
+
+static const struct s3c24XX_nand_devtype_data s3c2412_nand_devtype_data = {
+	.type = TYPE_S3C2412,
+};
+
+static const struct s3c24XX_nand_devtype_data s3c2440_nand_devtype_data = {
+	.type = TYPE_S3C2440,
+};
+
 /* conversion functions */
 
 static struct s3c2410_nand_mtd *s3c2410_nand_mtd_toours(struct mtd_info *mtd)
@@ -794,6 +812,30 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
 	return -ENODEV;
 }
 
+static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd,
+					const struct nand_data_interface *conf,
+					bool check_only)
+{
+	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+	struct s3c2410_platform_nand *pdata = info->platform;
+	const struct nand_sdr_timings *timings;
+	int tacls;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return -ENOTSUPP;
+
+	tacls = timings->tCLS_min - timings->tWP_min;
+	if (tacls < 0)
+		tacls = 0;
+
+	pdata->tacls  = DIV_ROUND_UP(tacls, 1000);
+	pdata->twrph0 = DIV_ROUND_UP(timings->tWP_min, 1000);
+	pdata->twrph1 = DIV_ROUND_UP(timings->tCLH_min, 1000);
+
+	return s3c2410_nand_setrate(info);
+}
+
 /**
  * s3c2410_nand_init_chip - initialise a single instance of an chip
  * @info: The base NAND controller the chip is on.
@@ -808,9 +850,12 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 				   struct s3c2410_nand_mtd *nmtd,
 				   struct s3c2410_nand_set *set)
 {
+	struct device_node *np = info->device->of_node;
 	struct nand_chip *chip = &nmtd->chip;
 	void __iomem *regs = info->regs;
 
+	nand_set_flash_node(chip, set->of_node);
+
 	chip->write_buf    = s3c2410_nand_write_buf;
 	chip->read_buf     = s3c2410_nand_read_buf;
 	chip->select_chip  = s3c2410_nand_select_chip;
@@ -819,6 +864,13 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 	chip->options	   = set->options;
 	chip->controller   = &info->controller;
 
+	/*
+	 * let's keep behavior unchanged for legacy boards booting via pdata and
+	 * auto-detect timings only when booting with a device tree.
+	 */
+	if (np)
+		chip->setup_data_interface = s3c2410_nand_setup_data_interface;
+
 	switch (info->cpu_type) {
 	case TYPE_S3C2410:
 		chip->IO_ADDR_W = regs + S3C2410_NFDATA;
@@ -858,13 +910,12 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 
 	chip->ecc.mode = info->platform->ecc_mode;
 
-	/* If you use u-boot BBT creation code, specifying this flag will
-	 * let the kernel fish out the BBT from the NAND, and also skip the
-	 * full NAND scan that can take 1/2s or so. Little things... */
-	if (set->flash_bbt) {
+	/*
+	 * If you use u-boot BBT creation code, specifying this flag will
+	 * let the kernel fish out the BBT from the NAND.
+	 */
+	if (set->flash_bbt)
 		chip->bbt_options |= NAND_BBT_USE_FLASH;
-		chip->options |= NAND_SKIP_BBTSCAN;
-	}
 }
 
 /**
@@ -945,6 +996,78 @@ static int s3c2410_nand_update_chip(struct s3c2410_nand_info *info,
 		return -EINVAL;
 	}
 
+	if (chip->bbt_options & NAND_BBT_USE_FLASH)
+		chip->options |= NAND_SKIP_BBTSCAN;
+
+	return 0;
+}
+
+static const struct of_device_id s3c24xx_nand_dt_ids[] = {
+	{
+		.compatible = "samsung,s3c2410-nand",
+		.data = &s3c2410_nand_devtype_data,
+	}, {
+		/* also compatible with s3c6400 */
+		.compatible = "samsung,s3c2412-nand",
+		.data = &s3c2412_nand_devtype_data,
+	}, {
+		.compatible = "samsung,s3c2440-nand",
+		.data = &s3c2440_nand_devtype_data,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, s3c24xx_nand_dt_ids);
+
+static int s3c24xx_nand_probe_dt(struct platform_device *pdev)
+{
+	const struct s3c24XX_nand_devtype_data *devtype_data;
+	struct s3c2410_platform_nand *pdata;
+	struct s3c2410_nand_info *info = platform_get_drvdata(pdev);
+	struct device_node *np = pdev->dev.of_node, *child;
+	struct s3c2410_nand_set *sets;
+
+	devtype_data = of_device_get_match_data(&pdev->dev);
+	if (!devtype_data)
+		return -ENODEV;
+
+	info->cpu_type = devtype_data->type;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdev->dev.platform_data = pdata;
+
+	pdata->nr_sets = of_get_child_count(np);
+	if (!pdata->nr_sets)
+		return 0;
+
+	sets = devm_kzalloc(&pdev->dev, sizeof(*sets) * pdata->nr_sets,
+			    GFP_KERNEL);
+	if (!sets)
+		return -ENOMEM;
+
+	pdata->sets = sets;
+
+	for_each_available_child_of_node(np, child) {
+		sets->name = (char *)child->name;
+		sets->of_node = child;
+		sets->nr_chips = 1;
+
+		of_node_get(child);
+
+		sets++;
+	}
+
+	return 0;
+}
+
+static int s3c24xx_nand_probe_pdata(struct platform_device *pdev)
+{
+	struct s3c2410_nand_info *info = platform_get_drvdata(pdev);
+
+	info->cpu_type = platform_get_device_id(pdev)->driver_data;
+
 	return 0;
 }
 
@@ -957,8 +1080,7 @@ static int s3c2410_nand_update_chip(struct s3c2410_nand_info *info,
 */
 static int s3c24xx_nand_probe(struct platform_device *pdev)
 {
-	struct s3c2410_platform_nand *plat = to_nand_plat(pdev);
-	enum s3c_cpu_type cpu_type;
+	struct s3c2410_platform_nand *plat;
 	struct s3c2410_nand_info *info;
 	struct s3c2410_nand_mtd *nmtd;
 	struct s3c2410_nand_set *sets;
@@ -968,8 +1090,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 	int nr_sets;
 	int setno;
 
-	cpu_type = platform_get_device_id(pdev)->driver_data;
-
 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (info == NULL) {
 		err = -ENOMEM;
@@ -991,6 +1111,16 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
 	s3c2410_nand_clk_set_state(info, CLOCK_ENABLE);
 
+	if (pdev->dev.of_node)
+		err = s3c24xx_nand_probe_dt(pdev);
+	else
+		err = s3c24xx_nand_probe_pdata(pdev);
+
+	if (err)
+		goto exit_error;
+
+	plat = to_nand_plat(pdev);
+
 	/* allocate and map the resource */
 
 	/* currently we assume we have the one resource */
@@ -999,7 +1129,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
 	info->device	= &pdev->dev;
 	info->platform	= plat;
-	info->cpu_type	= cpu_type;
 
 	info->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(info->regs)) {
@@ -1009,12 +1138,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "mapped registers at %p\n", info->regs);
 
-	/* initialise the hardware */
-
-	err = s3c2410_nand_inithw(info);
-	if (err != 0)
-		goto exit_error;
-
 	sets = (plat != NULL) ? plat->sets : NULL;
 	nr_sets = (plat != NULL) ? plat->nr_sets : 1;
 
@@ -1058,6 +1181,11 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 			sets++;
 	}
 
+	/* initialise the hardware */
+	err = s3c2410_nand_inithw(info);
+	if (err != 0)
+		goto exit_error;
+
 	err = s3c2410_nand_cpufreq_register(info);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to init cpufreq support\n");
@@ -1158,6 +1286,7 @@ static struct platform_driver s3c24xx_nand_driver = {
 	.id_table	= s3c24xx_driver_ids,
 	.driver		= {
 		.name	= "s3c24xx-nand",
+		.of_match_table = s3c24xx_nand_dt_ids,
 	},
 };
 
diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h
index 729af13d1773..f01659026b26 100644
--- a/include/linux/platform_data/mtd-nand-s3c2410.h
+++ b/include/linux/platform_data/mtd-nand-s3c2410.h
@@ -40,6 +40,7 @@ struct s3c2410_nand_set {
 	char			*name;
 	int			*nr_map;
 	struct mtd_partition	*partitions;
+	struct device_node	*of_node;
 };
 
 struct s3c2410_platform_nand {
-- 
cgit v1.2.3


From 204e7ecd47e26cc12d9e8e8a7e7a2eeb9573f0ba Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 1 Oct 2016 10:24:02 +0200
Subject: mtd: nand: Add a few more timings to nand_sdr_timings

Add the tR_max, tBERS_max, tPROG_max and tCCS_min timings to the
nand_sdr_timings struct.
Assign default/safe values for the statically defined timings, and
extract them from the ONFI parameter table if the NAND is ONFI
compliant.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
---
 drivers/mtd/nand/nand_timings.c | 26 +++++++++++++++++++++++++-
 include/linux/mtd/nand.h        |  8 ++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c
index 13a587407be3..f06312df3669 100644
--- a/drivers/mtd/nand/nand_timings.c
+++ b/drivers/mtd/nand/nand_timings.c
@@ -18,6 +18,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 20000,
 			.tALS_min = 50000,
@@ -58,6 +60,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 10000,
 			.tALS_min = 25000,
@@ -98,6 +102,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 10000,
 			.tALS_min = 15000,
@@ -138,6 +144,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 5000,
 			.tALS_min = 10000,
@@ -178,6 +186,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 5000,
 			.tALS_min = 10000,
@@ -218,6 +228,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	{
 		.type = NAND_SDR_IFACE,
 		.timings.sdr = {
+			.tCCS_min = 500000,
+			.tR_max = 200000000,
 			.tADL_min = 400000,
 			.tALH_min = 5000,
 			.tALS_min = 10000,
@@ -290,10 +302,22 @@ int onfi_init_data_interface(struct nand_chip *chip,
 	*iface = onfi_sdr_timings[timing_mode];
 
 	/*
-	 * TODO: initialize timings that cannot be deduced from timing mode:
+	 * Initialize timings that cannot be deduced from timing mode:
 	 * tR, tPROG, tCCS, ...
 	 * These information are part of the ONFI parameter page.
 	 */
+	if (chip->onfi_version) {
+		struct nand_onfi_params *params = &chip->onfi_params;
+		struct nand_sdr_timings *timings = &iface->timings.sdr;
+
+		/* microseconds -> picoseconds */
+		timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog);
+		timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers);
+		timings->tR_max = 1000000UL * le16_to_cpu(params->t_r);
+
+		/* nanoseconds -> picoseconds */
+		timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs);
+	}
 
 	return 0;
 }
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c5d3d5024fc8..6fe83bce83a6 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -584,6 +584,10 @@ struct nand_buffers {
  *
  * All these timings are expressed in picoseconds.
  *
+ * @tBERS_max: Block erase time
+ * @tCCS_min: Change column setup time
+ * @tPROG_max: Page program time
+ * @tR_max: Page read time
  * @tALH_min: ALE hold time
  * @tADL_min: ALE to data loading time
  * @tALS_min: ALE setup time
@@ -621,6 +625,10 @@ struct nand_buffers {
  * @tWW_min: WP# transition to WE# low
  */
 struct nand_sdr_timings {
+	u32 tBERS_max;
+	u32 tCCS_min;
+	u32 tPROG_max;
+	u32 tR_max;
 	u32 tALH_min;
 	u32 tADL_min;
 	u32 tALS_min;
-- 
cgit v1.2.3


From 6ea40a3ba93e1b14ffb349e276f9dfefc4334b99 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 1 Oct 2016 10:24:03 +0200
Subject: mtd: nand: Wait tCCS after a column change

Drivers implementing ->cmd_ctrl() and relying on the default ->cmdfunc()
implementation usually don't wait tCCS when a column change (RNDIN or
RNDOUT) is requested.
Add an option flag to ask the core to do so (note that we keep this as
an opt-in to avoid breaking existing implementations), and make use of
the ->data_interface information is available (otherwise, wait 500ns).

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
---
 drivers/mtd/nand/nand_base.c | 26 +++++++++++++++++++++++++-
 include/linux/mtd/nand.h     | 10 ++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index e5718e5ecf92..0acb0070280a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -709,6 +709,25 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 	nand_wait_ready(mtd);
 }
 
+static void nand_ccs_delay(struct nand_chip *chip)
+{
+	/*
+	 * The controller already takes care of waiting for tCCS when the RNDIN
+	 * or RNDOUT command is sent, return directly.
+	 */
+	if (!(chip->options & NAND_WAIT_TCCS))
+		return;
+
+	/*
+	 * Wait tCCS_min if it is correctly defined, otherwise wait 500ns
+	 * (which should be safe for all NANDs).
+	 */
+	if (chip->data_interface && chip->data_interface->timings.sdr.tCCS_min)
+		ndelay(chip->data_interface->timings.sdr.tCCS_min / 1000);
+	else
+		ndelay(500);
+}
+
 /**
  * nand_command_lp - [DEFAULT] Send command to NAND large page device
  * @mtd: MTD device structure
@@ -773,10 +792,13 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_ERASE1:
 	case NAND_CMD_ERASE2:
 	case NAND_CMD_SEQIN:
-	case NAND_CMD_RNDIN:
 	case NAND_CMD_STATUS:
 		return;
 
+	case NAND_CMD_RNDIN:
+		nand_ccs_delay(chip);
+		return;
+
 	case NAND_CMD_RESET:
 		if (chip->dev_ready)
 			break;
@@ -795,6 +817,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
 			       NAND_NCE | NAND_CTRL_CHANGE);
+
+		nand_ccs_delay(chip);
 		return;
 
 	case NAND_CMD_READ0:
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 6fe83bce83a6..970ceb948835 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -210,6 +210,16 @@ enum nand_ecc_algo {
  */
 #define NAND_USE_BOUNCE_BUFFER	0x00100000
 
+/*
+ * In case your controller is implementing ->cmd_ctrl() and is relying on the
+ * default ->cmdfunc() implementation, you may want to let the core handle the
+ * tCCS delay which is required when a column change (RNDIN or RNDOUT) is
+ * requested.
+ * If your controller already takes care of this delay, you don't need to set
+ * this flag.
+ */
+#define NAND_WAIT_TCCS		0x00200000
+
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
 #define NAND_CONTROLLER_ALLOC	0x80000000
-- 
cgit v1.2.3


From 5b4e2900512321435a5cd7dd77f58f23f3109950 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Fri, 4 Nov 2016 01:10:56 -0400
Subject: net: phy: broadcom: add bcm54xx_auxctl_read

Add a helper function to read the AUXCTL register for the BCM54xx.  This
mirrors the bcm54xx_auxctl_write function already present in the code.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 10 ++++++++++
 include/linux/brcmphy.h    |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 583ef8a2ec8d..3a64b3d8eca8 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -30,6 +30,16 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
 
+static int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
+{
+	/* The register must be written to both the Shadow Register Select and
+	 * the Shadow Read Register Selector
+	 */
+	phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum |
+		  regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT);
+	return phy_read(phydev, MII_BCM54XX_AUX_CTL);
+}
+
 static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val)
 {
 	return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val);
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 60def78c4e12..0ed66914b61c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -110,6 +110,7 @@
 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX	0x0200
 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC	0x7000
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
+#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK	0x0007
 
-- 
cgit v1.2.3


From b14995ac2527b43a75c9190fbd4efd43fb1f4562 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Fri, 4 Nov 2016 01:10:58 -0400
Subject: net: phy: broadcom: Add BCM54810 PHY entry

The BCM54810 PHY requires some semi-unique configuration, which results
in some additional configuration in addition to the standard config.
Also, some users of the BCM54810 require the PHY lanes to be swapped.
Since there is no way to detect this, add a device tree query to see if
it is applicable.

Inspired-by: Vikas Soni <vsoni@broadcom.com>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig    |  2 +-
 drivers/net/phy/broadcom.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/brcmphy.h    |  9 +++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index ff31c10a3485..d3fcfd291913 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -217,7 +217,7 @@ config BROADCOM_PHY
 	select BCM_NET_PHYLIB
 	---help---
 	  Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
-	  BCM5481 and BCM5482 PHYs.
+	  BCM5481, BCM54810 and BCM5482 PHYs.
 
 config CICADA_PHY
 	tristate "Cicada PHYs"
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 3a64b3d8eca8..b1e32e9be1b3 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/brcmphy.h>
-
+#include <linux/of.h>
 
 #define BRCM_PHY_MODEL(phydev) \
 	((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
@@ -45,6 +45,34 @@ static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val)
 	return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val);
 }
 
+static int bcm54810_config(struct phy_device *phydev)
+{
+	int rc, val;
+
+	val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL);
+	val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN;
+	rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL,
+			       val);
+	if (rc < 0)
+		return rc;
+
+	val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+	val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN;
+	val |= MII_BCM54XX_AUXCTL_MISC_WREN;
+	rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+				  val);
+	if (rc < 0)
+		return rc;
+
+	val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL);
+	val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
+	rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
 /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */
 static int bcm50610_a0_workaround(struct phy_device *phydev)
 {
@@ -217,6 +245,12 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 	    (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE))
 		bcm54xx_adjust_rxrefclk(phydev);
 
+	if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) {
+		err = bcm54810_config(phydev);
+		if (err)
+			return err;
+	}
+
 	bcm54xx_phydsp_config(phydev);
 
 	return 0;
@@ -314,6 +348,7 @@ static int bcm5482_read_status(struct phy_device *phydev)
 
 static int bcm5481_config_aneg(struct phy_device *phydev)
 {
+	struct device_node *np = phydev->mdio.dev.of_node;
 	int ret;
 
 	/* Aneg firsly. */
@@ -344,6 +379,14 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
 		phy_write(phydev, 0x18, reg);
 	}
 
+	if (of_property_read_bool(np, "enet-phy-lane-swap")) {
+		/* Lane Swap - Undocumented register...magic! */
+		ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9,
+					0x11B);
+		if (ret < 0)
+			return ret;
+	}
+
 	return ret;
 }
 
@@ -577,6 +620,18 @@ static struct phy_driver broadcom_drivers[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm_phy_ack_intr,
 	.config_intr	= bcm_phy_config_intr,
+}, {
+	.phy_id         = PHY_ID_BCM54810,
+	.phy_id_mask    = 0xfffffff0,
+	.name           = "Broadcom BCM54810",
+	.features       = PHY_GBIT_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags          = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init    = bcm54xx_config_init,
+	.config_aneg    = bcm5481_config_aneg,
+	.read_status    = genphy_read_status,
+	.ack_interrupt  = bcm_phy_ack_intr,
+	.config_intr    = bcm_phy_config_intr,
 }, {
 	.phy_id		= PHY_ID_BCM5482,
 	.phy_id_mask	= 0xfffffff0,
@@ -661,6 +716,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
 	{ PHY_ID_BCM54616S, 0xfffffff0 },
 	{ PHY_ID_BCM5464, 0xfffffff0 },
 	{ PHY_ID_BCM5481, 0xfffffff0 },
+	{ PHY_ID_BCM54810, 0xfffffff0 },
 	{ PHY_ID_BCM5482, 0xfffffff0 },
 	{ PHY_ID_BCM50610, 0xfffffff0 },
 	{ PHY_ID_BCM50610M, 0xfffffff0 },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 0ed66914b61c..848dc508ef57 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -13,6 +13,7 @@
 #define PHY_ID_BCM5241			0x0143bc30
 #define PHY_ID_BCMAC131			0x0143bc70
 #define PHY_ID_BCM5481			0x0143bca0
+#define PHY_ID_BCM54810			0x03625d00
 #define PHY_ID_BCM5482			0x0143bcb0
 #define PHY_ID_BCM5411			0x00206070
 #define PHY_ID_BCM5421			0x002060e0
@@ -56,6 +57,7 @@
 #define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00002000
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
+
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
 #define PHY_BRCM_7XXX_PATCH(x)		((x) & 0xff)
@@ -111,6 +113,7 @@
 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC	0x7000
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN	(1 << 8)
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK	0x0007
 
@@ -192,6 +195,12 @@
 #define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
 #define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
 
+/* BCM54810 Registers */
+#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL	(MII_BCM54XX_EXP_SEL_ER + 0x90)
+#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN	(1 << 0)
+#define BCM54810_SHD_CLK_CTL			0x3
+#define BCM54810_SHD_CLK_CTL_GTXCLK_EN		(1 << 9)
+
 
 /*****************************************************************************/
 /* Fast Ethernet Transceiver definitions. */
-- 
cgit v1.2.3


From 7c13f97ffde63cc792c49ec1513f3974f2f05229 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 4 Nov 2016 11:28:59 +0100
Subject: udp: do fwd memory scheduling on dequeue

A new argument is added to __skb_recv_datagram to provide
an explicit skb destructor, invoked under the receive queue
lock.
The UDP protocol uses such argument to perform memory
reclaiming on dequeue, so that the UDP protocol does not
set anymore skb->desctructor.
Instead explicit memory reclaiming is performed at close() time and
when skbs are removed from the receive queue.
The in kernel UDP protocol users now need to call a
skb_recv_udp() variant instead of skb_recv_datagram() to
properly perform memory accounting on dequeue.

Overall, this allows acquiring only once the receive queue
lock on dequeue.

Tested using pktgen with random src port, 64 bytes packet,
wire-speed on a 10G link as sender and udp_sink as the receiver,
using an l4 tuple rxhash to stress the contention, and one or more
udp_sink instances with reuseport.

nr sinks	vanilla		patched
1		440		560
3		2150		2300
6		3650		3800
9		4450		4600
12		6250		6450

v1 -> v2:
 - do rmem and allocated memory scheduling under the receive lock
 - do bulk scheduling in first_packet_length() and in udp_destruct_sock()
 - avoid the typdef for the dequeue callback

Suggested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  4 ++++
 include/net/udp.h      | 15 +++++++++++++++
 net/core/datagram.c    | 17 ++++++++++++-----
 net/ipv4/udp.c         | 42 ++++++++++++++++++++++++------------------
 net/ipv6/udp.c         |  3 +--
 net/rxrpc/input.c      |  7 +++----
 net/sunrpc/svcsock.c   |  2 +-
 net/sunrpc/xprtsock.c  |  2 +-
 net/unix/af_unix.c     |  4 ++--
 9 files changed, 63 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc6e23eaac91..a4aeeca7e805 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
 				const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
+					void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
 					int *peeked, int *off, int *err,
 					struct sk_buff **last);
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    void (*destructor)(struct sock *sk,
+						       struct sk_buff *skb),
 				    int *peeked, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
diff --git a/include/net/udp.h b/include/net/udp.h
index 6134f37ba3ab..e6e4e19be387 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 /* net/ipv4/udp.c */
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
+static inline struct sk_buff *
+__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked,
+	       int *off, int *err)
+{
+	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				   udp_skb_destructor, peeked, off, err);
+}
+static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
+					   int noblock, int *err)
+{
+	int peeked, off = 0;
+
+	return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
+}
 
 void udp_v4_early_demux(struct sk_buff *skb);
 int udp_get_port(struct sock *sk, unsigned short snum,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index bfb973aebb5b..49816af8586b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -165,6 +165,7 @@ done:
  *	__skb_try_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
  *	@flags: MSG_ flags
+ *	@destructor: invoked under the receive lock on successful dequeue
  *	@peeked: returns non-zero if this packet has been seen before
  *	@off: an offset in bytes to peek skb from. Returns an offset
  *	      within an skb where data actually starts
@@ -197,6 +198,8 @@ done:
  *	the standard around please.
  */
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+					void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
 					int *peeked, int *off, int *err,
 					struct sk_buff **last)
 {
@@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 				}
 
 				atomic_inc(&skb->users);
-			} else
+			} else {
 				__skb_unlink(skb, queue);
-
+				if (destructor)
+					destructor(sk, skb);
+			}
 			spin_unlock_irqrestore(&queue->lock, cpu_flags);
 			*off = _off;
 			return skb;
@@ -262,6 +267,8 @@ no_packet:
 EXPORT_SYMBOL(__skb_try_recv_datagram);
 
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+				    void (*destructor)(struct sock *sk,
+						       struct sk_buff *skb),
 				    int *peeked, int *off, int *err)
 {
 	struct sk_buff *skb, *last;
@@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
-		skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
-					      &last);
+		skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
+					      off, err, &last);
 		if (skb)
 			return skb;
 
@@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
 	int peeked, off = 0;
 
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				   &peeked, &off, err);
+				   NULL, &peeked, &off, err);
 }
 EXPORT_SYMBOL(skb_recv_datagram);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 28a0165cb848..097b70628631 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1173,26 +1173,26 @@ out:
 	return ret;
 }
 
+/* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial)
 {
 	int amt;
 
 	atomic_sub(size, &sk->sk_rmem_alloc);
-
-	spin_lock_bh(&sk->sk_receive_queue.lock);
 	sk->sk_forward_alloc += size;
 	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
 	sk->sk_forward_alloc -= amt;
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
 
 	if (amt)
 		__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 }
 
-static void udp_rmem_free(struct sk_buff *skb)
+/* Note: called with sk_receive_queue.lock held */
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 {
-	udp_rmem_release(skb->sk, skb->truesize, 1);
+	udp_rmem_release(sk, skb->truesize, 1);
 }
+EXPORT_SYMBOL(udp_skb_destructor);
 
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
@@ -1229,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 
 	sk->sk_forward_alloc -= size;
 
-	/* the skb owner in now the udp socket */
-	skb->sk = sk;
-	skb->destructor = udp_rmem_free;
+	/* no need to setup a destructor, we will explicitly release the
+	 * forward allocated memory on dequeue
+	 */
 	skb->dev = NULL;
 	sock_skb_set_dropcount(sk, skb);
 
@@ -1255,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
 static void udp_destruct_sock(struct sock *sk)
 {
 	/* reclaim completely the forward allocated memory */
-	__skb_queue_purge(&sk->sk_receive_queue);
-	udp_rmem_release(sk, 0, 0);
+	unsigned int total = 0;
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		total += skb->truesize;
+		kfree_skb(skb);
+	}
+	udp_rmem_release(sk, total, 0);
+
 	inet_sock_destruct(sk);
 }
 
@@ -1288,12 +1295,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
  */
 static int first_packet_length(struct sock *sk)
 {
-	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+	struct sk_buff_head *rcvq = &sk->sk_receive_queue;
 	struct sk_buff *skb;
+	int total = 0;
 	int res;
 
-	__skb_queue_head_init(&list_kill);
-
 	spin_lock_bh(&rcvq->lock);
 	while ((skb = skb_peek(rcvq)) != NULL &&
 		udp_lib_checksum_complete(skb)) {
@@ -1303,12 +1309,13 @@ static int first_packet_length(struct sock *sk)
 				IS_UDPLITE(sk));
 		atomic_inc(&sk->sk_drops);
 		__skb_unlink(skb, rcvq);
-		__skb_queue_tail(&list_kill, skb);
+		total += skb->truesize;
+		kfree_skb(skb);
 	}
 	res = skb ? skb->len : -1;
+	if (total)
+		udp_rmem_release(sk, total, 1);
 	spin_unlock_bh(&rcvq->lock);
-
-	__skb_queue_purge(&list_kill);
 	return res;
 }
 
@@ -1363,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 
 try_again:
 	peeking = off = sk_peek_offset(sk, flags);
-	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				  &peeked, &off, &err);
+	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
 	if (!skb)
 		return err;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b5a23ce8981d..5313818b7485 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 try_again:
 	peeking = off = sk_peek_offset(sk, flags);
-	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				  &peeked, &off, &err);
+	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
 	if (!skb)
 		return err;
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 44fb8d893c7d..1d87b5453ef7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 
 	ASSERT(!irqs_disabled());
 
-	skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
+	skb = skb_recv_udp(udp_sk, 0, 1, &ret);
 	if (!skb) {
 		if (ret == -EAGAIN)
 			return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
 
 	__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
 
-	/* The socket buffer we have is owned by UDP, with UDP's data all over
-	 * it, but we really want our own data there.
+	/* The UDP protocol already released all skb resources;
+	 * we are free to add our own data there.
 	 */
-	skb_orphan(skb);
 	sp = rxrpc_skb(skb);
 
 	/* dig out the RxRPC connection details */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e2a55dc787e6..78da4aee3543 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
 			     0, 0, MSG_PEEK | MSG_DONTWAIT);
 	if (err >= 0)
-		skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
+		skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
 
 	if (skb == NULL) {
 		if (err != -EAGAIN) {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1758665d609c..7178d0aa7861 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
 	if (sk == NULL)
 		goto out;
 	for (;;) {
-		skb = skb_recv_datagram(sk, 0, 1, &err);
+		skb = skb_recv_udp(sk, 0, 1, &err);
 		if (skb != NULL) {
 			xs_udp_data_read_skb(&transport->xprt, sk, skb);
 			consume_skb(skb);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 145082e2ba36..87620183910e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		mutex_lock(&u->iolock);
 
 		skip = sk_peek_offset(sk, flags);
-		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
-					      &last);
+		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
+					      &err, &last);
 		if (skb)
 			break;
 
-- 
cgit v1.2.3


From ebcf6f979d55f35dfe36956364f0dce8c738220b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 2 Nov 2016 07:12:31 -0400
Subject: swiotlb: Drop unused functions swiotlb_map_sg and swiotlb_unmap_sg

There are no users for swiotlb_map_sg or swiotlb_unmap_sg so we might as
well just drop them.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by:  Christoph Hellwig <hch@infradead.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 include/linux/swiotlb.h |  8 --------
 lib/swiotlb.c           | 16 ----------------
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5f81f8a187f2..f0d258967869 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir,
 			       unsigned long attrs);
 
-extern int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	       enum dma_data_direction dir);
-
-extern void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction dir);
-
 extern int
 swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		     enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 6ce764410ae4..bdcc0d8a7405 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -910,14 +910,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 }
 EXPORT_SYMBOL(swiotlb_map_sg_attrs);
 
-int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-	       enum dma_data_direction dir)
-{
-	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
-}
-EXPORT_SYMBOL(swiotlb_map_sg);
-
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
  * concerning calls here are the same as for swiotlb_unmap_page() above.
@@ -938,14 +930,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
-void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		 enum dma_data_direction dir)
-{
-	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
-}
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-
 /*
  * Make physical memory consistent for a set of streaming mode DMA translations
  * after a transfer.
-- 
cgit v1.2.3


From 0443fa003fa199f41bfbed3012f314d02c5b1f24 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 2 Nov 2016 07:13:02 -0400
Subject: swiotlb: Add support for DMA_ATTR_SKIP_CPU_SYNC

As a first step to making DMA_ATTR_SKIP_CPU_SYNC apply to architectures
beyond just ARM I need to make it so that the swiotlb will respect the
flag.  In order to do that I also need to update the swiotlb-xen since it
heavily makes use of the functionality.

Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 drivers/xen/swiotlb-xen.c | 11 +++++----
 include/linux/swiotlb.h   |  6 +++--
 lib/swiotlb.c             | 59 ++++++++++++++++++++++++++++++-----------------
 3 files changed, 49 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b8014bf2b2ed..3d048afcee38 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -405,7 +405,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 */
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
+	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
+				     attrs);
 	if (map == SWIOTLB_MAP_ERROR)
 		return DMA_ERROR_CODE;
 
@@ -419,7 +420,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size))
 		return dev_addr;
 
-	swiotlb_tbl_unmap_single(dev, map, size, dir);
+	swiotlb_tbl_unmap_single(dev, map, size, dir,
+				 attrs | DMA_ATTR_SKIP_CPU_SYNC);
 
 	return DMA_ERROR_CODE;
 }
@@ -445,7 +447,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
 		return;
 	}
 
@@ -558,11 +560,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 								 start_dma_addr,
 								 sg_phys(sg),
 								 sg->length,
-								 dir);
+								 dir, attrs);
 			if (map == SWIOTLB_MAP_ERROR) {
 				dev_warn(hwdev, "swiotlb buffer is full\n");
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
+				attrs |= DMA_ATTR_SKIP_CPU_SYNC;
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 							   attrs);
 				sg_dma_len(sgl) = 0;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f0d258967869..183f37c8a5e1 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -44,11 +44,13 @@ enum dma_sync_target {
 extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 					  dma_addr_t tbl_dma_addr,
 					  phys_addr_t phys, size_t size,
-					  enum dma_data_direction dir);
+					  enum dma_data_direction dir,
+					  unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
-				     size_t size, enum dma_data_direction dir);
+				     size_t size, enum dma_data_direction dir,
+				     unsigned long attrs);
 
 extern void swiotlb_tbl_sync_single(struct device *hwdev,
 				    phys_addr_t tlb_addr,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bdcc0d8a7405..8e883c762728 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -425,7 +425,8 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 				   dma_addr_t tbl_dma_addr,
 				   phys_addr_t orig_addr, size_t size,
-				   enum dma_data_direction dir)
+				   enum dma_data_direction dir,
+				   unsigned long attrs)
 {
 	unsigned long flags;
 	phys_addr_t tlb_addr;
@@ -526,7 +527,8 @@ found:
 	 */
 	for (i = 0; i < nslots; i++)
 		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
 
 	return tlb_addr;
@@ -539,18 +541,20 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 
 static phys_addr_t
 map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir)
+	   enum dma_data_direction dir, unsigned long attrs)
 {
 	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
-	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
+				      dir, attrs);
 }
 
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t size, enum dma_data_direction dir)
+			      size_t size, enum dma_data_direction dir,
+			      unsigned long attrs)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -561,6 +565,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	 * First, sync the memory before unmapping the entry
 	 */
 	if (orig_addr != INVALID_PHYS_ADDR &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
 		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
 
@@ -654,7 +659,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * GFP_DMA memory; fall back on map_single(), which
 		 * will grab memory from the lowest available address range.
 		 */
-		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+		phys_addr_t paddr = map_single(hwdev, 0, size,
+					       DMA_FROM_DEVICE, 0);
 		if (paddr == SWIOTLB_MAP_ERROR)
 			goto err_warn;
 
@@ -667,9 +673,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			       (unsigned long long)dma_mask,
 			       (unsigned long long)dev_addr);
 
-			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+			/*
+			 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
+			 * The DMA_ATTR_SKIP_CPU_SYNC is optional.
+			 */
 			swiotlb_tbl_unmap_single(hwdev, paddr,
-						 size, DMA_TO_DEVICE);
+						 size, DMA_TO_DEVICE,
+						 DMA_ATTR_SKIP_CPU_SYNC);
 			goto err_warn;
 		}
 	}
@@ -698,8 +708,12 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	if (!is_swiotlb_buffer(paddr))
 		free_pages((unsigned long)vaddr, get_order(size));
 	else
-		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
+		/*
+		 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
+		 * DMA_ATTR_SKIP_CPU_SYNC is optional.
+		 */
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE,
+					 DMA_ATTR_SKIP_CPU_SYNC);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -755,7 +769,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
 	/* Oh well, have to allocate and map a bounce buffer. */
-	map = map_single(dev, phys, size, dir);
+	map = map_single(dev, phys, size, dir, attrs);
 	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
@@ -764,12 +778,13 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	dev_addr = phys_to_dma(dev, map);
 
 	/* Ensure that the address returned is DMA'ble */
-	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir);
-		return phys_to_dma(dev, io_tlb_overflow_buffer);
-	}
+	if (dma_capable(dev, dev_addr, size))
+		return dev_addr;
+
+	swiotlb_tbl_unmap_single(dev, map, size, dir,
+				 attrs | DMA_ATTR_SKIP_CPU_SYNC);
 
-	return dev_addr;
+	return phys_to_dma(dev, io_tlb_overflow_buffer);
 }
 EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
@@ -782,14 +797,15 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
  * whatever the device wrote there.
  */
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, enum dma_data_direction dir)
+			 size_t size, enum dma_data_direction dir,
+			 unsigned long attrs)
 {
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
 		return;
 	}
 
@@ -809,7 +825,7 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			size_t size, enum dma_data_direction dir,
 			unsigned long attrs)
 {
-	unmap_single(hwdev, dev_addr, size, dir);
+	unmap_single(hwdev, dev_addr, size, dir, attrs);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -891,7 +907,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			phys_addr_t map = map_single(hwdev, sg_phys(sg),
-						     sg->length, dir);
+						     sg->length, dir, attrs);
 			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
@@ -925,7 +941,8 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
+		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+			     attrs);
 
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
-- 
cgit v1.2.3


From 0f78ba96bbcf30a78224fe56f8fd72f87915afdd Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 23 Feb 2016 15:32:14 -0800
Subject: Input: gpio_keys_polled - keep button data constant

Commit 633a21d80b4a ("input: gpio_keys_polled: Add support for GPIO
descriptors") placed gpio descriptor into gpio_keys_button structure, which
is supposed to be part of platform data and not modifiable by the driver.
To keep the data constant, let's move the descriptor to
gpio_keys_button_data structure instead.

Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/gpio_keys.c        |  10 +--
 drivers/input/keyboard/gpio_keys_polled.c | 105 +++++++++++++++++-------------
 include/linux/gpio_keys.h                 |   5 +-
 3 files changed, 64 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 29093657f2ef..890eb397d987 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -624,7 +624,6 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 	struct gpio_keys_button *button;
 	int error;
 	int nbuttons;
-	int i;
 
 	node = dev->of_node;
 	if (!node)
@@ -640,19 +639,18 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 	if (!pdata)
 		return ERR_PTR(-ENOMEM);
 
-	pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
+	button = (struct gpio_keys_button *)(pdata + 1);
+
+	pdata->buttons = button;
 	pdata->nbuttons = nbuttons;
 
 	pdata->rep = !!of_get_property(node, "autorepeat", NULL);
 
 	of_property_read_string(node, "label", &pdata->name);
 
-	i = 0;
 	for_each_available_child_of_node(node, pp) {
 		enum of_gpio_flags flags;
 
-		button = &pdata->buttons[i++];
-
 		button->gpio = of_get_gpio_flags(pp, 0, &flags);
 		if (button->gpio < 0) {
 			error = button->gpio;
@@ -694,6 +692,8 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 		if (of_property_read_u32(pp, "debounce-interval",
 					 &button->debounce_interval))
 			button->debounce_interval = 5;
+
+		button++;
 	}
 
 	if (pdata->nbuttons == 0)
diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c
index 62bdb1d48c49..2cf407831f06 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -30,6 +30,7 @@
 #define DRV_NAME	"gpio-keys-polled"
 
 struct gpio_keys_button_data {
+	struct gpio_desc *gpiod;
 	int last_state;
 	int count;
 	int threshold;
@@ -46,7 +47,7 @@ struct gpio_keys_polled_dev {
 };
 
 static void gpio_keys_button_event(struct input_polled_dev *dev,
-				   struct gpio_keys_button *button,
+				   const struct gpio_keys_button *button,
 				   int state)
 {
 	struct gpio_keys_polled_dev *bdev = dev->private;
@@ -70,15 +71,15 @@ static void gpio_keys_button_event(struct input_polled_dev *dev,
 }
 
 static void gpio_keys_polled_check_state(struct input_polled_dev *dev,
-					 struct gpio_keys_button *button,
+					 const struct gpio_keys_button *button,
 					 struct gpio_keys_button_data *bdata)
 {
 	int state;
 
 	if (bdata->can_sleep)
-		state = !!gpiod_get_value_cansleep(button->gpiod);
+		state = !!gpiod_get_value_cansleep(bdata->gpiod);
 	else
-		state = !!gpiod_get_value(button->gpiod);
+		state = !!gpiod_get_value(bdata->gpiod);
 
 	gpio_keys_button_event(dev, button, state);
 
@@ -142,48 +143,35 @@ static void gpio_keys_polled_close(struct input_polled_dev *dev)
 		pdata->disable(bdev->dev);
 }
 
-static struct gpio_keys_platform_data *gpio_keys_polled_get_devtree_pdata(struct device *dev)
+static struct gpio_keys_platform_data *
+gpio_keys_polled_get_devtree_pdata(struct device *dev)
 {
 	struct gpio_keys_platform_data *pdata;
 	struct gpio_keys_button *button;
 	struct fwnode_handle *child;
-	int error;
 	int nbuttons;
 
 	nbuttons = device_get_child_node_count(dev);
 	if (nbuttons == 0)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata) + nbuttons * sizeof(*button),
 			     GFP_KERNEL);
 	if (!pdata)
 		return ERR_PTR(-ENOMEM);
 
-	pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
+	button = (struct gpio_keys_button *)(pdata + 1);
+
+	pdata->buttons = button;
+	pdata->nbuttons = nbuttons;
 
 	pdata->rep = device_property_present(dev, "autorepeat");
 	device_property_read_u32(dev, "poll-interval", &pdata->poll_interval);
 
 	device_for_each_child_node(dev, child) {
-		struct gpio_desc *desc;
-
-		desc = devm_get_gpiod_from_child(dev, NULL, child);
-		if (IS_ERR(desc)) {
-			error = PTR_ERR(desc);
-			if (error != -EPROBE_DEFER)
-				dev_err(dev,
-					"Failed to get gpio flags, error: %d\n",
-					error);
-			fwnode_handle_put(child);
-			return ERR_PTR(error);
-		}
-
-		button = &pdata->buttons[pdata->nbuttons++];
-		button->gpiod = desc;
-
-		if (fwnode_property_read_u32(child, "linux,code", &button->code)) {
-			dev_err(dev, "Button without keycode: %d\n",
-				pdata->nbuttons - 1);
+		if (fwnode_property_read_u32(child, "linux,code",
+					     &button->code)) {
+			dev_err(dev, "button without keycode\n");
 			fwnode_handle_put(child);
 			return ERR_PTR(-EINVAL);
 		}
@@ -206,10 +194,9 @@ static struct gpio_keys_platform_data *gpio_keys_polled_get_devtree_pdata(struct
 		if (fwnode_property_read_u32(child, "debounce-interval",
 					     &button->debounce_interval))
 			button->debounce_interval = 5;
-	}
 
-	if (pdata->nbuttons == 0)
-		return ERR_PTR(-EINVAL);
+		button++;
+	}
 
 	return pdata;
 }
@@ -220,7 +207,7 @@ static void gpio_keys_polled_set_abs_params(struct input_dev *input,
 	int i, min = 0, max = 0;
 
 	for (i = 0; i < pdata->nbuttons; i++) {
-		struct gpio_keys_button *button = &pdata->buttons[i];
+		const struct gpio_keys_button *button = &pdata->buttons[i];
 
 		if (button->type != EV_ABS || button->code != code)
 			continue;
@@ -230,6 +217,7 @@ static void gpio_keys_polled_set_abs_params(struct input_dev *input,
 		if (button->value > max)
 			max = button->value;
 	}
+
 	input_set_abs_params(input, code, min, max, 0, 0);
 }
 
@@ -242,6 +230,7 @@ MODULE_DEVICE_TABLE(of, gpio_keys_polled_of_match);
 static int gpio_keys_polled_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct fwnode_handle *child = NULL;
 	const struct gpio_keys_platform_data *pdata = dev_get_platdata(dev);
 	struct gpio_keys_polled_dev *bdev;
 	struct input_polled_dev *poll_dev;
@@ -254,10 +243,6 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
 		pdata = gpio_keys_polled_get_devtree_pdata(dev);
 		if (IS_ERR(pdata))
 			return PTR_ERR(pdata);
-		if (!pdata) {
-			dev_err(dev, "missing platform data\n");
-			return -EINVAL;
-		}
 	}
 
 	if (!pdata->poll_interval) {
@@ -300,20 +285,40 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
 		__set_bit(EV_REP, input->evbit);
 
 	for (i = 0; i < pdata->nbuttons; i++) {
-		struct gpio_keys_button *button = &pdata->buttons[i];
+		const struct gpio_keys_button *button = &pdata->buttons[i];
 		struct gpio_keys_button_data *bdata = &bdev->data[i];
 		unsigned int type = button->type ?: EV_KEY;
 
 		if (button->wakeup) {
 			dev_err(dev, DRV_NAME " does not support wakeup\n");
+			fwnode_handle_put(child);
 			return -EINVAL;
 		}
 
-		/*
-		 * Legacy GPIO number so request the GPIO here and
-		 * convert it to descriptor.
-		 */
-		if (!button->gpiod && gpio_is_valid(button->gpio)) {
+		if (!dev_get_platdata(dev)) {
+			/* No legacy static platform data */
+			child = device_get_next_child_node(dev, child);
+			if (!child) {
+				dev_err(dev, "missing child device node\n");
+				return -EINVAL;
+			}
+
+			bdata->gpiod = devm_get_gpiod_from_child(dev, NULL,
+								 child);
+			if (IS_ERR(bdata->gpiod)) {
+				error = PTR_ERR(bdata->gpiod);
+				if (error != -EPROBE_DEFER)
+					dev_err(dev,
+						"failed to get gpio: %d\n",
+						error);
+				fwnode_handle_put(child);
+				return error;
+			}
+		} else if (gpio_is_valid(button->gpio)) {
+			/*
+			 * Legacy GPIO number so request the GPIO here and
+			 * convert it to descriptor.
+			 */
 			unsigned flags = GPIOF_IN;
 
 			if (button->active_low)
@@ -322,18 +327,22 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
 			error = devm_gpio_request_one(&pdev->dev, button->gpio,
 					flags, button->desc ? : DRV_NAME);
 			if (error) {
-				dev_err(dev, "unable to claim gpio %u, err=%d\n",
+				dev_err(dev,
+					"unable to claim gpio %u, err=%d\n",
 					button->gpio, error);
 				return error;
 			}
 
-			button->gpiod = gpio_to_desc(button->gpio);
+			bdata->gpiod = gpio_to_desc(button->gpio);
+			if (!bdata->gpiod) {
+				dev_err(dev,
+					"unable to convert gpio %u to descriptor\n",
+					button->gpio);
+				return -EINVAL;
+			}
 		}
 
-		if (IS_ERR(button->gpiod))
-			return PTR_ERR(button->gpiod);
-
-		bdata->can_sleep = gpiod_cansleep(button->gpiod);
+		bdata->can_sleep = gpiod_cansleep(bdata->gpiod);
 		bdata->last_state = -1;
 		bdata->threshold = DIV_ROUND_UP(button->debounce_interval,
 						pdata->poll_interval);
@@ -344,6 +353,8 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
 							button->code);
 	}
 
+	fwnode_handle_put(child);
+
 	bdev->poll_dev = poll_dev;
 	bdev->dev = dev;
 	bdev->pdata = pdata;
diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index ee2d8c6f9130..0b71024c082c 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -2,7 +2,6 @@
 #define _GPIO_KEYS_H
 
 struct device;
-struct gpio_desc;
 
 /**
  * struct gpio_keys_button - configuration parameters
@@ -18,7 +17,6 @@ struct gpio_desc;
  *			disable button via sysfs
  * @value:		axis value for %EV_ABS
  * @irq:		Irq number in case of interrupt keys
- * @gpiod:		GPIO descriptor
  */
 struct gpio_keys_button {
 	unsigned int code;
@@ -31,7 +29,6 @@ struct gpio_keys_button {
 	bool can_disable;
 	int value;
 	unsigned int irq;
-	struct gpio_desc *gpiod;
 };
 
 /**
@@ -46,7 +43,7 @@ struct gpio_keys_button {
  * @name:		input device name
  */
 struct gpio_keys_platform_data {
-	struct gpio_keys_button *buttons;
+	const struct gpio_keys_button *buttons;
 	int nbuttons;
 	unsigned int poll_interval;
 	unsigned int rep:1;
-- 
cgit v1.2.3


From 63e95b5c4f16e156b98adcf2f7d820ba941c82a3 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 8 Nov 2016 11:32:20 +1100
Subject: dax: coordinate locking for offsets in PMD range

DAX radix tree locking currently locks entries based on the unique
combination of the 'mapping' pointer and the pgoff_t 'index' for the entry.
This works for PTEs, but as we move to PMDs we will need to have all the
offsets within the range covered by the PMD to map to the same bit lock.
To accomplish this, for ranges covered by a PMD entry we will instead lock
based on the page offset of the beginning of the PMD entry.  The 'mapping'
pointer is still used in the same way.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c            | 65 +++++++++++++++++++++++++++++++++--------------------
 include/linux/dax.h |  2 +-
 mm/filemap.c        |  2 +-
 3 files changed, 43 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 835e7f082cff..72387023545e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -64,14 +64,6 @@ static int __init init_dax_wait_table(void)
 }
 fs_initcall(init_dax_wait_table);
 
-static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-					      pgoff_t index)
-{
-	unsigned long hash = hash_long((unsigned long)mapping ^ index,
-				       DAX_WAIT_TABLE_BITS);
-	return wait_table + hash;
-}
-
 static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
 {
 	struct request_queue *q = bdev->bd_queue;
@@ -285,7 +277,7 @@ EXPORT_SYMBOL_GPL(dax_do_io);
  */
 struct exceptional_entry_key {
 	struct address_space *mapping;
-	unsigned long index;
+	pgoff_t entry_start;
 };
 
 struct wait_exceptional_entry_queue {
@@ -293,6 +285,26 @@ struct wait_exceptional_entry_queue {
 	struct exceptional_entry_key key;
 };
 
+static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
+		pgoff_t index, void *entry, struct exceptional_entry_key *key)
+{
+	unsigned long hash;
+
+	/*
+	 * If 'entry' is a PMD, align the 'index' that we use for the wait
+	 * queue to the start of that PMD.  This ensures that all offsets in
+	 * the range covered by the PMD map to the same bit lock.
+	 */
+	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
+
+	key->mapping = mapping;
+	key->entry_start = index;
+
+	hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS);
+	return wait_table + hash;
+}
+
 static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 				       int sync, void *keyp)
 {
@@ -301,7 +313,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 		container_of(wait, struct wait_exceptional_entry_queue, wait);
 
 	if (key->mapping != ewait->key.mapping ||
-	    key->index != ewait->key.index)
+	    key->entry_start != ewait->key.entry_start)
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, NULL);
 }
@@ -359,12 +371,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq;
 
 	init_wait(&ewait.wait);
 	ewait.wait.func = wake_exceptional_entry_func;
-	ewait.key.mapping = mapping;
-	ewait.key.index = index;
 
 	for (;;) {
 		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
@@ -375,6 +385,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 				*slotp = slot;
 			return entry;
 		}
+
+		wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -447,10 +459,20 @@ restart:
 	return entry;
 }
 
+/*
+ * We do not necessarily hold the mapping->tree_lock when we call this
+ * function so it is possible that 'entry' is no longer a valid item in the
+ * radix tree.  This is okay, though, because all we really need to do is to
+ * find the correct waitqueue where tasks might be sleeping waiting for that
+ * old 'entry' and wake them.
+ */
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-				   pgoff_t index, bool wake_all)
+		pgoff_t index, void *entry, bool wake_all)
 {
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	struct exceptional_entry_key key;
+	wait_queue_head_t *wq;
+
+	wq = dax_entry_waitqueue(mapping, index, entry, &key);
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -458,13 +480,8 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 	 * So at this point all tasks that could have seen our entry locked
 	 * must be in the waitqueue and the following check will see them.
 	 */
-	if (waitqueue_active(wq)) {
-		struct exceptional_entry_key key;
-
-		key.mapping = mapping;
-		key.index = index;
+	if (waitqueue_active(wq))
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
-	}
 }
 
 void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
@@ -480,7 +497,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 	}
 	unlock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
 static void put_locked_mapping_entry(struct address_space *mapping,
@@ -505,7 +522,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 		return;
 
 	/* We have to wake up next waiter for the radix tree entry lock */
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
 /*
@@ -532,7 +549,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	radix_tree_delete(&mapping->page_tree, index);
 	mapping->nrexceptional--;
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, true);
+	dax_wake_mapping_entry_waiter(mapping, index, entry, true);
 
 	return 1;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index add6c4bc568f..a41a747d6112 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -22,7 +22,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-				   pgoff_t index, bool wake_all);
+		pgoff_t index, void *entry, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
diff --git a/mm/filemap.c b/mm/filemap.c
index 849f459ad078..1ffb7dcd1b5d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -143,7 +143,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(mapping, page->index, p,
 						      false);
 		}
 	}
-- 
cgit v1.2.3


From b9fde0462e34a05b25c3d68d344971865659abae Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 8 Nov 2016 11:32:35 +1100
Subject: dax: remove dax_pmd_fault()

dax_pmd_fault() is the old struct buffer_head + get_block_t based 2 MiB DAX
fault handler.  This fault handler has been disabled for several kernel
releases, and support for PMDs will be reintroduced using the struct iomap
interface instead.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c            | 213 ----------------------------------------------------
 include/linux/dax.h |   6 +-
 2 files changed, 1 insertion(+), 218 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 72387023545e..3d0b1032c555 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -915,219 +915,6 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 }
 EXPORT_SYMBOL_GPL(dax_fault);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-/*
- * The 'colour' (ie low bits) within a PMD of a page offset.  This comes up
- * more often than one might expect in the below function.
- */
-#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
-
-static void __dax_dbg(struct buffer_head *bh, unsigned long address,
-		const char *reason, const char *fn)
-{
-	if (bh) {
-		char bname[BDEVNAME_SIZE];
-		bdevname(bh->b_bdev, bname);
-		pr_debug("%s: %s addr: %lx dev %s state %lx start %lld "
-			"length %zd fallback: %s\n", fn, current->comm,
-			address, bname, bh->b_state, (u64)bh->b_blocknr,
-			bh->b_size, reason);
-	} else {
-		pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
-			current->comm, address, reason);
-	}
-}
-
-#define dax_pmd_dbg(bh, address, reason)	__dax_dbg(bh, address, reason, "dax_pmd")
-
-/**
- * dax_pmd_fault - handle a PMD fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * When a page fault occurs, filesystems may call this helper in their
- * pmd_fault handler for DAX files.
- */
-int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-		pmd_t *pmd, unsigned int flags, get_block_t get_block)
-{
-	struct file *file = vma->vm_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct buffer_head bh;
-	unsigned blkbits = inode->i_blkbits;
-	unsigned long pmd_addr = address & PMD_MASK;
-	bool write = flags & FAULT_FLAG_WRITE;
-	struct block_device *bdev;
-	pgoff_t size, pgoff;
-	sector_t block;
-	int result = 0;
-	bool alloc = false;
-
-	/* dax pmd mappings require pfn_t_devmap() */
-	if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
-		return VM_FAULT_FALLBACK;
-
-	/* Fall back to PTEs if we're going to COW */
-	if (write && !(vma->vm_flags & VM_SHARED)) {
-		split_huge_pmd(vma, pmd, address);
-		dax_pmd_dbg(NULL, address, "cow write");
-		return VM_FAULT_FALLBACK;
-	}
-	/* If the PMD would extend outside the VMA */
-	if (pmd_addr < vma->vm_start) {
-		dax_pmd_dbg(NULL, address, "vma start unaligned");
-		return VM_FAULT_FALLBACK;
-	}
-	if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
-		dax_pmd_dbg(NULL, address, "vma end unaligned");
-		return VM_FAULT_FALLBACK;
-	}
-
-	pgoff = linear_page_index(vma, pmd_addr);
-	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (pgoff >= size)
-		return VM_FAULT_SIGBUS;
-	/* If the PMD would cover blocks out of the file */
-	if ((pgoff | PG_PMD_COLOUR) >= size) {
-		dax_pmd_dbg(NULL, address,
-				"offset + huge page size > file size");
-		return VM_FAULT_FALLBACK;
-	}
-
-	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
-	block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
-
-	bh.b_size = PMD_SIZE;
-
-	if (get_block(inode, block, &bh, 0) != 0)
-		return VM_FAULT_SIGBUS;
-
-	if (!buffer_mapped(&bh) && write) {
-		if (get_block(inode, block, &bh, 1) != 0)
-			return VM_FAULT_SIGBUS;
-		alloc = true;
-		WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	}
-
-	bdev = bh.b_bdev;
-
-	if (bh.b_size < PMD_SIZE) {
-		dax_pmd_dbg(&bh, address, "allocated block too small");
-		return VM_FAULT_FALLBACK;
-	}
-
-	/*
-	 * If we allocated new storage, make sure no process has any
-	 * zero pages covering this hole
-	 */
-	if (alloc) {
-		loff_t lstart = pgoff << PAGE_SHIFT;
-		loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
-
-		truncate_pagecache_range(inode, lstart, lend);
-	}
-
-	if (!write && !buffer_mapped(&bh)) {
-		spinlock_t *ptl;
-		pmd_t entry;
-		struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
-
-		if (unlikely(!zero_page)) {
-			dax_pmd_dbg(&bh, address, "no zero page");
-			goto fallback;
-		}
-
-		ptl = pmd_lock(vma->vm_mm, pmd);
-		if (!pmd_none(*pmd)) {
-			spin_unlock(ptl);
-			dax_pmd_dbg(&bh, address, "pmd already present");
-			goto fallback;
-		}
-
-		dev_dbg(part_to_dev(bdev->bd_part),
-				"%s: %s addr: %lx pfn: <zero> sect: %llx\n",
-				__func__, current->comm, address,
-				(unsigned long long) to_sector(&bh, inode));
-
-		entry = mk_pmd(zero_page, vma->vm_page_prot);
-		entry = pmd_mkhuge(entry);
-		set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
-		result = VM_FAULT_NOPAGE;
-		spin_unlock(ptl);
-	} else {
-		struct blk_dax_ctl dax = {
-			.sector = to_sector(&bh, inode),
-			.size = PMD_SIZE,
-		};
-		long length = dax_map_atomic(bdev, &dax);
-
-		if (length < 0) {
-			dax_pmd_dbg(&bh, address, "dax-error fallback");
-			goto fallback;
-		}
-		if (length < PMD_SIZE) {
-			dax_pmd_dbg(&bh, address, "dax-length too small");
-			dax_unmap_atomic(bdev, &dax);
-			goto fallback;
-		}
-		if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
-			dax_pmd_dbg(&bh, address, "pfn unaligned");
-			dax_unmap_atomic(bdev, &dax);
-			goto fallback;
-		}
-
-		if (!pfn_t_devmap(dax.pfn)) {
-			dax_unmap_atomic(bdev, &dax);
-			dax_pmd_dbg(&bh, address, "pfn not in memmap");
-			goto fallback;
-		}
-		dax_unmap_atomic(bdev, &dax);
-
-		/*
-		 * For PTE faults we insert a radix tree entry for reads, and
-		 * leave it clean.  Then on the first write we dirty the radix
-		 * tree entry via the dax_pfn_mkwrite() path.  This sequence
-		 * allows the dax_pfn_mkwrite() call to be simpler and avoid a
-		 * call into get_block() to translate the pgoff to a sector in
-		 * order to be able to create a new radix tree entry.
-		 *
-		 * The PMD path doesn't have an equivalent to
-		 * dax_pfn_mkwrite(), though, so for a read followed by a
-		 * write we traverse all the way through dax_pmd_fault()
-		 * twice.  This means we can just skip inserting a radix tree
-		 * entry completely on the initial read and just wait until
-		 * the write to insert a dirty entry.
-		 */
-		if (write) {
-			/*
-			 * We should insert radix-tree entry and dirty it here.
-			 * For now this is broken...
-			 */
-		}
-
-		dev_dbg(part_to_dev(bdev->bd_part),
-				"%s: %s addr: %lx pfn: %lx sect: %llx\n",
-				__func__, current->comm, address,
-				pfn_t_to_pfn(dax.pfn),
-				(unsigned long long) dax.sector);
-		result |= vmf_insert_pfn_pmd(vma, address, pmd,
-				dax.pfn, write);
-	}
-
- out:
-	return result;
-
- fallback:
-	count_vm_event(THP_FAULT_FALLBACK);
-	result = VM_FAULT_FALLBACK;
-	goto out;
-}
-EXPORT_SYMBOL_GPL(dax_pmd_fault);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
 /**
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a41a747d6112..0f74866edae6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -48,16 +48,12 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
-				unsigned int flags, get_block_t);
-#else
 static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 				pmd_t *pmd, unsigned int flags, get_block_t gb)
 {
 	return VM_FAULT_FALLBACK;
 }
-#endif
+
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
 
-- 
cgit v1.2.3


From 11c59c92f44d9272db7655a462608658a6d95013 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 8 Nov 2016 11:32:46 +1100
Subject: dax: correct dax iomap code namespace

The recently added DAX functions that use the new struct iomap data
structure were named iomap_dax_rw(), iomap_dax_fault() and
iomap_dax_actor().  These are actually defined in fs/dax.c, though, so
should be part of the "dax" namespace and not the "iomap" namespace.
Rename them to dax_iomap_rw(), dax_iomap_fault() and dax_iomap_actor()
respectively.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c            | 16 ++++++++--------
 fs/ext2/file.c      |  6 +++---
 fs/xfs/xfs_file.c   |  8 ++++----
 include/linux/dax.h |  4 ++--
 4 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 3d0b1032c555..fdbd7a1ec6cf 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1031,7 +1031,7 @@ EXPORT_SYMBOL_GPL(dax_truncate_page);
 
 #ifdef CONFIG_FS_IOMAP
 static loff_t
-iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct iomap *iomap)
 {
 	struct iov_iter *iter = data;
@@ -1088,7 +1088,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 }
 
 /**
- * iomap_dax_rw - Perform I/O to a DAX file
+ * dax_iomap_rw - Perform I/O to a DAX file
  * @iocb:	The control block for this I/O
  * @iter:	The addresses to do I/O from or to
  * @ops:	iomap ops passed from the file system
@@ -1098,7 +1098,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
  * and evicting any page cache pages in the region under I/O.
  */
 ssize_t
-iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
@@ -1128,7 +1128,7 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	while (iov_iter_count(iter)) {
 		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
-				iter, iomap_dax_actor);
+				iter, dax_iomap_actor);
 		if (ret <= 0)
 			break;
 		pos += ret;
@@ -1138,10 +1138,10 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	iocb->ki_pos += done;
 	return done ? done : ret;
 }
-EXPORT_SYMBOL_GPL(iomap_dax_rw);
+EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
 /**
- * iomap_dax_fault - handle a page fault on a DAX file
+ * dax_iomap_fault - handle a page fault on a DAX file
  * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
  * @ops: iomap ops passed from the file system
@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(iomap_dax_rw);
  * or mkwrite handler for DAX files. Assumes the caller has done all the
  * necessary locking for the page fault to proceed successfully.
  */
-int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
@@ -1252,5 +1252,5 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		return VM_FAULT_SIGBUS | major;
 	return VM_FAULT_NOPAGE | major;
 }
-EXPORT_SYMBOL_GPL(iomap_dax_fault);
+EXPORT_SYMBOL_GPL(dax_iomap_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index fb88b51ca947..b0f241528a30 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -38,7 +38,7 @@ static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return 0; /* skip atime */
 
 	inode_lock_shared(inode);
-	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
 	inode_unlock_shared(inode);
 
 	file_accessed(iocb->ki_filp);
@@ -62,7 +62,7 @@ static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret)
 		goto out_unlock;
 
-	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		mark_inode_dirty(inode);
@@ -99,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
+	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a314fc7b56fa..e7f35d548cfc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -344,7 +344,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -691,7 +691,7 @@ xfs_file_dax_write(
 
 	trace_xfs_file_dax_write(ip, count, pos);
 
-	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		error = xfs_setfilesize(ip, pos, ret);
@@ -1640,7 +1640,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1674,7 +1674,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0f74866edae6..a3dfee4cb03f 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -11,13 +11,13 @@ struct iomap_ops;
 /* We use lowest available exceptional entry bit for locking */
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
 
-ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-- 
cgit v1.2.3


From fa28f7296a7ce38ed15dc06bd2149e04c8db9d4b Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 8 Nov 2016 11:33:35 +1100
Subject: dax: move RADIX_DAX_* defines to dax.h

The RADIX_DAX_* defines currently mostly live in fs/dax.c, with just
RADIX_DAX_ENTRY_LOCK being in include/linux/dax.h so it can be used in
mm/filemap.c.  When we add PMD support, though, mm/filemap.c will also need
access to the RADIX_DAX_PTE type so it can properly construct a 4k sized
empty entry.

Instead of shifting the defines between dax.c and dax.h as they are
individually used in other code, just move them wholesale to dax.h so
they'll be available when we need them.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c            | 14 --------------
 include/linux/dax.h | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 6edd89b3b69c..c45cc4d8e996 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -34,20 +34,6 @@
 #include <linux/iomap.h>
 #include "internal.h"
 
-/*
- * We use lowest available bit in exceptional entry for locking, other two
- * bits to determine entry type. In total 3 special bits.
- */
-#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 3)
-#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD)
-#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK)
-#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
-#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
-		RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \
-		RADIX_TREE_EXCEPTIONAL_ENTRY))
-
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a3dfee4cb03f..e9ea78c1cf98 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -8,8 +8,21 @@
 
 struct iomap_ops;
 
-/* We use lowest available exceptional entry bit for locking */
+/*
+ * We use lowest available bit in exceptional entry for locking, other two
+ * bits to determine entry type. In total 3 special bits.
+ */
+#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 3)
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
+#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
+#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK)
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+		RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \
+		RADIX_TREE_EXCEPTIONAL_ENTRY))
+
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
-- 
cgit v1.2.3


From 642261ac995e01d7837db1f4b90181496f7e6835 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 8 Nov 2016 11:34:45 +1100
Subject: dax: add struct iomap based DAX PMD support

DAX PMDs have been disabled since Jan Kara introduced DAX radix tree based
locking.  This patch allows DAX PMDs to participate in the DAX radix tree
based locking scheme so that they can be re-enabled using the new struct
iomap based fault handlers.

There are currently three types of DAX 4k entries: 4k zero pages, 4k DAX
mappings that have an associated block allocation, and 4k DAX empty
entries.  The empty entries exist to provide locking for the duration of a
given page fault.

This patch adds three equivalent 2MiB DAX entries: Huge Zero Page (HZP)
entries, PMD DAX entries that have associated block allocations, and 2 MiB
DAX empty entries.

Unlike the 4k case where we insert a struct page* into the radix tree for
4k zero pages, for HZP we insert a DAX exceptional entry with the new
RADIX_DAX_HZP flag set.  This is because we use a single 2 MiB zero page in
every 2MiB hole mapping, and it doesn't make sense to have that same struct
page* with multiple entries in multiple trees.  This would cause contention
on the single page lock for the one Huge Zero Page, and it would break the
page->index and page->mapping associations that are assumed to be valid in
many other places in the kernel.

One difficult use case is when one thread is trying to use 4k entries in
radix tree for a given offset, and another thread is using 2 MiB entries
for that same offset.  The current code handles this by making the 2 MiB
user fall back to 4k entries for most cases.  This was done because it is
the simplest solution, and because the use of 2MiB pages is already
opportunistic.

If we were to try to upgrade from 4k pages to 2MiB pages for a given range,
we run into the problem of how we lock out 4k page faults for the entire
2MiB range while we clean out the radix tree so we can insert the 2MiB
entry.  We can solve this problem if we need to, but I think that the cases
where both 2MiB entries and 4K entries are being used for the same range
will be rare enough and the gain small enough that it probably won't be
worth the complexity.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c            | 378 +++++++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/dax.h |  55 ++++++--
 mm/filemap.c        |   3 +-
 3 files changed, 386 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 0582c7c2ae40..281e91a63367 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -76,6 +76,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
 	blk_queue_exit(bdev->bd_queue);
 }
 
+static int dax_is_pmd_entry(void *entry)
+{
+	return (unsigned long)entry & RADIX_DAX_PMD;
+}
+
+static int dax_is_pte_entry(void *entry)
+{
+	return !((unsigned long)entry & RADIX_DAX_PMD);
+}
+
+static int dax_is_zero_entry(void *entry)
+{
+	return (unsigned long)entry & RADIX_DAX_HZP;
+}
+
+static int dax_is_empty_entry(void *entry)
+{
+	return (unsigned long)entry & RADIX_DAX_EMPTY;
+}
+
 struct page *read_dax_sector(struct block_device *bdev, sector_t n)
 {
 	struct page *page = alloc_pages(GFP_KERNEL, 0);
@@ -281,7 +301,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
 	 * queue to the start of that PMD.  This ensures that all offsets in
 	 * the range covered by the PMD map to the same bit lock.
 	 */
-	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+	if (dax_is_pmd_entry(entry))
 		index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
 
 	key->mapping = mapping;
@@ -413,36 +433,116 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
  * radix tree entry locked. If the radix tree doesn't contain given index,
  * create empty exceptional entry for the index and return with it locked.
  *
+ * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+ * either return that locked entry or will return an error.  This error will
+ * happen if there are any 4k entries (either zero pages or DAX entries)
+ * within the 2MiB range that we are requesting.
+ *
+ * We always favor 4k entries over 2MiB entries. There isn't a flow where we
+ * evict 4k entries in order to 'upgrade' them to a 2MiB entry.  A 2MiB
+ * insertion will fail if it finds any 4k entries already in the tree, and a
+ * 4k insertion will cause an existing 2MiB entry to be unmapped and
+ * downgraded to 4k entries.  This happens for both 2MiB huge zero pages as
+ * well as 2MiB empty entries.
+ *
+ * The exception to this downgrade path is for 2MiB DAX PMD entries that have
+ * real storage backing them.  We will leave these real 2MiB DAX entries in
+ * the tree, and PTE writes will simply dirty the entire 2MiB DAX entry.
+ *
  * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
  * persistent memory the benefit is doubtful. We can add that later if we can
  * show it helps.
  */
-static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
+static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
+		unsigned long size_flag)
 {
+	bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
 	void *entry, **slot;
 
 restart:
 	spin_lock_irq(&mapping->tree_lock);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
+
+	if (entry) {
+		if (size_flag & RADIX_DAX_PMD) {
+			if (!radix_tree_exceptional_entry(entry) ||
+			    dax_is_pte_entry(entry)) {
+				put_unlocked_mapping_entry(mapping, index,
+						entry);
+				entry = ERR_PTR(-EEXIST);
+				goto out_unlock;
+			}
+		} else { /* trying to grab a PTE entry */
+			if (radix_tree_exceptional_entry(entry) &&
+			    dax_is_pmd_entry(entry) &&
+			    (dax_is_zero_entry(entry) ||
+			     dax_is_empty_entry(entry))) {
+				pmd_downgrade = true;
+			}
+		}
+	}
+
 	/* No entry for given index? Make sure radix tree is big enough. */
-	if (!entry) {
+	if (!entry || pmd_downgrade) {
 		int err;
 
+		if (pmd_downgrade) {
+			/*
+			 * Make sure 'entry' remains valid while we drop
+			 * mapping->tree_lock.
+			 */
+			entry = lock_slot(mapping, slot);
+		}
+
 		spin_unlock_irq(&mapping->tree_lock);
 		err = radix_tree_preload(
 				mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
-		if (err)
+		if (err) {
+			if (pmd_downgrade)
+				put_locked_mapping_entry(mapping, index, entry);
 			return ERR_PTR(err);
-		entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-			       RADIX_DAX_ENTRY_LOCK);
+		}
+
+		/*
+		 * Besides huge zero pages the only other thing that gets
+		 * downgraded are empty entries which don't need to be
+		 * unmapped.
+		 */
+		if (pmd_downgrade && dax_is_zero_entry(entry))
+			unmap_mapping_range(mapping,
+				(index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
+
 		spin_lock_irq(&mapping->tree_lock);
-		err = radix_tree_insert(&mapping->page_tree, index, entry);
+
+		if (pmd_downgrade) {
+			radix_tree_delete(&mapping->page_tree, index);
+			mapping->nrexceptional--;
+			dax_wake_mapping_entry_waiter(mapping, index, entry,
+					true);
+		}
+
+		entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+
+		err = __radix_tree_insert(&mapping->page_tree, index,
+				dax_radix_order(entry), entry);
 		radix_tree_preload_end();
 		if (err) {
 			spin_unlock_irq(&mapping->tree_lock);
-			/* Someone already created the entry? */
-			if (err == -EEXIST)
+			/*
+			 * Someone already created the entry?  This is a
+			 * normal failure when inserting PMDs in a range
+			 * that already contains PTEs.  In that case we want
+			 * to return -EEXIST immediately.
+			 */
+			if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
 				goto restart;
+			/*
+			 * Our insertion of a DAX PMD entry failed, most
+			 * likely because it collided with a PTE sized entry
+			 * at a different index in the PMD range.  We haven't
+			 * inserted anything into the radix tree and have no
+			 * waiters to wake.
+			 */
 			return ERR_PTR(err);
 		}
 		/* Good, we have inserted empty locked entry into the tree. */
@@ -466,6 +566,7 @@ restart:
 		return page;
 	}
 	entry = lock_slot(mapping, slot);
+ out_unlock:
 	spin_unlock_irq(&mapping->tree_lock);
 	return entry;
 }
@@ -473,9 +574,9 @@ restart:
 /*
  * We do not necessarily hold the mapping->tree_lock when we call this
  * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree.  This is okay, though, because all we really need to do is to
- * find the correct waitqueue where tasks might be sleeping waiting for that
- * old 'entry' and wake them.
+ * radix tree.  This is okay because all we really need to do is to find the
+ * correct waitqueue where tasks might be waiting for that old 'entry' and
+ * wake them.
  */
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all)
@@ -588,11 +689,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
 	return 0;
 }
 
-#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
-
+/*
+ * By this point grab_mapping_entry() has ensured that we have a locked entry
+ * of the appropriate size so we don't have to worry about downgrading PMDs to
+ * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
+ * already in the tree, we will skip the insertion and just dirty the PMD as
+ * appropriate.
+ */
 static void *dax_insert_mapping_entry(struct address_space *mapping,
 				      struct vm_fault *vmf,
-				      void *entry, sector_t sector)
+				      void *entry, sector_t sector,
+				      unsigned long flags)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
 	int error = 0;
@@ -615,22 +722,35 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 		error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
 		if (error)
 			return ERR_PTR(error);
+	} else if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_HZP)) {
+		/* replacing huge zero page with PMD block mapping */
+		unmap_mapping_range(mapping,
+			(vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
 	}
 
 	spin_lock_irq(&mapping->tree_lock);
-	new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
-		       RADIX_DAX_ENTRY_LOCK);
+	new_entry = dax_radix_locked_entry(sector, flags);
+
 	if (hole_fill) {
 		__delete_from_page_cache(entry, NULL);
 		/* Drop pagecache reference */
 		put_page(entry);
-		error = radix_tree_insert(page_tree, index, new_entry);
+		error = __radix_tree_insert(page_tree, index,
+				dax_radix_order(new_entry), new_entry);
 		if (error) {
 			new_entry = ERR_PTR(error);
 			goto unlock;
 		}
 		mapping->nrexceptional++;
-	} else {
+	} else if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+		/*
+		 * Only swap our new entry into the radix tree if the current
+		 * entry is a zero page or an empty entry.  If a normal PTE or
+		 * PMD entry is already in the tree, we leave it alone.  This
+		 * means that if we are trying to insert a PTE and the
+		 * existing entry is a PMD, we will just leave the PMD in the
+		 * tree and dirty it if necessary.
+		 */
 		void **slot;
 		void *ret;
 
@@ -660,7 +780,6 @@ static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	int type = RADIX_DAX_TYPE(entry);
 	struct radix_tree_node *node;
 	struct blk_dax_ctl dax;
 	void **slot;
@@ -681,13 +800,21 @@ static int dax_writeback_one(struct block_device *bdev,
 	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
 		goto unlock;
 
-	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
+				dax_is_zero_entry(entry))) {
 		ret = -EIO;
 		goto unlock;
 	}
 
-	dax.sector = RADIX_DAX_SECTOR(entry);
-	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+	/*
+	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
+	 * in the middle of a PMD, the 'index' we are given will be aligned to
+	 * the start index of the PMD, as will the sector we pull from
+	 * 'entry'.  This allows us to flush for PMD_SIZE and not have to
+	 * worry about partial PMD writebacks.
+	 */
+	dax.sector = dax_radix_sector(entry);
+	dax.size = PAGE_SIZE << dax_radix_order(entry);
 	spin_unlock_irq(&mapping->tree_lock);
 
 	/*
@@ -726,12 +853,11 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	pgoff_t start_index, end_index, pmd_index;
+	pgoff_t start_index, end_index;
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
 	bool done = false;
 	int i, ret = 0;
-	void *entry;
 
 	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
 		return -EIO;
@@ -741,15 +867,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 
 	start_index = wbc->range_start >> PAGE_SHIFT;
 	end_index = wbc->range_end >> PAGE_SHIFT;
-	pmd_index = DAX_PMD_INDEX(start_index);
-
-	rcu_read_lock();
-	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
-	rcu_read_unlock();
-
-	/* see if the start of our range is covered by a PMD entry */
-	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
-		start_index = pmd_index;
 
 	tag_pages_for_writeback(mapping, start_index, end_index);
 
@@ -794,7 +911,7 @@ static int dax_insert_mapping(struct address_space *mapping,
 		return PTR_ERR(dax.addr);
 	dax_unmap_atomic(bdev, &dax);
 
-	ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector);
+	ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector, 0);
 	if (IS_ERR(ret))
 		return PTR_ERR(ret);
 	*entryp = ret;
@@ -841,7 +958,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	bh.b_bdev = inode->i_sb->s_bdev;
 	bh.b_size = PAGE_SIZE;
 
-	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
 	if (IS_ERR(entry)) {
 		error = PTR_ERR(entry);
 		goto out;
@@ -1162,7 +1279,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (pos >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
-	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
 	if (IS_ERR(entry)) {
 		error = PTR_ERR(entry);
 		goto out;
@@ -1264,4 +1381,191 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	return VM_FAULT_NOPAGE | major;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);
+
+#ifdef CONFIG_FS_DAX_PMD
+/*
+ * The 'colour' (ie low bits) within a PMD of a page offset.  This comes up
+ * more often than one might expect in the below functions.
+ */
+#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
+
+static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
+		struct vm_fault *vmf, unsigned long address,
+		struct iomap *iomap, loff_t pos, bool write, void **entryp)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct block_device *bdev = iomap->bdev;
+	struct blk_dax_ctl dax = {
+		.sector = dax_iomap_sector(iomap, pos),
+		.size = PMD_SIZE,
+	};
+	long length = dax_map_atomic(bdev, &dax);
+	void *ret;
+
+	if (length < 0) /* dax_map_atomic() failed */
+		return VM_FAULT_FALLBACK;
+	if (length < PMD_SIZE)
+		goto unmap_fallback;
+	if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
+		goto unmap_fallback;
+	if (!pfn_t_devmap(dax.pfn))
+		goto unmap_fallback;
+
+	dax_unmap_atomic(bdev, &dax);
+
+	ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
+			RADIX_DAX_PMD);
+	if (IS_ERR(ret))
+		return VM_FAULT_FALLBACK;
+	*entryp = ret;
+
+	return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+
+ unmap_fallback:
+	dax_unmap_atomic(bdev, &dax);
+	return VM_FAULT_FALLBACK;
+}
+
+static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
+		struct vm_fault *vmf, unsigned long address,
+		struct iomap *iomap, void **entryp)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	unsigned long pmd_addr = address & PMD_MASK;
+	struct page *zero_page;
+	spinlock_t *ptl;
+	pmd_t pmd_entry;
+	void *ret;
+
+	zero_page = mm_get_huge_zero_page(vma->vm_mm);
+
+	if (unlikely(!zero_page))
+		return VM_FAULT_FALLBACK;
+
+	ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
+			RADIX_DAX_PMD | RADIX_DAX_HZP);
+	if (IS_ERR(ret))
+		return VM_FAULT_FALLBACK;
+	*entryp = ret;
+
+	ptl = pmd_lock(vma->vm_mm, pmd);
+	if (!pmd_none(*pmd)) {
+		spin_unlock(ptl);
+		return VM_FAULT_FALLBACK;
+	}
+
+	pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+	pmd_entry = pmd_mkhuge(pmd_entry);
+	set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+	spin_unlock(ptl);
+	return VM_FAULT_NOPAGE;
+}
+
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	unsigned long pmd_addr = address & PMD_MASK;
+	bool write = flags & FAULT_FLAG_WRITE;
+	unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
+	struct inode *inode = mapping->host;
+	int result = VM_FAULT_FALLBACK;
+	struct iomap iomap = { 0 };
+	pgoff_t max_pgoff, pgoff;
+	struct vm_fault vmf;
+	void *entry;
+	loff_t pos;
+	int error;
+
+	/* Fall back to PTEs if we're going to COW */
+	if (write && !(vma->vm_flags & VM_SHARED))
+		goto fallback;
+
+	/* If the PMD would extend outside the VMA */
+	if (pmd_addr < vma->vm_start)
+		goto fallback;
+	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
+		goto fallback;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is
+	 * supposed to hold locks serializing us with truncate / punch hole so
+	 * this is a reliable test.
+	 */
+	pgoff = linear_page_index(vma, pmd_addr);
+	max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+	if (pgoff > max_pgoff)
+		return VM_FAULT_SIGBUS;
+
+	/* If the PMD would extend beyond the file size */
+	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
+		goto fallback;
+
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto fallback;
+
+	/*
+	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
+	 * setting up a mapping, so really we're using iomap_begin() as a way
+	 * to look up our filesystem block.
+	 */
+	pos = (loff_t)pgoff << PAGE_SHIFT;
+	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (iomap.offset + iomap.length < pos + PMD_SIZE)
+		goto finish_iomap;
+
+	vmf.pgoff = pgoff;
+	vmf.flags = flags;
+	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
+				&iomap, pos, write, &entry);
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (WARN_ON_ONCE(write))
+			goto finish_iomap;
+		result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
+				&entry);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+ finish_iomap:
+	if (ops->iomap_end) {
+		if (result == VM_FAULT_FALLBACK) {
+			ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
+					&iomap);
+		} else {
+			error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
+					iomap_flags, &iomap);
+			if (error)
+				result = VM_FAULT_FALLBACK;
+		}
+	}
+ unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
+ fallback:
+	if (result == VM_FAULT_FALLBACK) {
+		split_huge_pmd(vma, pmd, address);
+		count_vm_event(THP_FAULT_FALLBACK);
+	}
+	return result;
+}
+EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+#endif /* CONFIG_FS_DAX_PMD */
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index e9ea78c1cf98..8d1a5c47945f 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -9,20 +9,32 @@
 struct iomap_ops;
 
 /*
- * We use lowest available bit in exceptional entry for locking, other two
- * bits to determine entry type. In total 3 special bits.
+ * We use lowest available bit in exceptional entry for locking, one bit for
+ * the entry size (PMD) and two more to tell us if the entry is a huge zero
+ * page (HZP) or an empty entry that is just used for locking.  In total four
+ * special bits.
+ *
+ * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
+ * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
+ * block allocation.
  */
-#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 3)
+#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD)
-#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK)
-#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
-#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
-		RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \
-		RADIX_TREE_EXCEPTIONAL_ENTRY))
+#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
+#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
+#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
+static inline unsigned long dax_radix_sector(void *entry)
+{
+	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+}
+
+static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+{
+	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
+			((unsigned long)sector << RADIX_DAX_SHIFT) |
+			RADIX_DAX_ENTRY_LOCK);
+}
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
@@ -67,6 +79,27 @@ static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	return VM_FAULT_FALLBACK;
 }
 
+#ifdef CONFIG_FS_DAX_PMD
+static inline unsigned int dax_radix_order(void *entry)
+{
+	if ((unsigned long)entry & RADIX_DAX_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	return 0;
+}
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+#else
+static inline unsigned int dax_radix_order(void *entry)
+{
+	return 0;
+}
+static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmd, unsigned int flags,
+		struct iomap_ops *ops)
+{
+	return VM_FAULT_FALLBACK;
+}
+#endif
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 1ffb7dcd1b5d..00ab94a882de 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -137,8 +137,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		} else {
 			/* DAX can replace empty locked entry with a hole */
 			WARN_ON_ONCE(p !=
-				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-					 RADIX_DAX_ENTRY_LOCK));
+				dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
 			/* DAX accounts exceptional entries as normal pages */
 			if (node)
 				workingset_node_pages_dec(node);
-- 
cgit v1.2.3


From b57d74aff9ab92fbfb7c197c384d1adfa2827b2e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 1 Sep 2016 10:20:33 -0600
Subject: writeback: track if we're sleeping on progress in
 balance_dirty_pages()

Note in the bdi_writeback structure whenever a task ends up sleeping
waiting for progress. We can use that information in the lower layers
to increase the priority of writes.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 include/linux/backing-dev-defs.h | 2 ++
 mm/backing-dev.c                 | 1 +
 mm/page-writeback.c              | 1 +
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c357f27d5483..dc5f76d7f648 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
 	struct list_head work_list;
 	struct delayed_work dwork;	/* work item used for writeback */
 
+	unsigned long dirty_sleep;	/* last wait */
+
 	struct list_head bdi_node;	/* anchored at bdi->wb_list */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8fde443f36d7..3bfed5ab2475 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	spin_lock_init(&wb->work_lock);
 	INIT_LIST_HEAD(&wb->work_list);
 	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+	wb->dirty_sleep = jiffies;
 
 	wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
 	if (!wb->congested)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 439cc63ad903..52e2f8e3b472 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1778,6 +1778,7 @@ pause:
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
+		wb->dirty_sleep = now;
 		io_schedule_timeout(pause);
 
 		current->dirty_paused_when = now + pause;
-- 
cgit v1.2.3


From 7392b4bb702b05749539ff0936e94976248240c9 Mon Sep 17 00:00:00 2001
From: "monk.liu" <monk.liu@amd.com>
Date: Fri, 4 Nov 2016 16:16:09 -0400
Subject: dma-buf: return index of the first signaled fence (v2)

Return the index of the first signaled fence.  This information
is useful in some APIs like Vulkan.

v2: rebase on drm-next (fence -> dma_fence)

Signed-off-by: monk.liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
 [sumits: fix warnings]
Link: http://patchwork.freedesktop.org/patch/msgid/1478290570-30982-1-git-send-email-alexander.deucher@amd.com
---
 drivers/dma-buf/dma-fence.c | 21 ++++++++++++++++-----
 include/linux/dma-fence.h   |  3 ++-
 2 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3a7bf009c21c..cda40cbd8c43 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -403,14 +403,18 @@ out:
 EXPORT_SYMBOL(dma_fence_default_wait);
 
 static bool
-dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count)
+dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
+			    uint32_t *idx)
 {
 	int i;
 
 	for (i = 0; i < count; ++i) {
 		struct dma_fence *fence = fences[i];
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+			if (idx)
+				*idx = i;
 			return true;
+		}
 	}
 	return false;
 }
@@ -422,6 +426,8 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count)
  * @count:	[in]	number of fences to wait on
  * @intr:	[in]	if true, do an interruptible wait
  * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ * @idx:       [out]	the first signaled fence index, meaningful only on
+ *			positive return
  *
  * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
  * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
@@ -433,7 +439,7 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count)
  */
 signed long
 dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
-			   bool intr, signed long timeout)
+			   bool intr, signed long timeout, uint32_t *idx)
 {
 	struct default_wait_cb *cb;
 	signed long ret = timeout;
@@ -444,8 +450,11 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 
 	if (timeout == 0) {
 		for (i = 0; i < count; ++i)
-			if (dma_fence_is_signaled(fences[i]))
+			if (dma_fence_is_signaled(fences[i])) {
+				if (idx)
+					*idx = i;
 				return 1;
+			}
 
 		return 0;
 	}
@@ -468,6 +477,8 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 		if (dma_fence_add_callback(fence, &cb[i].base,
 					   dma_fence_default_wait_cb)) {
 			/* This fence is already signaled */
+			if (idx)
+				*idx = i;
 			goto fence_rm_cb;
 		}
 	}
@@ -478,7 +489,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 		else
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (dma_fence_test_signaled_any(fences, count))
+		if (dma_fence_test_signaled_any(fences, count, idx))
 			break;
 
 		ret = schedule_timeout(ret);
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index ba60c043a5d3..fcf4b1971eba 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -382,7 +382,8 @@ signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
 				       uint32_t count,
-				       bool intr, signed long timeout);
+				       bool intr, signed long timeout,
+				       uint32_t *idx);
 
 /**
  * dma_fence_wait - sleep until the fence gets signaled
-- 
cgit v1.2.3


From 9e5a7e22951bc12ee45cb617919d57b5efce56b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Nov 2016 08:12:47 -0600
Subject: blk-mq: export blk_mq_map_queues

This will allow SCSI to have a single blk_mq_ops structure that either
lets the LLDD map the queues to PCIe MSIx vectors or use the default.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-mq-cpumap.c  | 1 +
 block/blk-mq.h         | 1 -
 include/linux/blk-mq.h | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 19b1d9c5f07e..8e61e8640e17 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -87,6 +87,7 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 	free_cpumask_var(cpus);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(blk_mq_map_queues);
 
 /*
  * We have no quick way of doing reverse lookups. This is only used at
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e5d25249028c..5347f011e90d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -38,7 +38,6 @@ void blk_mq_disable_hotplug(void);
 /*
  * CPU -> queue mappings
  */
-int blk_mq_map_queues(struct blk_mq_tag_set *set);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 535ab2e13d2e..6c0fb259581f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -237,6 +237,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
 int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 
+int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 /*
-- 
cgit v1.2.3


From feb3d79800ece19c18b979c5edd1c28755f59d07 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Tue, 8 Nov 2016 15:37:48 +0530
Subject: scsi: ufs-qcom: phy/hcd: Refactoring phy clock handling

Add phy clock enable code to phy_power_on/off callbacks, and
remove explicit calls to enable these phy clocks from the
ufs-qcom hcd driver.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/phy/phy-qcom-ufs.c       | 36 ++++++++++++++++++------------------
 drivers/scsi/ufs/ufs-qcom.c      | 21 ++++++---------------
 include/linux/phy/phy-qcom-ufs.h | 18 ------------------
 3 files changed, 24 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/phy/phy-qcom-ufs.c b/drivers/phy/phy-qcom-ufs.c
index c5c29fef4c56..fdd9b901983f 100644
--- a/drivers/phy/phy-qcom-ufs.c
+++ b/drivers/phy/phy-qcom-ufs.c
@@ -361,10 +361,9 @@ out:
 	return ret;
 }
 
-int ufs_qcom_phy_enable_ref_clk(struct phy *generic_phy)
+static int ufs_qcom_phy_enable_ref_clk(struct ufs_qcom_phy *phy)
 {
 	int ret = 0;
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
 
 	if (phy->is_ref_clk_enabled)
 		goto out;
@@ -411,7 +410,6 @@ out_disable_src:
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_ref_clk);
 
 static int ufs_qcom_phy_disable_vreg(struct device *dev,
 			      struct ufs_qcom_phy_vreg *vreg)
@@ -435,10 +433,8 @@ out:
 	return ret;
 }
 
-void ufs_qcom_phy_disable_ref_clk(struct phy *generic_phy)
+static void ufs_qcom_phy_disable_ref_clk(struct ufs_qcom_phy *phy)
 {
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
-
 	if (phy->is_ref_clk_enabled) {
 		clk_disable_unprepare(phy->ref_clk);
 		/*
@@ -451,7 +447,6 @@ void ufs_qcom_phy_disable_ref_clk(struct phy *generic_phy)
 		phy->is_ref_clk_enabled = false;
 	}
 }
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_ref_clk);
 
 #define UFS_REF_CLK_EN	(1 << 5)
 
@@ -504,9 +499,8 @@ void ufs_qcom_phy_disable_dev_ref_clk(struct phy *generic_phy)
 EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_dev_ref_clk);
 
 /* Turn ON M-PHY RMMI interface clocks */
-int ufs_qcom_phy_enable_iface_clk(struct phy *generic_phy)
+static int ufs_qcom_phy_enable_iface_clk(struct ufs_qcom_phy *phy)
 {
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
 	int ret = 0;
 
 	if (phy->is_iface_clk_enabled)
@@ -530,20 +524,16 @@ int ufs_qcom_phy_enable_iface_clk(struct phy *generic_phy)
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_iface_clk);
 
 /* Turn OFF M-PHY RMMI interface clocks */
-void ufs_qcom_phy_disable_iface_clk(struct phy *generic_phy)
+void ufs_qcom_phy_disable_iface_clk(struct ufs_qcom_phy *phy)
 {
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
-
 	if (phy->is_iface_clk_enabled) {
 		clk_disable_unprepare(phy->tx_iface_clk);
 		clk_disable_unprepare(phy->rx_iface_clk);
 		phy->is_iface_clk_enabled = false;
 	}
 }
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_iface_clk);
 
 int ufs_qcom_phy_start_serdes(struct phy *generic_phy)
 {
@@ -661,13 +651,20 @@ int ufs_qcom_phy_power_on(struct phy *generic_phy)
 		goto out_disable_phy;
 	}
 
-	err = ufs_qcom_phy_enable_ref_clk(generic_phy);
+	err = ufs_qcom_phy_enable_iface_clk(phy_common);
 	if (err) {
-		dev_err(dev, "%s enable phy ref clock failed, err=%d\n",
+		dev_err(dev, "%s enable phy iface clock failed, err=%d\n",
 			__func__, err);
 		goto out_disable_pll;
 	}
 
+	err = ufs_qcom_phy_enable_ref_clk(phy_common);
+	if (err) {
+		dev_err(dev, "%s enable phy ref clock failed, err=%d\n",
+			__func__, err);
+		goto out_disable_iface_clk;
+	}
+
 	/* enable device PHY ref_clk pad rail */
 	if (phy_common->vddp_ref_clk.reg) {
 		err = ufs_qcom_phy_enable_vreg(dev,
@@ -683,7 +680,9 @@ int ufs_qcom_phy_power_on(struct phy *generic_phy)
 	goto out;
 
 out_disable_ref_clk:
-	ufs_qcom_phy_disable_ref_clk(generic_phy);
+	ufs_qcom_phy_disable_ref_clk(phy_common);
+out_disable_iface_clk:
+	ufs_qcom_phy_disable_iface_clk(phy_common);
 out_disable_pll:
 	ufs_qcom_phy_disable_vreg(dev, &phy_common->vdda_pll);
 out_disable_phy:
@@ -702,7 +701,8 @@ int ufs_qcom_phy_power_off(struct phy *generic_phy)
 	if (phy_common->vddp_ref_clk.reg)
 		ufs_qcom_phy_disable_vreg(phy_common->dev,
 					  &phy_common->vddp_ref_clk);
-	ufs_qcom_phy_disable_ref_clk(generic_phy);
+	ufs_qcom_phy_disable_ref_clk(phy_common);
+	ufs_qcom_phy_disable_iface_clk(phy_common);
 
 	ufs_qcom_phy_disable_vreg(phy_common->dev, &phy_common->vdda_pll);
 	ufs_qcom_phy_disable_vreg(phy_common->dev, &phy_common->vdda_phy);
diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 3c4f602eecd2..5f70a35c053f 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -1114,17 +1114,8 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
 		return 0;
 
 	if (on && (status == POST_CHANGE)) {
-		err = ufs_qcom_phy_enable_iface_clk(host->generic_phy);
-		if (err)
-			goto out;
+		phy_power_on(host->generic_phy);
 
-		err = ufs_qcom_phy_enable_ref_clk(host->generic_phy);
-		if (err) {
-			dev_err(hba->dev, "%s enable phy ref clock failed, err=%d\n",
-				__func__, err);
-			ufs_qcom_phy_disable_iface_clk(host->generic_phy);
-			goto out;
-		}
 		/* enable the device ref clock for HS mode*/
 		if (ufshcd_is_hs_mode(&hba->pwr_info))
 			ufs_qcom_dev_ref_clk_ctrl(host, true);
@@ -1133,13 +1124,14 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
 			ufs_qcom_update_bus_bw_vote(host);
 
 	} else if (!on && (status == PRE_CHANGE)) {
-
-		/* M-PHY RMMI interface clocks can be turned off */
-		ufs_qcom_phy_disable_iface_clk(host->generic_phy);
-		if (!ufs_qcom_is_link_active(hba))
+		if (!ufs_qcom_is_link_active(hba)) {
 			/* disable device ref_clk */
 			ufs_qcom_dev_ref_clk_ctrl(host, false);
 
+			/* powering off PHY during aggressive clk gating */
+			phy_power_off(host->generic_phy);
+		}
+
 		vote = host->bus_vote.min_bw_vote;
 	}
 
@@ -1148,7 +1140,6 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
 		dev_err(hba->dev, "%s: set bus vote failed %d\n",
 				__func__, err);
 
-out:
 	return err;
 }
 
diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h
index 9d18e9f948e9..35c070ea6ea3 100644
--- a/include/linux/phy/phy-qcom-ufs.h
+++ b/include/linux/phy/phy-qcom-ufs.h
@@ -17,22 +17,6 @@
 
 #include "phy.h"
 
-/**
- * ufs_qcom_phy_enable_ref_clk() - Enable the phy
- * ref clock.
- * @phy: reference to a generic phy
- *
- * returns 0 for success, and non-zero for error.
- */
-int ufs_qcom_phy_enable_ref_clk(struct phy *phy);
-
-/**
- * ufs_qcom_phy_disable_ref_clk() - Disable the phy
- * ref clock.
- * @phy: reference to a generic phy.
- */
-void ufs_qcom_phy_disable_ref_clk(struct phy *phy);
-
 /**
  * ufs_qcom_phy_enable_dev_ref_clk() - Enable the device
  * ref clock.
@@ -47,8 +31,6 @@ void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy);
  */
 void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy);
 
-int ufs_qcom_phy_enable_iface_clk(struct phy *phy);
-void ufs_qcom_phy_disable_iface_clk(struct phy *phy);
 int ufs_qcom_phy_start_serdes(struct phy *phy);
 int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes);
 int ufs_qcom_phy_calibrate_phy(struct phy *phy, bool is_rate_B);
-- 
cgit v1.2.3


From 3aeed5b573f97b4525841cc07c1e948227af389f Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 8 Nov 2016 16:34:57 -0800
Subject: Input: synaptics-rmi4 - move IRQ handling to rmi_driver

The attn IRQ is related to the chip, rather than the transport, so move
all handling of interrupts to the core driver. This also makes sure that
there are no races between interrupts and availability of the resources
used by the core driver.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_driver.c | 73 +++++++++++++++++++++++++++++++++++++---
 drivers/input/rmi4/rmi_i2c.c    | 74 +++--------------------------------------
 drivers/input/rmi4/rmi_spi.c    | 72 +++------------------------------------
 include/linux/rmi.h             |  7 ++--
 4 files changed, 83 insertions(+), 143 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 06feede3ce17..4f8d19794b01 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -17,6 +17,7 @@
 #include <linux/bitmap.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
+#include <linux/irq.h>
 #include <linux/kconfig.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
@@ -136,7 +137,7 @@ static void process_one_interrupt(struct rmi_driver_data *data,
 	}
 }
 
-int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
+static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 {
 	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
 	struct device *dev = &rmi_dev->dev;
@@ -181,7 +182,42 @@ int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rmi_process_interrupt_requests);
+
+static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
+{
+	struct rmi_device *rmi_dev = dev_id;
+	int ret;
+
+	ret = rmi_process_interrupt_requests(rmi_dev);
+	if (ret)
+		rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
+			"Failed to process interrupt request: %d\n", ret);
+
+	return IRQ_HANDLED;
+}
+
+static int rmi_irq_init(struct rmi_device *rmi_dev)
+{
+	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	int irq_flags = irq_get_trigger_type(pdata->irq);
+	int ret;
+
+	if (!irq_flags)
+		irq_flags = IRQF_TRIGGER_LOW;
+
+	ret = devm_request_threaded_irq(&rmi_dev->dev, pdata->irq, NULL,
+					rmi_irq_fn, irq_flags | IRQF_ONESHOT,
+					dev_name(rmi_dev->xport->dev),
+					rmi_dev);
+	if (ret < 0) {
+		dev_err(&rmi_dev->dev, "Failed to register interrupt %d\n",
+			pdata->irq);
+
+		return ret;
+	}
+
+	return 0;
+}
 
 static int suspend_one_function(struct rmi_function *fn)
 {
@@ -802,8 +838,10 @@ err_put_fn:
 	return error;
 }
 
-int rmi_driver_suspend(struct rmi_device *rmi_dev)
+int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake)
 {
+	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	int irq = pdata->irq;
 	int retval = 0;
 
 	retval = rmi_suspend_functions(rmi_dev);
@@ -811,14 +849,33 @@ int rmi_driver_suspend(struct rmi_device *rmi_dev)
 		dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n",
 			retval);
 
+	disable_irq(irq);
+	if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) {
+		retval = enable_irq_wake(irq);
+		if (!retval)
+			dev_warn(&rmi_dev->dev,
+				 "Failed to enable irq for wake: %d\n",
+				 retval);
+	}
 	return retval;
 }
 EXPORT_SYMBOL_GPL(rmi_driver_suspend);
 
-int rmi_driver_resume(struct rmi_device *rmi_dev)
+int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake)
 {
+	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	int irq = pdata->irq;
 	int retval;
 
+	enable_irq(irq);
+	if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
+		retval = disable_irq_wake(irq);
+		if (!retval)
+			dev_warn(&rmi_dev->dev,
+				 "Failed to disable irq for wake: %d\n",
+				 retval);
+	}
+
 	retval = rmi_resume_functions(rmi_dev);
 	if (retval)
 		dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n",
@@ -831,6 +888,10 @@ EXPORT_SYMBOL_GPL(rmi_driver_resume);
 static int rmi_driver_remove(struct device *dev)
 {
 	struct rmi_device *rmi_dev = to_rmi_device(dev);
+	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	int irq = pdata->irq;
+
+	disable_irq(irq);
 
 	rmi_free_function_list(rmi_dev);
 
@@ -1050,6 +1111,10 @@ static int rmi_driver_probe(struct device *dev)
 		}
 	}
 
+	retval = rmi_irq_init(rmi_dev);
+	if (retval < 0)
+		goto err_destroy_functions;
+
 	if (data->f01_container->dev.driver)
 		/* Driver already bound, so enable ATTN now. */
 		return enable_sensor(rmi_dev);
diff --git a/drivers/input/rmi4/rmi_i2c.c b/drivers/input/rmi4/rmi_i2c.c
index 6f2e0e4f0296..64a548822da4 100644
--- a/drivers/input/rmi4/rmi_i2c.c
+++ b/drivers/input/rmi4/rmi_i2c.c
@@ -9,7 +9,6 @@
 
 #include <linux/i2c.h>
 #include <linux/rmi.h>
-#include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/delay.h>
 #include <linux/regulator/consumer.h>
@@ -35,8 +34,6 @@ struct rmi_i2c_xport {
 	struct mutex page_mutex;
 	int page;
 
-	int irq;
-
 	u8 *tx_buf;
 	size_t tx_buf_size;
 
@@ -177,42 +174,6 @@ static const struct rmi_transport_ops rmi_i2c_ops = {
 	.read_block	= rmi_i2c_read_block,
 };
 
-static irqreturn_t rmi_i2c_irq(int irq, void *dev_id)
-{
-	struct rmi_i2c_xport *rmi_i2c = dev_id;
-	struct rmi_device *rmi_dev = rmi_i2c->xport.rmi_dev;
-	int ret;
-
-	ret = rmi_process_interrupt_requests(rmi_dev);
-	if (ret)
-		rmi_dbg(RMI_DEBUG_XPORT, &rmi_dev->dev,
-			"Failed to process interrupt request: %d\n", ret);
-
-	return IRQ_HANDLED;
-}
-
-static int rmi_i2c_init_irq(struct i2c_client *client)
-{
-	struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client);
-	int irq_flags = irqd_get_trigger_type(irq_get_irq_data(rmi_i2c->irq));
-	int ret;
-
-	if (!irq_flags)
-		irq_flags = IRQF_TRIGGER_LOW;
-
-	ret = devm_request_threaded_irq(&client->dev, rmi_i2c->irq, NULL,
-			rmi_i2c_irq, irq_flags | IRQF_ONESHOT, client->name,
-			rmi_i2c);
-	if (ret < 0) {
-		dev_warn(&client->dev, "Failed to register interrupt %d\n",
-			rmi_i2c->irq);
-
-		return ret;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_OF
 static const struct of_device_id rmi_i2c_of_match[] = {
 	{ .compatible = "syna,rmi4-i2c" },
@@ -240,8 +201,7 @@ static int rmi_i2c_probe(struct i2c_client *client,
 	if (!client->dev.of_node && client_pdata)
 		*pdata = *client_pdata;
 
-	if (client->irq > 0)
-		rmi_i2c->irq = client->irq;
+	pdata->irq = client->irq;
 
 	rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Probing %s.\n",
 			dev_name(&client->dev));
@@ -295,10 +255,6 @@ static int rmi_i2c_probe(struct i2c_client *client,
 		return retval;
 	}
 
-	retval = rmi_i2c_init_irq(client);
-	if (retval < 0)
-		return retval;
-
 	dev_info(&client->dev, "registered rmi i2c driver at %#04x.\n",
 			client->addr);
 	return 0;
@@ -322,18 +278,10 @@ static int rmi_i2c_suspend(struct device *dev)
 	struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client);
 	int ret;
 
-	ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev);
+	ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev, true);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
-	disable_irq(rmi_i2c->irq);
-	if (device_may_wakeup(&client->dev)) {
-		ret = enable_irq_wake(rmi_i2c->irq);
-		if (!ret)
-			dev_warn(dev, "Failed to enable irq for wake: %d\n",
-				ret);
-	}
-
 	regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
 			       rmi_i2c->supplies);
 
@@ -353,15 +301,7 @@ static int rmi_i2c_resume(struct device *dev)
 
 	msleep(rmi_i2c->startup_delay);
 
-	enable_irq(rmi_i2c->irq);
-	if (device_may_wakeup(&client->dev)) {
-		ret = disable_irq_wake(rmi_i2c->irq);
-		if (!ret)
-			dev_warn(dev, "Failed to disable irq for wake: %d\n",
-				ret);
-	}
-
-	ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev);
+	ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev, true);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
@@ -376,12 +316,10 @@ static int rmi_i2c_runtime_suspend(struct device *dev)
 	struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client);
 	int ret;
 
-	ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev);
+	ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev, false);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
-	disable_irq(rmi_i2c->irq);
-
 	regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
 			       rmi_i2c->supplies);
 
@@ -401,9 +339,7 @@ static int rmi_i2c_runtime_resume(struct device *dev)
 
 	msleep(rmi_i2c->startup_delay);
 
-	enable_irq(rmi_i2c->irq);
-
-	ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev);
+	ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev, false);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c
index 55bd1b34970c..f3e9e488635c 100644
--- a/drivers/input/rmi4/rmi_spi.c
+++ b/drivers/input/rmi4/rmi_spi.c
@@ -12,7 +12,6 @@
 #include <linux/rmi.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
-#include <linux/irq.h>
 #include <linux/of.h>
 #include "rmi_driver.h"
 
@@ -44,8 +43,6 @@ struct rmi_spi_xport {
 	struct mutex page_mutex;
 	int page;
 
-	int irq;
-
 	u8 *rx_buf;
 	u8 *tx_buf;
 	int xfer_buf_size;
@@ -326,41 +323,6 @@ static const struct rmi_transport_ops rmi_spi_ops = {
 	.read_block	= rmi_spi_read_block,
 };
 
-static irqreturn_t rmi_spi_irq(int irq, void *dev_id)
-{
-	struct rmi_spi_xport *rmi_spi = dev_id;
-	struct rmi_device *rmi_dev = rmi_spi->xport.rmi_dev;
-	int ret;
-
-	ret = rmi_process_interrupt_requests(rmi_dev);
-	if (ret)
-		rmi_dbg(RMI_DEBUG_XPORT, &rmi_dev->dev,
-			"Failed to process interrupt request: %d\n", ret);
-
-	return IRQ_HANDLED;
-}
-
-static int rmi_spi_init_irq(struct spi_device *spi)
-{
-	struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
-	int irq_flags = irqd_get_trigger_type(irq_get_irq_data(rmi_spi->irq));
-	int ret;
-
-	if (!irq_flags)
-		irq_flags = IRQF_TRIGGER_LOW;
-
-	ret = devm_request_threaded_irq(&spi->dev, rmi_spi->irq, NULL,
-			rmi_spi_irq, irq_flags | IRQF_ONESHOT,
-			dev_name(&spi->dev), rmi_spi);
-	if (ret < 0) {
-		dev_warn(&spi->dev, "Failed to register interrupt %d\n",
-			rmi_spi->irq);
-		return ret;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_OF
 static int rmi_spi_of_probe(struct spi_device *spi,
 			struct rmi_device_platform_data *pdata)
@@ -433,8 +395,7 @@ static int rmi_spi_probe(struct spi_device *spi)
 		return retval;
 	}
 
-	if (spi->irq > 0)
-		rmi_spi->irq = spi->irq;
+	pdata->irq = spi->irq;
 
 	rmi_spi->spi = spi;
 	mutex_init(&rmi_spi->page_mutex);
@@ -465,10 +426,6 @@ static int rmi_spi_probe(struct spi_device *spi)
 		return retval;
 	}
 
-	retval = rmi_spi_init_irq(spi);
-	if (retval < 0)
-		return retval;
-
 	dev_info(&spi->dev, "registered RMI SPI driver\n");
 	return 0;
 }
@@ -489,17 +446,10 @@ static int rmi_spi_suspend(struct device *dev)
 	struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
 	int ret;
 
-	ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev);
+	ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev, true);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
-	disable_irq(rmi_spi->irq);
-	if (device_may_wakeup(&spi->dev)) {
-		ret = enable_irq_wake(rmi_spi->irq);
-		if (!ret)
-			dev_warn(dev, "Failed to enable irq for wake: %d\n",
-				ret);
-	}
 	return ret;
 }
 
@@ -509,15 +459,7 @@ static int rmi_spi_resume(struct device *dev)
 	struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
 	int ret;
 
-	enable_irq(rmi_spi->irq);
-	if (device_may_wakeup(&spi->dev)) {
-		ret = disable_irq_wake(rmi_spi->irq);
-		if (!ret)
-			dev_warn(dev, "Failed to disable irq for wake: %d\n",
-				ret);
-	}
-
-	ret = rmi_driver_resume(rmi_spi->xport.rmi_dev);
+	ret = rmi_driver_resume(rmi_spi->xport.rmi_dev, true);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
@@ -532,12 +474,10 @@ static int rmi_spi_runtime_suspend(struct device *dev)
 	struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
 	int ret;
 
-	ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev);
+	ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev, false);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
-	disable_irq(rmi_spi->irq);
-
 	return 0;
 }
 
@@ -547,9 +487,7 @@ static int rmi_spi_runtime_resume(struct device *dev)
 	struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
 	int ret;
 
-	enable_irq(rmi_spi->irq);
-
-	ret = rmi_driver_resume(rmi_spi->xport.rmi_dev);
+	ret = rmi_driver_resume(rmi_spi->xport.rmi_dev, false);
 	if (ret)
 		dev_warn(dev, "Failed to resume device: %d\n", ret);
 
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index e0aca1476001..5944e6c2470d 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -204,9 +204,11 @@ struct rmi_device_platform_data_spi {
  * @reset_delay_ms - after issuing a reset command to the touch sensor, the
  * driver waits a few milliseconds to give the firmware a chance to
  * to re-initialize.  You can override the default wait period here.
+ * @irq: irq associated with the attn gpio line, or negative
  */
 struct rmi_device_platform_data {
 	int reset_delay_ms;
+	int irq;
 
 	struct rmi_device_platform_data_spi spi_data;
 
@@ -352,8 +354,7 @@ struct rmi_driver_data {
 
 int rmi_register_transport_device(struct rmi_transport_dev *xport);
 void rmi_unregister_transport_device(struct rmi_transport_dev *xport);
-int rmi_process_interrupt_requests(struct rmi_device *rmi_dev);
 
-int rmi_driver_suspend(struct rmi_device *rmi_dev);
-int rmi_driver_resume(struct rmi_device *rmi_dev);
+int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake);
+int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake);
 #endif
-- 
cgit v1.2.3


From 2775e523246e11c5ce90b69226c5e67aa43e64a5 Mon Sep 17 00:00:00 2001
From: Andrew Duggan <aduggan@synaptics.com>
Date: Tue, 8 Nov 2016 16:48:48 -0800
Subject: Input: synaptics-rmi4 - add parameters for dribble packets and palm
 detect gesture

The rmi_f11 driver currently disables dribble packets and the palm detect
gesture for all devices. This patch creates a parameter in the 2d sensor
platform data for controlling this functionality on a per device basis.

For more information on dribble packets:
Commit 05ba999fcabb ("HID: rmi: disable dribble packets on Synaptics
touchpads")

For more information on the palm detect gesture:
Commit f097deef59a6 ("HID: rmi: disable palm detect gesture when present")

Signed-off-by: Andrew Duggan <aduggan@synaptics.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_2d_sensor.h |  2 ++
 drivers/input/rmi4/rmi_f01.c       |  6 +++---
 drivers/input/rmi4/rmi_f11.c       | 32 ++++++++++++++++++++++++++++----
 include/linux/rmi.h                | 21 +++++++++++++--------
 4 files changed, 46 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_2d_sensor.h b/drivers/input/rmi4/rmi_2d_sensor.h
index 77fcdfef003c..c871bef4dac0 100644
--- a/drivers/input/rmi4/rmi_2d_sensor.h
+++ b/drivers/input/rmi4/rmi_2d_sensor.h
@@ -67,6 +67,8 @@ struct rmi_2d_sensor {
 	u8 report_rel;
 	u8 x_mm;
 	u8 y_mm;
+	enum rmi_reg_state dribble;
+	enum rmi_reg_state palm_detect;
 };
 
 int rmi_2d_sensor_of_probe(struct device *dev,
diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c
index fac81fc9bcf6..2cfa9f64acfb 100644
--- a/drivers/input/rmi4/rmi_f01.c
+++ b/drivers/input/rmi4/rmi_f01.c
@@ -327,12 +327,12 @@ static int rmi_f01_probe(struct rmi_function *fn)
 	}
 
 	switch (pdata->power_management.nosleep) {
-	case RMI_F01_NOSLEEP_DEFAULT:
+	case RMI_REG_STATE_DEFAULT:
 		break;
-	case RMI_F01_NOSLEEP_OFF:
+	case RMI_REG_STATE_OFF:
 		f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_NOSLEEP_BIT;
 		break;
-	case RMI_F01_NOSLEEP_ON:
+	case RMI_REG_STATE_ON:
 		f01->device_control.ctrl0 |= RMI_F01_CTRL0_NOSLEEP_BIT;
 		break;
 	}
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index 3218742d2d8c..c252d405df4c 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1142,6 +1142,8 @@ static int rmi_f11_initialize(struct rmi_function *fn)
 	sensor->topbuttonpad = f11->sensor_pdata.topbuttonpad;
 	sensor->kernel_tracking = f11->sensor_pdata.kernel_tracking;
 	sensor->dmax = f11->sensor_pdata.dmax;
+	sensor->dribble = f11->sensor_pdata.dribble;
+	sensor->palm_detect = f11->sensor_pdata.palm_detect;
 
 	if (f11->sens_query.has_physical_props) {
 		sensor->x_mm = f11->sens_query.x_sensor_size_mm;
@@ -1209,11 +1211,33 @@ static int rmi_f11_initialize(struct rmi_function *fn)
 		ctrl->ctrl0_11[RMI_F11_DELTA_Y_THRESHOLD] =
 			sensor->axis_align.delta_y_threshold;
 
-	if (f11->sens_query.has_dribble)
-		ctrl->ctrl0_11[0] = ctrl->ctrl0_11[0] & ~BIT(6);
+	if (f11->sens_query.has_dribble) {
+		switch (sensor->dribble) {
+		case RMI_REG_STATE_OFF:
+			ctrl->ctrl0_11[0] &= ~BIT(6);
+			break;
+		case RMI_REG_STATE_ON:
+			ctrl->ctrl0_11[0] |= BIT(6);
+			break;
+		case RMI_REG_STATE_DEFAULT:
+		default:
+			break;
+		}
+	}
 
-	if (f11->sens_query.has_palm_det)
-		ctrl->ctrl0_11[11] = ctrl->ctrl0_11[11] & ~BIT(0);
+	if (f11->sens_query.has_palm_det) {
+		switch (sensor->palm_detect) {
+		case RMI_REG_STATE_OFF:
+			ctrl->ctrl0_11[11] &= ~BIT(0);
+			break;
+		case RMI_REG_STATE_ON:
+			ctrl->ctrl0_11[11] |= BIT(0);
+			break;
+		case RMI_REG_STATE_DEFAULT:
+		default:
+			break;
+		}
+	}
 
 	rc = f11_write_control_regs(fn, &f11->sens_query,
 			   &f11->dev_controls, fn->fd.query_base_addr);
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 5944e6c2470d..ac904bb439a5 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -99,6 +99,8 @@ struct rmi_2d_sensor_platform_data {
 	bool topbuttonpad;
 	bool kernel_tracking;
 	int dmax;
+	int dribble;
+	int palm_detect;
 };
 
 /**
@@ -116,14 +118,17 @@ struct rmi_f30_data {
 	bool disable;
 };
 
-/**
- * struct rmi_f01_power - override default power management settings.
- *
+
+/*
+ * Set the state of a register
+ *	DEFAULT - use the default value set by the firmware config
+ *	OFF - explicitly disable the register
+ *	ON - explicitly enable the register
  */
-enum rmi_f01_nosleep {
-	RMI_F01_NOSLEEP_DEFAULT = 0,
-	RMI_F01_NOSLEEP_OFF = 1,
-	RMI_F01_NOSLEEP_ON = 2
+enum rmi_reg_state {
+	RMI_REG_STATE_DEFAULT = 0,
+	RMI_REG_STATE_OFF = 1,
+	RMI_REG_STATE_ON = 2
 };
 
 /**
@@ -143,7 +148,7 @@ enum rmi_f01_nosleep {
  * when the touch sensor is in doze mode, in units of 10ms.
  */
 struct rmi_f01_power_management {
-	enum rmi_f01_nosleep nosleep;
+	enum rmi_reg_state nosleep;
 	u8 wakeup_threshold;
 	u8 doze_holdoff;
 	u8 doze_interval;
-- 
cgit v1.2.3


From 20e407e195b29a4f5a18d713a61f54a75f992bd5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Nov 2016 17:15:01 -0800
Subject: genirq/affinity: Introduce struct irq_affinity

Some drivers (various network and RDMA adapter for example) have a MSI-X
vector layout where most of the vectors are used for I/O queues and should
have CPU affinity assigned to them, but some (usually 1 but sometimes more)
at the beginning or end are used for low-performance admin or configuration
work and should not have any explicit affinity assigned to them.

Add a new irq_affinity structure, which will be passed through a variant of
pci_irq_alloc_vectors that allows to specify these requirements (and is
extensible to any future quirks in that area) so that the core IRQ affinity
algorithm can take this quirks into account.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-2-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 72f0721f75e7..6b5268688a81 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -232,6 +232,18 @@ struct irq_affinity_notify {
 	void (*release)(struct kref *ref);
 };
 
+/**
+ * struct irq_affinity - Description for automatic irq affinity assignements
+ * @pre_vectors:	Don't apply affinity to @pre_vectors at beginning of
+ *			the MSI(-X) vector space
+ * @post_vectors:	Don't apply affinity to @post_vectors at end of
+ *			the MSI(-X) vector space
+ */
+struct irq_affinity {
+	int	pre_vectors;
+	int	post_vectors;
+};
+
 #if defined(CONFIG_SMP)
 
 extern cpumask_var_t irq_default_affinity;
-- 
cgit v1.2.3


From 212bd846223c718b6577d4df16fd8d05a55ad914 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Nov 2016 17:15:02 -0800
Subject: genirq/affinity: Handle pre/post vectors in
 irq_calc_affinity_vectors()

Only calculate the affinity for the main I/O vectors, and skip the pre or
post vectors specified by struct irq_affinity.

Also remove the irq_affinity cpumask argument that has never been used.  If
we ever need it in the future we can pass it through struct irq_affinity.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-3-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/msi.c         |  8 ++++----
 include/linux/interrupt.h |  4 ++--
 kernel/irq/affinity.c     | 24 ++++++++++--------------
 3 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index ad70507cfb56..dad2da7cf80e 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1061,6 +1061,7 @@ EXPORT_SYMBOL(pci_msi_enabled);
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 		unsigned int flags)
 {
+	static const struct irq_affinity default_affd;
 	bool affinity = flags & PCI_IRQ_AFFINITY;
 	int nvec;
 	int rc;
@@ -1091,8 +1092,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 
 	for (;;) {
 		if (affinity) {
-			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
-					nvec);
+			nvec = irq_calc_affinity_vectors(nvec, &default_affd);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
@@ -1132,6 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 		struct msix_entry *entries, int minvec, int maxvec,
 		unsigned int flags)
 {
+	static const struct irq_affinity default_affd;
 	bool affinity = flags & PCI_IRQ_AFFINITY;
 	int rc, nvec = maxvec;
 
@@ -1140,8 +1141,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 
 	for (;;) {
 		if (affinity) {
-			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
-					nvec);
+			nvec = irq_calc_affinity_vectors(nvec, &default_affd);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6b5268688a81..9081f23bc0ff 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -291,7 +291,7 @@ extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
 struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
-int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);
+int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd);
 
 #else /* CONFIG_SMP */
 
@@ -331,7 +331,7 @@ irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
 }
 
 static inline int
-irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
 {
 	return maxvec;
 }
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 17f51d63da56..8d9259727cb4 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -131,24 +131,20 @@ out:
 }
 
 /**
- * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
- * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
- *			is used
- * @maxvec:		The maximum number of vectors available
+ * irq_calc_affinity_vectors - Calculate the optimal number of vectors
+ * @maxvec:	The maximum number of vectors available
+ * @affd:	Description of the affinity requirements
  */
-int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
 {
-	int cpus, ret;
+	int resv = affd->pre_vectors + affd->post_vectors;
+	int vecs = maxvec - resv;
+	int cpus;
 
 	/* Stabilize the cpumasks */
 	get_online_cpus();
-	/* If the supplied affinity mask is NULL, use cpu online mask */
-	if (!affinity)
-		affinity = cpu_online_mask;
-
-	cpus = cpumask_weight(affinity);
-	ret = (cpus < maxvec) ? cpus : maxvec;
-
+	cpus = cpumask_weight(cpu_online_mask);
 	put_online_cpus();
-	return ret;
+
+	return min(cpus, vecs) + resv;
 }
-- 
cgit v1.2.3


From 67c93c218dc5d1b45d547771f1fdb44a381e1faf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Nov 2016 17:15:03 -0800
Subject: genirq/affinity: Handle pre/post vectors in
 irq_create_affinity_masks()

Only calculate the affinity for the main I/O vectors, and skip the
pre or post vectors specified by struct irq_affinity.

Also remove the irq_affinity cpumask argument that has never been used.
If we ever need it in the future we can pass it through struct
irq_affinity.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-4-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/msi.c         |  6 ++++--
 include/linux/interrupt.h |  4 ++--
 kernel/irq/affinity.c     | 46 +++++++++++++++++++++++++---------------------
 3 files changed, 31 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index dad2da7cf80e..f4a108b59336 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -553,12 +553,13 @@ error_attrs:
 static struct msi_desc *
 msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
 {
+	static const struct irq_affinity default_affd;
 	struct cpumask *masks = NULL;
 	struct msi_desc *entry;
 	u16 control;
 
 	if (affinity) {
-		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		masks = irq_create_affinity_masks(nvec, &default_affd);
 		if (!masks)
 			pr_err("Unable to allocate affinity masks, ignoring\n");
 	}
@@ -692,12 +693,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			      struct msix_entry *entries, int nvec,
 			      bool affinity)
 {
+	static const struct irq_affinity default_affd;
 	struct cpumask *curmsk, *masks = NULL;
 	struct msi_desc *entry;
 	int ret, i;
 
 	if (affinity) {
-		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		masks = irq_create_affinity_masks(nvec, &default_affd);
 		if (!masks)
 			pr_err("Unable to allocate affinity masks, ignoring\n");
 	}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9081f23bc0ff..53144e78a369 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -290,7 +290,7 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
+struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd);
 int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd);
 
 #else /* CONFIG_SMP */
@@ -325,7 +325,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 }
 
 static inline struct cpumask *
-irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
+irq_create_affinity_masks(int nvec, const struct irq_affinity *affd)
 {
 	return NULL;
 }
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 8d9259727cb4..17360bd9619b 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -51,16 +51,16 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
 
 /**
  * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
- * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
- *			is used
- * @nvecs:		The number of vectors
+ * @nvecs:	The total number of vectors
+ * @affd:	Description of the affinity requirements
  *
  * Returns the masks pointer or NULL if allocation failed.
  */
-struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
-					  int nvec)
+struct cpumask *
+irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 {
-	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+	int affv = nvecs - affd->pre_vectors - affd->post_vectors;
 	nodemask_t nodemsk = NODE_MASK_NONE;
 	struct cpumask *masks;
 	cpumask_var_t nmsk;
@@ -68,46 +68,46 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
 	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return NULL;
 
-	masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		goto out;
 
+	/* Fill out vectors at the beginning that don't need affinity */
+	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+		cpumask_copy(masks + curvec, cpu_possible_mask);
+
 	/* Stabilize the cpumasks */
 	get_online_cpus();
-	/* If the supplied affinity mask is NULL, use cpu online mask */
-	if (!affinity)
-		affinity = cpu_online_mask;
-
-	nodes = get_nodes_in_cpumask(affinity, &nodemsk);
+	nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk);
 
 	/*
 	 * If the number of nodes in the mask is less than or equal the
 	 * number of vectors we just spread the vectors across the nodes.
 	 */
-	if (nvec <= nodes) {
+	if (affv <= nodes) {
 		for_each_node_mask(n, nodemsk) {
 			cpumask_copy(masks + curvec, cpumask_of_node(n));
-			if (++curvec == nvec)
+			if (++curvec == affv)
 				break;
 		}
-		goto outonl;
+		goto done;
 	}
 
 	/* Spread the vectors per node */
-	vecs_per_node = nvec / nodes;
+	vecs_per_node = affv / nodes;
 	/* Account for rounding errors */
-	extra_vecs = nvec - (nodes * vecs_per_node);
+	extra_vecs = affv - (nodes * vecs_per_node);
 
 	for_each_node_mask(n, nodemsk) {
 		int ncpus, v, vecs_to_assign = vecs_per_node;
 
 		/* Get the cpus on this node which are in the mask */
-		cpumask_and(nmsk, affinity, cpumask_of_node(n));
+		cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
 
 		/* Calculate the number of cpus per vector */
 		ncpus = cpumask_weight(nmsk);
 
-		for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+		for (v = 0; curvec < affv && v < vecs_to_assign; curvec++, v++) {
 			cpus_per_vec = ncpus / vecs_to_assign;
 
 			/* Account for extra vectors to compensate rounding errors */
@@ -119,12 +119,16 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
 			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
 		}
 
-		if (curvec >= nvec)
+		if (curvec >= affv)
 			break;
 	}
 
-outonl:
+done:
 	put_online_cpus();
+
+	/* Fill out vectors at the end that don't need affinity */
+	for (; curvec < nvecs; curvec++)
+		cpumask_copy(masks + curvec, cpu_possible_mask);
 out:
 	free_cpumask_var(nmsk);
 	return masks;
-- 
cgit v1.2.3


From 402723ad5c625ee052432698ae5e56b02d38d4ec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Nov 2016 17:15:05 -0800
Subject: PCI/MSI: Provide pci_alloc_irq_vectors_affinity()

This is a variant of pci_alloc_irq_vectors() that allows passing a struct
irq_affinity to provide fine-grained IRQ affinity control.

For now this means being able to exclude vectors at the beginning or end of
the MSI vector space, but it could also be used for any other quirks needed
in the future (e.g. more vectors than CPUs, or excluding CPUs from the
spreading).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-6-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/msi.c   | 20 +++++++++++++-------
 include/linux/pci.h | 24 +++++++++++++++++++-----
 2 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 512f388a74f2..dd27f73a45fc 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1179,11 +1179,12 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 EXPORT_SYMBOL(pci_enable_msix_range);
 
 /**
- * pci_alloc_irq_vectors - allocate multiple IRQs for a device
+ * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
  * @dev:		PCI device to operate on
  * @min_vecs:		minimum number of vectors required (must be >= 1)
  * @max_vecs:		maximum (desired) number of vectors
  * @flags:		flags or quirks for the allocation
+ * @affd:		optional description of the affinity requirements
  *
  * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
  * vectors if available, and fall back to a single legacy vector
@@ -1195,15 +1196,20 @@ EXPORT_SYMBOL(pci_enable_msix_range);
  * To get the Linux IRQ number used for a vector that can be passed to
  * request_irq() use the pci_irq_vector() helper.
  */
-int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-		unsigned int max_vecs, unsigned int flags)
+int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+				   unsigned int max_vecs, unsigned int flags,
+				   const struct irq_affinity *affd)
 {
 	static const struct irq_affinity msi_default_affd;
-	const struct irq_affinity *affd = NULL;
 	int vecs = -ENOSPC;
 
-	if (flags & PCI_IRQ_AFFINITY)
-		affd = &msi_default_affd;
+	if (flags & PCI_IRQ_AFFINITY) {
+		if (!affd)
+			affd = &msi_default_affd;
+	} else {
+		if (WARN_ON(affd))
+			affd = NULL;
+	}
 
 	if (flags & PCI_IRQ_MSIX) {
 		vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
@@ -1226,7 +1232,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 
 	return vecs;
 }
-EXPORT_SYMBOL(pci_alloc_irq_vectors);
+EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
 
 /**
  * pci_free_irq_vectors - free previously allocated IRQs for a device
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e49f70dbd9b..7090f5ff7252 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -244,6 +244,7 @@ struct pci_cap_saved_state {
 	struct pci_cap_saved_data cap;
 };
 
+struct irq_affinity;
 struct pcie_link_state;
 struct pci_vpd;
 struct pci_sriov;
@@ -1310,8 +1311,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 		return rc;
 	return 0;
 }
-int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-		unsigned int max_vecs, unsigned int flags);
+int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+				   unsigned int max_vecs, unsigned int flags,
+				   const struct irq_affinity *affd);
+
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
 const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
@@ -1339,14 +1342,17 @@ static inline int pci_enable_msix_range(struct pci_dev *dev,
 static inline int pci_enable_msix_exact(struct pci_dev *dev,
 		      struct msix_entry *entries, int nvec)
 { return -ENOSYS; }
-static inline int pci_alloc_irq_vectors(struct pci_dev *dev,
-		unsigned int min_vecs, unsigned int max_vecs,
-		unsigned int flags)
+
+static inline int
+pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+			       unsigned int max_vecs, unsigned int flags,
+			       const struct irq_affinity *aff_desc)
 {
 	if (min_vecs > 1)
 		return -EINVAL;
 	return 1;
 }
+
 static inline void pci_free_irq_vectors(struct pci_dev *dev)
 {
 }
@@ -1364,6 +1370,14 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
 }
 #endif
 
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+		      unsigned int max_vecs, unsigned int flags)
+{
+	return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
+					      NULL);
+}
+
 #ifdef CONFIG_PCIEPORTBUS
 extern bool pcie_ports_disabled;
 extern bool pcie_ports_auto;
-- 
cgit v1.2.3


From 0cf71b04467bc34063cecae577f12481da6cc565 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Nov 2016 17:15:06 -0800
Subject: PCI: Remove the irq_affinity mask from struct pci_dev

This has never been used, and now is totally unreferenced.  Nuke it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-7-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7090f5ff7252..f2ba6ac21c75 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -333,7 +333,6 @@ struct pci_dev {
 	 * directly, use the values stored here. They might be different!
 	 */
 	unsigned int	irq;
-	struct cpumask	*irq_affinity;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
 	bool match_driver;		/* Skip attaching driver */
-- 
cgit v1.2.3


From 67db3e4bfbc90657c7be840aad5585be46240d6f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 4 Nov 2016 11:54:32 -0700
Subject: tcp: no longer hold ehash lock while calling tcp_get_info()

We had various problems in the past in tcp_get_info() and used
specific synchronization to avoid deadlocks.

We would like to add more instrumentation points for TCP, and
avoiding grabing socket lock in tcp_getinfo() was too costly.

Being able to lock the socket allows to provide consistent set
of fields.

inet_diag_dump_icsk() can make sure ehash locks are not
held any more when tcp_get_info() is called.

We can remove syncp added in commit d654976cbf85
("tcp: fix a potential deadlock in tcp_get_info()"), but we need
to use lock_sock_fast() instead of spin_lock_bh() since TCP input
path can now be run from process context.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  2 --
 net/ipv4/inet_diag.c | 48 +++++++++++++++++++++++++++++++++---------------
 net/ipv4/tcp.c       | 20 +++++++++-----------
 net/ipv4/tcp_input.c |  4 ----
 4 files changed, 42 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a17ae7b85218..32a7c7e35b71 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -176,8 +176,6 @@ struct tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
-	struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
-
  	u32	snd_una;	/* First byte we want an ack for	*/
  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3b34024202d8..4dea33e5f295 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 			 struct netlink_callback *cb,
 			 const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 	struct net *net = sock_net(skb->sk);
-	int i, num, s_i, s_num;
 	u32 idiag_states = r->idiag_states;
-	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+	int i, num, s_i, s_num;
+	struct sock *sk;
 
 	if (idiag_states & TCPF_SYN_RECV)
 		idiag_states |= TCPF_NEW_SYN_RECV;
@@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 			struct inet_listen_hashbucket *ilb;
-			struct sock *sk;
 
 			num = 0;
 			ilb = &hashinfo->listening_hash[i];
@@ -922,13 +922,14 @@ skip_listen_ht:
 	if (!(idiag_states & ~TCPF_LISTEN))
 		goto out;
 
+#define SKARR_SZ 16
 	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
 		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
 		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
 		struct hlist_nulls_node *node;
-		struct sock *sk;
-
-		num = 0;
+		struct sock *sk_arr[SKARR_SZ];
+		int num_arr[SKARR_SZ];
+		int idx, accum, res;
 
 		if (hlist_nulls_empty(&head->chain))
 			continue;
@@ -936,9 +937,12 @@ skip_listen_ht:
 		if (i > s_i)
 			s_num = 0;
 
+next_chunk:
+		num = 0;
+		accum = 0;
 		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &head->chain) {
-			int state, res;
+			int state;
 
 			if (!net_eq(sock_net(sk), net))
 				continue;
@@ -962,21 +966,35 @@ skip_listen_ht:
 			if (!inet_diag_bc_sk(bc, sk))
 				goto next_normal;
 
-			res = sk_diag_fill(sk, skb, r,
+			sock_hold(sk);
+			num_arr[accum] = num;
+			sk_arr[accum] = sk;
+			if (++accum == SKARR_SZ)
+				break;
+next_normal:
+			++num;
+		}
+		spin_unlock_bh(lock);
+		res = 0;
+		for (idx = 0; idx < accum; idx++) {
+			if (res >= 0) {
+				res = sk_diag_fill(sk_arr[idx], skb, r,
 					   sk_user_ns(NETLINK_CB(cb->skb).sk),
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   cb->nlh, net_admin);
-			if (res < 0) {
-				spin_unlock_bh(lock);
-				goto done;
+				if (res < 0)
+					num = num_arr[idx];
 			}
-next_normal:
-			++num;
+			sock_gen_put(sk_arr[idx]);
 		}
-
-		spin_unlock_bh(lock);
+		if (res < 0)
+			break;
 		cond_resched();
+		if (accum == SKARR_SZ) {
+			s_num = num + 1;
+			goto next_chunk;
+		}
 	}
 
 done:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 117982be0cab..a7d54cbcdabb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -405,7 +405,6 @@ void tcp_init_sock(struct sock *sk)
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = TCP_MSS_DEFAULT;
-	u64_stats_init(&tp->syncp);
 
 	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
 	tcp_enable_early_retrans(tp);
@@ -2710,9 +2709,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp, intv;
-	unsigned int start;
-	int notsent_bytes;
 	u64 rate64;
+	bool slow;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
@@ -2792,17 +2790,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_total_retrans = tp->total_retrans;
 
-	do {
-		start = u64_stats_fetch_begin_irq(&tp->syncp);
-		put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
-		put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
-	} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+	slow = lock_sock_fast(sk);
+
+	put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+	put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
+	info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
+
+	unlock_sock_fast(sk, slow);
+
 	info->tcpi_segs_out = tp->segs_out;
 	info->tcpi_segs_in = tp->segs_in;
 
-	notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
-	info->tcpi_notsent_bytes = max(0, notsent_bytes);
-
 	info->tcpi_min_rtt = tcp_min_rtt(tp);
 	info->tcpi_data_segs_in = tp->data_segs_in;
 	info->tcpi_data_segs_out = tp->data_segs_out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f2c59c8e57ff..a70046fea0e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
 	u32 delta = ack - tp->snd_una;
 
 	sock_owned_by_me((struct sock *)tp);
-	u64_stats_update_begin_raw(&tp->syncp);
 	tp->bytes_acked += delta;
-	u64_stats_update_end_raw(&tp->syncp);
 	tp->snd_una = ack;
 }
 
@@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
 	u32 delta = seq - tp->rcv_nxt;
 
 	sock_owned_by_me((struct sock *)tp);
-	u64_stats_update_begin_raw(&tp->syncp);
 	tp->bytes_received += delta;
-	u64_stats_update_end_raw(&tp->syncp);
 	tp->rcv_nxt = seq;
 }
 
-- 
cgit v1.2.3


From 5a3c7805c444d9d55f302a4b3930e8758be13fab Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Sat, 5 Nov 2016 14:04:52 +0100
Subject: Revert "net: stmmac: allow to split suspend/resume from init/exit
 callbacks"

Instead of adding hooks inside stmmac_platform it is better to just use
the standard PM callbacks within the specific dwmac-driver. This only
used by the dwmac-rk driver.

This reverts commit cecbc5563a02 ("stmmac: allow to split suspend/resume
from init/exit callbacks").

Signed-off-by: Joachim Eastwood <manabian@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++------
 include/linux/stmmac.h                                | 2 --
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 0a0d6a86f397..4d544c34c1f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -417,9 +417,7 @@ static int stmmac_pltfr_suspend(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 
 	ret = stmmac_suspend(dev);
-	if (priv->plat->suspend)
-		priv->plat->suspend(pdev, priv->plat->bsp_priv);
-	else if (priv->plat->exit)
+	if (priv->plat->exit)
 		priv->plat->exit(pdev, priv->plat->bsp_priv);
 
 	return ret;
@@ -438,9 +436,7 @@ static int stmmac_pltfr_resume(struct device *dev)
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	struct platform_device *pdev = to_platform_device(dev);
 
-	if (priv->plat->resume)
-		priv->plat->resume(pdev, priv->plat->bsp_priv);
-	else if (priv->plat->init)
+	if (priv->plat->init)
 		priv->plat->init(pdev, priv->plat->bsp_priv);
 
 	return stmmac_resume(dev);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 705840e0438f..3537fb33cc90 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -135,8 +135,6 @@ struct plat_stmmacenet_data {
 	void (*bus_setup)(void __iomem *ioaddr);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
-	void (*suspend)(struct platform_device *pdev, void *priv);
-	void (*resume)(struct platform_device *pdev, void *priv);
 	void *bsp_priv;
 	struct stmmac_axi *axi;
 	int has_gmac4;
-- 
cgit v1.2.3


From c9f1b073d0d750ccf8b30b272d1d76479f4cccbc Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Mon, 7 Nov 2016 15:14:44 +0200
Subject: net/mlx5: Add creation flags when adding new flow table

When creating flow tables, allow the caller to specify creation flags.
Currently no flags are used and as such this patch doesn't add any new
functionality.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/main.c                  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    |  6 ++---
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  2 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  7 +++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  7 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 28 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  1 +
 include/linux/mlx5/fs.h                            | 10 ++++++--
 12 files changed, 45 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 292ae8bbeae2..9b16431e1de8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1857,7 +1857,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 		ft = mlx5_create_auto_grouped_flow_table(ns, priority,
 							 num_entries,
 							 num_groups,
-							 0);
+							 0, 0);
 
 		if (!IS_ERR(ft)) {
 			prio->refcount = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 8ff22e83e1dd..677b23810953 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -324,7 +324,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
 	int err;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL);
+				       MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0);
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
 		ft->t = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index bed544d47ba1..9617892e0f15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -777,7 +777,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
 	int err;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL);
+				       MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0);
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
 		ft->t = NULL;
@@ -948,7 +948,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL);
+				       MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0);
 
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
@@ -1038,7 +1038,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL);
+				       MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0);
 
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index cf52c06377f2..87bb3db7b501 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
 			   MLX5E_ETHTOOL_NUM_ENTRIES);
 	ft = mlx5_create_auto_grouped_flow_table(ns, prio,
 						 table_size,
-						 MLX5E_ETHTOOL_NUM_GROUPS, 0);
+						 MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
 	if (IS_ERR(ft))
 		return (void *)ft;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 165682e2d2be..cdd430330e8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -83,7 +83,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 							    MLX5E_TC_PRIO,
 							    MLX5E_TC_TABLE_NUM_ENTRIES,
 							    MLX5E_TC_TABLE_NUM_GROUPS,
-							    0);
+							    0, 0);
 		if (IS_ERR(priv->fs.tc.t)) {
 			netdev_err(priv->netdev,
 				   "Failed to create tc offload table\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9ee002ecb4bb..27f21ac66639 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -361,7 +361,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	memset(flow_group_in, 0, inlen);
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
+	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 53d9d6ce008b..b18f9513e71e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -423,7 +423,8 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 
 	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
 						  ESW_OFFLOADS_NUM_ENTRIES,
-						  ESW_OFFLOADS_NUM_GROUPS, 0);
+						  ESW_OFFLOADS_NUM_GROUPS, 0,
+						  0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
@@ -432,7 +433,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	esw->fdb_table.fdb = fdb;
 
 	table_size = nvports + MAX_PF_SQ + 1;
-	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
@@ -524,7 +525,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 		return -ENOMEM;
 	}
 
-	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0);
+	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0);
 	if (IS_ERR(ft_offloads)) {
 		err = PTR_ERR(ft_offloads);
 		esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 301cec896eb6..cc97bb218e74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -37,6 +37,7 @@
 #include "fs_core.h"
 #include "fs_cmd.h"
 #include "mlx5_core.h"
+#include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 			    struct mlx5_flow_table *ft)
@@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id)
+			       *next_ft, unsigned int *table_id, u32 flags)
 {
+	int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
 	int err;
@@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 		MLX5_SET(create_flow_table_in, in, other_vport, 1);
 	}
 
+	MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap);
+	MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap);
+
 	switch (op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
 		if (next_ft) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 86bead1748a7..8fad80688536 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id);
+			       *next_ft, unsigned int *table_id, u32 flags);
 
 int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
 				struct mlx5_flow_table *ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e65eabf9c850..4d28c8d70482 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -505,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
 
 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
 						enum fs_flow_table_type table_type,
-						enum fs_flow_table_op_mod op_mod)
+						enum fs_flow_table_op_mod op_mod,
+						u32 flags)
 {
 	struct mlx5_flow_table *ft;
 
@@ -519,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
 	ft->type = table_type;
 	ft->vport = vport;
 	ft->max_fte = max_fte;
+	ft->flags = flags;
 	INIT_LIST_HEAD(&ft->fwd_rules);
 	mutex_init(&ft->lock);
 
@@ -777,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft,
 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
 							enum fs_flow_table_op_mod op_mod,
 							u16 vport, int prio,
-							int max_fte, u32 level)
+							int max_fte, u32 level,
+							u32 flags)
 {
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_table *ft;
@@ -810,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 			      vport,
 			      max_fte ? roundup_pow_of_two(max_fte) : 0,
 			      root->table_type,
-			      op_mod);
+			      op_mod, flags);
 	if (!ft) {
 		err = -ENOMEM;
 		goto unlock_root;
@@ -820,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
 	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
-					 ft->level, log_table_sz, next_ft, &ft->id);
+					 ft->level, log_table_sz, next_ft, &ft->id,
+					 ft->flags);
 	if (err)
 		goto free_ft;
 
@@ -845,10 +849,11 @@ unlock_root:
 
 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
 					       int prio, int max_fte,
-					       u32 level)
+					       u32 level,
+					       u32 flags)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio,
-					max_fte, level);
+					max_fte, level, flags);
 }
 
 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
@@ -856,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
 						     u32 level, u16 vport)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio,
-					max_fte, level);
+					max_fte, level, 0);
 }
 
 struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
@@ -864,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
 					       int prio, u32 level)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0,
-					level);
+					level, 0);
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
 
@@ -872,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam
 							    int prio,
 							    int num_flow_table_entries,
 							    int max_num_groups,
-							    u32 level)
+							    u32 level,
+							    u32 flags)
 {
 	struct mlx5_flow_table *ft;
 
 	if (max_num_groups > num_flow_table_entries)
 		return ERR_PTR(-EINVAL);
 
-	ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level);
+	ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags);
 	if (IS_ERR(ft))
 		return ft;
 
@@ -1822,7 +1828,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
 	if (!ns)
 		return -EINVAL;
-	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL);
+	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0);
 	if (IS_ERR(ft)) {
 		mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
 		return PTR_ERR(ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index d5150888645c..9f616ed25a89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -117,6 +117,7 @@ struct mlx5_flow_table {
 	struct mutex			lock;
 	/* FWD rules that point on this flow table */
 	struct list_head		fwd_rules;
+	u32				flags;
 };
 
 struct mlx5_fc_cache {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0dcd287f4bd0..ab1a5fd2e995 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -42,6 +42,10 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO	= 1 << 16,
 };
 
+enum {
+	MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0),
+};
+
 #define LEFTOVERS_RULE_NUM	 2
 static inline void build_leftovers_ft_param(int *priority,
 					    int *n_ent,
@@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 				    int prio,
 				    int num_flow_table_entries,
 				    int max_num_groups,
-				    u32 level);
+				    u32 level,
+				    u32 flags);
 
 struct mlx5_flow_table *
 mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
 		       int prio,
 		       int num_flow_table_entries,
-		       u32 level);
+		       u32 level,
+		       u32 flags);
 struct mlx5_flow_table *
 mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
 			     int prio,
-- 
cgit v1.2.3


From 66958ed906b87816314c0517f05fe0b5766ec7fe Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Mon, 7 Nov 2016 15:14:45 +0200
Subject: net/mlx5: Support encap id when setting new steering entry

In order to support steering rules which add encapsulation headers,
encap_id parameter is needed.

Add new mlx5_flow_act struct which holds action related parameter:
action, flow_tag and encap_id. Use mlx5_flow_act struct when adding a new
steering rule.
This patch doesn't change any functionality.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/main.c                  | 10 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  | 17 +++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    | 29 ++++++++-----
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    | 10 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  9 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 23 ++++++-----
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 25 ++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 48 ++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  1 +
 include/linux/mlx5/fs.h                            |  9 +++-
 11 files changed, 104 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9b16431e1de8..76ed57f1b678 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1877,10 +1877,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
-	u32 action;
 	int err = 0;
 
 	if (!is_valid_attr(flow_attr))
@@ -1905,12 +1905,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-	action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+	flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 		MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 	handler->rule = mlx5_add_flow_rules(ft, spec,
-					   action,
-					   MLX5_FS_DEFAULT_FLOW_TAG,
-					   dst, 1);
+					    &flow_act,
+					    dst, 1);
 
 	if (IS_ERR(handler->rule)) {
 		err = PTR_ERR(handler->rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 677b23810953..68419a01db36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 				 enum arfs_type type)
 {
 	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_destination dest;
 	struct mlx5e_tir *tir = priv->indir_tir;
 	struct mlx5_flow_spec *spec;
@@ -206,8 +211,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 	}
 
 	arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
-						   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-						   MLX5_FS_DEFAULT_FLOW_TAG,
+						   &flow_act,
 						   &dest, 1);
 	if (IS_ERR(arfs_t->default_rule)) {
 		err = PTR_ERR(arfs_t->default_rule);
@@ -465,6 +469,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
 					      struct arfs_rule *arfs_rule)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
 	struct arfs_tuple *tuple = &arfs_rule->tuple;
 	struct mlx5_flow_handle *rule = NULL;
@@ -544,9 +553,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
 	}
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
-	rule = mlx5_add_flow_rules(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				   MLX5_FS_DEFAULT_FLOW_TAG,
-				   &dest, 1);
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 9617892e0f15..1fe80de5d68f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -158,6 +158,11 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 enum mlx5e_vlan_rule_type rule_type,
 				 u16 vid, struct mlx5_flow_spec *spec)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_handle **rule_p;
@@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	*rule_p = mlx5_add_flow_rules(ft, spec,
-				      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				      MLX5_FS_DEFAULT_FLOW_TAG,
-				      &dest, 1);
+	*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 
 	if (IS_ERR(*rule_p)) {
 		err = PTR_ERR(*rule_p);
@@ -623,6 +625,11 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 			u16 etype,
 			u8 proto)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
@@ -644,10 +651,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
 	}
 
-	rule = mlx5_add_flow_rules(ft, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				   MLX5_FS_DEFAULT_FLOW_TAG,
-				   dest, 1);
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
@@ -810,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 				  struct mlx5e_l2_rule *ai, int type)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_spec *spec;
@@ -848,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	ai->rule = mlx5_add_flow_rules(ft, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       MLX5_FS_DEFAULT_FLOW_TAG, &dest, 1);
+	ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 	if (IS_ERR(ai->rule)) {
 		netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
 			   __func__, mv_dmac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 87bb3db7b501..3691451c728c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -290,10 +290,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 		      struct ethtool_rx_flow_spec *fs)
 {
 	struct mlx5_flow_destination *dst = NULL;
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_handle *rule;
 	int err = 0;
-	u32 action;
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec)
@@ -304,7 +304,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 		goto free;
 
 	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
-		action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	} else {
 		dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 		if (!dst) {
@@ -314,12 +314,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 		dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
-		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
 	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-	rule = mlx5_add_flow_rules(ft, spec, action,
-				   MLX5_FS_DEFAULT_FLOW_TAG, dst, 1);
+	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cdd430330e8e..35e38d12ba68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,6 +61,11 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 {
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_flow_act flow_act = {
+		.action = action,
+		.flow_tag = flow_tag,
+		.encap_id = 0,
+	};
 	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_handle *rule;
 	bool table_created = false;
@@ -95,9 +100,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec,
-				   action, flow_tag,
-				   &dest, 1);
+	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
 
 	if (IS_ERR(rule))
 		goto err_add_rule;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 27f21ac66639..ae05d27832e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -244,6 +244,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	int match_header = (is_zero_ether_addr(mac_c) ? 0 :
 			    MLX5_MATCH_OUTER_HEADERS);
 	struct mlx5_flow_handle *flow_rule = NULL;
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_spec *spec;
 	void *mv_misc = NULL;
@@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 		  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
 		  dmac_v, dmac_c, vport);
 	spec->match_criteria_enable = match_header;
+	flow_act.action =  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_rule =
 		mlx5_add_flow_rules(esw->fdb_table.fdb, spec,
-				    MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				    0, &dest, 1);
+				    &flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev,
 			 "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
@@ -1212,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
 static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				    struct mlx5_vport *vport)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 	u8 *smac_v;
@@ -1264,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->ingress.allow_rule =
 		mlx5_add_flow_rules(vport->ingress.acl, spec,
-				    MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				    0, NULL, 0);
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
 		esw_warn(esw->dev,
@@ -1278,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	}
 
 	memset(spec, 0, sizeof(*spec));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	vport->ingress.drop_rule =
 		mlx5_add_flow_rules(vport->ingress.acl, spec,
-				    MLX5_FLOW_CONTEXT_ACTION_DROP,
-				    0, NULL, 0);
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
 		esw_warn(esw->dev,
@@ -1301,6 +1303,7 @@ out:
 static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   struct mlx5_vport *vport)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -1338,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->egress.allowed_vlan =
 		mlx5_add_flow_rules(vport->egress.acl, spec,
-				    MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				    0, NULL, 0);
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
 		esw_warn(esw->dev,
@@ -1353,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
 	/* Drop others rule (star rule) */
 	memset(spec, 0, sizeof(*spec));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	vport->egress.drop_rule =
 		mlx5_add_flow_rules(vport->egress.acl, spec,
-				    MLX5_FLOW_CONTEXT_ACTION_DROP,
-				    0, NULL, 0);
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
 		esw_warn(esw->dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b18f9513e71e..a390117ed34c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -49,23 +49,23 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_flow_destination dest[2] = {};
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_handle *rule;
 	void *misc;
-	int action;
 	int i = 0;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	action = attr->action;
+	flow_act.action = attr->action;
 
-	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 		dest[i].vport_num = attr->out_rep->vport;
 		i++;
 	}
-	if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(esw->dev, true);
 		if (IS_ERR(counter))
 			return ERR_CAST(counter);
@@ -84,7 +84,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				      MLX5_MATCH_MISC_PARAMETERS;
 
 	rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
-				   spec, action, 0, dest, i);
+				   spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
 		mlx5_fc_destroy(esw->dev, counter);
 
@@ -274,6 +274,7 @@ out:
 static struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
@@ -297,10 +298,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = vport;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
 	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
-					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-					0, &dest, 1);
+					&flow_act, &dest, 1);
 	if (IS_ERR(flow_rule))
 		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 out:
@@ -363,6 +364,7 @@ out_err:
 
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_spec *spec;
@@ -377,10 +379,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = 0;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
 	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
-					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-					0, &dest, 1);
+					&flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		err = PTR_ERR(flow_rule);
 		esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
@@ -591,6 +593,7 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
 struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
@@ -613,9 +616,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = tirn;
 
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
-					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-					0, &dest, 1);
+				       &flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index cc97bb218e74..c4478ecd8056 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -248,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
 	MLX5_SET(flow_context, in_flow_context, action, fte->action);
+	MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 4d28c8d70482..9adc766c7a3f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -460,8 +460,7 @@ static void del_flow_group(struct fs_node *node)
 			       fg->id, ft->id);
 }
 
-static struct fs_fte *alloc_fte(u8 action,
-				u32 flow_tag,
+static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
 				u32 *match_value,
 				unsigned int index)
 {
@@ -473,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action,
 
 	memcpy(fte->val, match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
-	fte->flow_tag = flow_tag;
+	fte->flow_tag = flow_act->flow_tag;
 	fte->index = index;
-	fte->action = action;
+	fte->action = flow_act->action;
+	fte->encap_id = flow_act->encap_id;
 
 	return fte;
 }
@@ -1117,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
 /* prev is output, prev->next = new_fte */
 static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
 				 u32 *match_value,
-				 u8 action,
-				 u32 flow_tag,
+				 struct mlx5_flow_act *flow_act,
 				 struct list_head **prev)
 {
 	struct fs_fte *fte;
 	int index;
 
 	index = get_free_fte_index(fg, prev);
-	fte = alloc_fte(action, flow_tag, match_value, index);
+	fte = alloc_fte(flow_act, match_value, index);
 	if (IS_ERR(fte))
 		return fte;
 
@@ -1219,8 +1218,7 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
 
 static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 					    u32 *match_value,
-					    u8 action,
-					    u32 flow_tag,
+					    struct mlx5_flow_act *flow_act,
 					    struct mlx5_flow_destination *dest,
 					    int dest_num)
 {
@@ -1234,12 +1232,13 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	fs_for_each_fte(fte, fg) {
 		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
 		if (compare_match_value(&fg->mask, match_value, &fte->val) &&
-		    (action & fte->action) && flow_tag == fte->flow_tag) {
+		    (flow_act->action & fte->action) &&
+		    flow_act->flow_tag == fte->flow_tag) {
 			int old_action = fte->action;
 
-			fte->action |= action;
+			fte->action |= flow_act->action;
 			handle = add_rule_fte(fte, fg, dest, dest_num,
-					      old_action != action);
+					      old_action != flow_act->action);
 			if (IS_ERR(handle)) {
 				fte->action = old_action;
 				goto unlock_fte;
@@ -1255,7 +1254,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 		goto unlock_fg;
 	}
 
-	fte = create_fte(fg, match_value, action, flow_tag, &prev);
+	fte = create_fte(fg, match_value, flow_act, &prev);
 	if (IS_ERR(fte)) {
 		handle = (void *)fte;
 		goto unlock_fg;
@@ -1332,17 +1331,17 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 static struct mlx5_flow_handle *
 _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		     struct mlx5_flow_spec *spec,
-		     u32 action,
-		     u32 flow_tag,
+		     struct mlx5_flow_act *flow_act,
 		     struct mlx5_flow_destination *dest,
 		     int dest_num)
+
 {
 	struct mlx5_flow_group *g;
 	struct mlx5_flow_handle *rule;
 	int i;
 
 	for (i = 0; i < dest_num; i++) {
-		if (!dest_is_valid(&dest[i], action, ft))
+		if (!dest_is_valid(&dest[i], flow_act->action, ft))
 			return ERR_PTR(-EINVAL);
 	}
 
@@ -1353,7 +1352,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 					   g->mask.match_criteria,
 					   spec->match_criteria)) {
 			rule = add_rule_fg(g, spec->match_value,
-					   action, flow_tag, dest, dest_num);
+					   flow_act, dest, dest_num);
 			if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
 				goto unlock;
 		}
@@ -1365,8 +1364,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		goto unlock;
 	}
 
-	rule = add_rule_fg(g, spec->match_value,
-			   action, flow_tag, dest, dest_num);
+	rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num);
 	if (IS_ERR(rule)) {
 		/* Remove assumes refcount > 0 and autogroup creates a group
 		 * with a refcount = 0.
@@ -1390,8 +1388,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
 struct mlx5_flow_handle *
 mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
-		    u32 action,
-		    u32 flow_tag,
+		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
 		    int dest_num)
 {
@@ -1399,11 +1396,11 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 	struct mlx5_flow_destination gen_dest;
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_handle *handle = NULL;
-	u32 sw_action = action;
+	u32 sw_action = flow_act->action;
 	struct fs_prio *prio;
 
 	fs_get_obj(prio, ft->node.parent);
-	if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!fwd_next_prio_supported(ft))
 			return ERR_PTR(-EOPNOTSUPP);
 		if (dest)
@@ -1415,15 +1412,14 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 			gen_dest.ft = next_ft;
 			dest = &gen_dest;
 			dest_num = 1;
-			action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		} else {
 			mutex_unlock(&root->chain_lock);
 			return ERR_PTR(-EOPNOTSUPP);
 		}
 	}
 
-	handle = _mlx5_add_flow_rules(ft, spec, action, flow_tag, dest,
-				      dest_num);
+	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!IS_ERR_OR_NULL(handle) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 9f616ed25a89..8e668c63f69e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -151,6 +151,7 @@ struct fs_fte {
 	u32				flow_tag;
 	u32				index;
 	u32				action;
+	u32				encap_id;
 	enum fs_fte_status		status;
 	struct mlx5_fc			*counter;
 };
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index ab1a5fd2e995..949b24b6c479 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -130,14 +130,19 @@ struct mlx5_flow_group *
 mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 
+struct mlx5_flow_act {
+	u32 action;
+	u32 flow_tag;
+	u32 encap_id;
+};
+
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
  */
 struct mlx5_flow_handle *
 mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
-		    u32 action,
-		    u32 flow_tag,
+		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
 		    int dest_num);
 void mlx5_del_flow_rules(struct mlx5_flow_handle *fr);
-- 
cgit v1.2.3


From 72478a0cc4025e16f68672844ebebf60524e1668 Mon Sep 17 00:00:00 2001
From: Milo Kim <woogyom.kim@gmail.com>
Date: Fri, 28 Oct 2016 21:37:02 +0900
Subject: mfd: tps65217: Fix mismatched interrupt number

Enum value of 'tps65217_irq_type' is not matched with DT parsed hwirq
number[*].

The MFD driver gets the IRQ data by referencing hwirq, but the value is
different. So, irq_to_tps65217_irq() returns mismatched IRQ data.
Eventually, the power button driver enables not PB but USB interrupt
when it is probed.

According to the TPS65217 register map[**], USB interrupt is the LSB.
This patch defines synchronized IRQ value.

[*]  include/dt-bindings/mfd/tps65217.h
[**] http://www.ti.com/lit/ds/symlink/tps65217.pdf

Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/mfd/tps65217.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 4ccda8969639..3cbec4b2496a 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -234,12 +234,11 @@ struct tps65217_bl_pdata {
 	int dft_brightness;
 };
 
-enum tps65217_irq_type {
-	TPS65217_IRQ_PB,
-	TPS65217_IRQ_AC,
-	TPS65217_IRQ_USB,
-	TPS65217_NUM_IRQ
-};
+/* Interrupt numbers */
+#define TPS65217_IRQ_USB		0
+#define TPS65217_IRQ_AC			1
+#define TPS65217_IRQ_PB			2
+#define TPS65217_NUM_IRQ		3
 
 /**
  * struct tps65217_board - packages regulator init data
-- 
cgit v1.2.3


From fc4d24c9b47150245b3eb5bebc2ad4764c754ef4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:49:57 +0100
Subject: fs/buffer: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-fsdevel@vger.kernel.org
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-2-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/buffer.c                | 16 ++++++----------
 include/linux/cpuhotplug.h |  1 +
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index b205a629001d..1613656028d6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3403,7 +3403,7 @@ void free_buffer_head(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(free_buffer_head);
 
-static void buffer_exit_cpu(int cpu)
+static int buffer_exit_cpu_dead(unsigned int cpu)
 {
 	int i;
 	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
@@ -3414,14 +3414,7 @@ static void buffer_exit_cpu(int cpu)
 	}
 	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
 	per_cpu(bh_accounting, cpu).nr = 0;
-}
-
-static int buffer_cpu_notify(struct notifier_block *self,
-			      unsigned long action, void *hcpu)
-{
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		buffer_exit_cpu((unsigned long)hcpu);
-	return NOTIFY_OK;
+	return 0;
 }
 
 /**
@@ -3471,6 +3464,7 @@ EXPORT_SYMBOL(bh_submit_read);
 void __init buffer_init(void)
 {
 	unsigned long nrpages;
+	int ret;
 
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
@@ -3483,5 +3477,7 @@ void __init buffer_init(void)
 	 */
 	nrpages = (nr_free_buffer_pages() * 10) / 100;
 	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
-	hotcpu_notifier(buffer_cpu_notify, 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
+					NULL, buffer_exit_cpu_dead);
+	WARN_ON(ret < 0);
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index afe641c02dca..69b74fa0da60 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -30,6 +30,7 @@ enum cpuhp_state {
 	CPUHP_ACPI_CPUDRV_DEAD,
 	CPUHP_S390_PFAULT_DEAD,
 	CPUHP_BLK_MQ_DEAD,
+	CPUHP_FS_BUFF_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From 90b14889d2f9b29d7e5b4b2d36251c13ce3dd13f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:49:58 +0100
Subject: kernel/printk: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-3-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 kernel/printk/printk.c     | 29 +++++++++++++----------------
 2 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 69b74fa0da60..4174083280d7 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -31,6 +31,7 @@ enum cpuhp_state {
 	CPUHP_S390_PFAULT_DEAD,
 	CPUHP_BLK_MQ_DEAD,
 	CPUHP_FS_BUFF_DEAD,
+	CPUHP_PRINTK_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index de08fc90baaf..4487ffcd42d5 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2185,27 +2185,18 @@ void resume_console(void)
 
 /**
  * console_cpu_notify - print deferred console messages after CPU hotplug
- * @self: notifier struct
- * @action: CPU hotplug event
- * @hcpu: unused
+ * @cpu: unused
  *
  * If printk() is called from a CPU that is not online yet, the messages
  * will be spooled but will not show up on the console.  This function is
  * called when a new CPU comes online (or fails to come up), and ensures
  * that any such output gets printed.
  */
-static int console_cpu_notify(struct notifier_block *self,
-	unsigned long action, void *hcpu)
-{
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_DEAD:
-	case CPU_DOWN_FAILED:
-	case CPU_UP_CANCELED:
-		console_lock();
-		console_unlock();
-	}
-	return NOTIFY_OK;
+static int console_cpu_notify(unsigned int cpu)
+{
+	console_lock();
+	console_unlock();
+	return 0;
 }
 
 /**
@@ -2843,6 +2834,7 @@ EXPORT_SYMBOL(unregister_console);
 static int __init printk_late_init(void)
 {
 	struct console *con;
+	int ret;
 
 	for_each_console(con) {
 		if (!keep_bootcon && con->flags & CON_BOOT) {
@@ -2857,7 +2849,12 @@ static int __init printk_late_init(void)
 				unregister_console(con);
 		}
 	}
-	hotcpu_notifier(console_cpu_notify, 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
+					console_cpu_notify);
+	WARN_ON(ret < 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
+					console_cpu_notify, NULL);
+	WARN_ON(ret < 0);
 	return 0;
 }
 late_initcall(printk_late_init);
-- 
cgit v1.2.3


From 308167fcb330296fc80505a6b11ba0661f38a4cc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:49:59 +0100
Subject: mm/memcg: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: linux-mm@kvack.org
Cc: rt@linutronix.de
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: cgroups@vger.kernel.org
Link: http://lkml.kernel.org/r/20161103145021.28528-4-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/memcontrol.c            | 24 ++++++++----------------
 2 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4174083280d7..c622ab349af3 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -32,6 +32,7 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_DEAD,
 	CPUHP_FS_BUFF_DEAD,
 	CPUHP_PRINTK_DEAD,
+	CPUHP_MM_MEMCQ_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0f870ba43942..6c2043509fb5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1816,22 +1816,13 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 	mutex_unlock(&percpu_charge_mutex);
 }
 
-static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
-					unsigned long action,
-					void *hcpu)
+static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
-	int cpu = (unsigned long)hcpu;
 	struct memcg_stock_pcp *stock;
 
-	if (action == CPU_ONLINE)
-		return NOTIFY_OK;
-
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-		return NOTIFY_OK;
-
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
-	return NOTIFY_OK;
+	return 0;
 }
 
 static void reclaim_high(struct mem_cgroup *memcg,
@@ -5774,16 +5765,17 @@ __setup("cgroup.memory=", cgroup_memory);
 /*
  * subsys_initcall() for memory controller.
  *
- * Some parts like hotcpu_notifier() have to be initialized from this context
- * because of lock dependencies (cgroup_lock -> cpu hotplug) but basically
- * everything that doesn't depend on a specific mem_cgroup structure should
- * be initialized from here.
+ * Some parts like memcg_hotplug_cpu_dead() have to be initialized from this
+ * context because of lock dependencies (cgroup_lock -> cpu hotplug) but
+ * basically everything that doesn't depend on a specific mem_cgroup structure
+ * should be initialized from here.
  */
 static int __init mem_cgroup_init(void)
 {
 	int cpu, node;
 
-	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
+	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
+				  memcg_hotplug_cpu_dead);
 
 	for_each_possible_cpu(cpu)
 		INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
-- 
cgit v1.2.3


From 5588f5afb4cfc33eb377b751ba4b97184373e8d6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:00 +0100
Subject: lib/percpu_counter: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-5-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 lib/percpu_counter.c       | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c622ab349af3..04e5f99ffc70 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -33,6 +33,7 @@ enum cpuhp_state {
 	CPUHP_FS_BUFF_DEAD,
 	CPUHP_PRINTK_DEAD,
 	CPUHP_MM_MEMCQ_DEAD,
+	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 72d36113ccaa..c8cebb137076 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -158,25 +158,21 @@ EXPORT_SYMBOL(percpu_counter_destroy);
 int percpu_counter_batch __read_mostly = 32;
 EXPORT_SYMBOL(percpu_counter_batch);
 
-static void compute_batch_value(void)
+static int compute_batch_value(unsigned int cpu)
 {
 	int nr = num_online_cpus();
 
 	percpu_counter_batch = max(32, nr*2);
+	return 0;
 }
 
-static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
-					unsigned long action, void *hcpu)
+static int percpu_counter_cpu_dead(unsigned int cpu)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-	unsigned int cpu;
 	struct percpu_counter *fbc;
 
-	compute_batch_value();
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-		return NOTIFY_OK;
+	compute_batch_value(cpu);
 
-	cpu = (unsigned long)hcpu;
 	spin_lock_irq(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
@@ -190,7 +186,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	}
 	spin_unlock_irq(&percpu_counters_lock);
 #endif
-	return NOTIFY_OK;
+	return 0;
 }
 
 /*
@@ -222,8 +218,15 @@ EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
-	compute_batch_value();
-	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online",
+				compute_batch_value, NULL);
+	WARN_ON(ret < 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD,
+					"lib/percpu_cnt:dead", NULL,
+					percpu_counter_cpu_dead);
+	WARN_ON(ret < 0);
 	return 0;
 }
 module_init(percpu_counter_startup);
-- 
cgit v1.2.3


From d544abd5ff7d8b07c0c67682a63e4939c3c82914 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:01 +0100
Subject: lib/radix-tree: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-6-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 lib/radix-tree.c           | 25 ++++++++++++-------------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 04e5f99ffc70..89310fb1031d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -34,6 +34,7 @@ enum cpuhp_state {
 	CPUHP_PRINTK_DEAD,
 	CPUHP_MM_MEMCQ_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
+	CPUHP_RADIX_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e6d552c40dd..4b8bb3618b83 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1642,32 +1642,31 @@ static __init void radix_tree_init_maxnodes(void)
 	}
 }
 
-static int radix_tree_callback(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
+static int radix_tree_cpu_dead(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
 	struct radix_tree_preload *rtp;
 	struct radix_tree_node *node;
 
 	/* Free per-cpu pool of preloaded nodes */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		rtp = &per_cpu(radix_tree_preloads, cpu);
-		while (rtp->nr) {
-			node = rtp->nodes;
-			rtp->nodes = node->private_data;
-			kmem_cache_free(radix_tree_node_cachep, node);
-			rtp->nr--;
-		}
+	rtp = &per_cpu(radix_tree_preloads, cpu);
+	while (rtp->nr) {
+		node = rtp->nodes;
+		rtp->nodes = node->private_data;
+		kmem_cache_free(radix_tree_node_cachep, node);
+		rtp->nr--;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
 void __init radix_tree_init(void)
 {
+	int ret;
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
 			radix_tree_node_ctor);
 	radix_tree_init_maxnodes();
-	hotcpu_notifier(radix_tree_callback, 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
+					NULL, radix_tree_cpu_dead);
+	WARN_ON(ret < 0);
 }
-- 
cgit v1.2.3


From 005fd4bbef168e9dea896085b001d64369e9834a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:02 +0100
Subject: mm/page_alloc: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-7-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/page_alloc.c            | 49 +++++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 89310fb1031d..31c58f6ec3c6 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -35,6 +35,7 @@ enum cpuhp_state {
 	CPUHP_MM_MEMCQ_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
+	CPUHP_PAGE_ALLOC_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 072d791dce2d..fc98c2bae905 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6491,38 +6491,39 @@ void __init free_area_init(unsigned long *zones_size)
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-static int page_alloc_cpu_notify(struct notifier_block *self,
-				 unsigned long action, void *hcpu)
+static int page_alloc_cpu_dead(unsigned int cpu)
 {
-	int cpu = (unsigned long)hcpu;
 
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		lru_add_drain_cpu(cpu);
-		drain_pages(cpu);
+	lru_add_drain_cpu(cpu);
+	drain_pages(cpu);
 
-		/*
-		 * Spill the event counters of the dead processor
-		 * into the current processors event counters.
-		 * This artificially elevates the count of the current
-		 * processor.
-		 */
-		vm_events_fold_cpu(cpu);
+	/*
+	 * Spill the event counters of the dead processor
+	 * into the current processors event counters.
+	 * This artificially elevates the count of the current
+	 * processor.
+	 */
+	vm_events_fold_cpu(cpu);
 
-		/*
-		 * Zero the differential counters of the dead processor
-		 * so that the vm statistics are consistent.
-		 *
-		 * This is only okay since the processor is dead and cannot
-		 * race with what we are doing.
-		 */
-		cpu_vm_stats_fold(cpu);
-	}
-	return NOTIFY_OK;
+	/*
+	 * Zero the differential counters of the dead processor
+	 * so that the vm statistics are consistent.
+	 *
+	 * This is only okay since the processor is dead and cannot
+	 * race with what we are doing.
+	 */
+	cpu_vm_stats_fold(cpu);
+	return 0;
 }
 
 void __init page_alloc_init(void)
 {
-	hotcpu_notifier(page_alloc_cpu_notify, 0);
+	int ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
+					"mm/page_alloc:dead", NULL,
+					page_alloc_cpu_dead);
+	WARN_ON(ret < 0);
 }
 
 /*
-- 
cgit v1.2.3


From f0bf90def3528cebed45ebd81d9b5d0fa17d7422 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:04 +0100
Subject: net/dev: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-9-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 net/core/dev.c             | 16 ++++++----------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 31c58f6ec3c6..394eb7ed53be 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -36,6 +36,7 @@ enum cpuhp_state {
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC_DEAD,
+	CPUHP_NET_DEV_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
diff --git a/net/core/dev.c b/net/core/dev.c
index 820bac239738..8e909b2a5f2f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7953,18 +7953,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
 
-static int dev_cpu_callback(struct notifier_block *nfb,
-			    unsigned long action,
-			    void *ocpu)
+static int dev_cpu_dead(unsigned int oldcpu)
 {
 	struct sk_buff **list_skb;
 	struct sk_buff *skb;
-	unsigned int cpu, oldcpu = (unsigned long)ocpu;
+	unsigned int cpu;
 	struct softnet_data *sd, *oldsd;
 
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-		return NOTIFY_OK;
-
 	local_irq_disable();
 	cpu = smp_processor_id();
 	sd = &per_cpu(softnet_data, cpu);
@@ -8014,10 +8009,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 		input_queue_head_incr(oldsd);
 	}
 
-	return NOTIFY_OK;
+	return 0;
 }
 
-
 /**
  *	netdev_increment_features - increment feature set by one
  *	@all: current feature set
@@ -8351,7 +8345,9 @@ static int __init net_dev_init(void)
 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
-	hotcpu_notifier(dev_cpu_callback, 0);
+	rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
+				       NULL, dev_cpu_dead);
+	WARN_ON(rc < 0);
 	dst_subsys_init();
 	rc = 0;
 out:
-- 
cgit v1.2.3


From a4fc1bfc42062e8bc7b2271a90d17403b096ce5d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:05 +0100
Subject: net/flowcache: Convert to hotplug state machine

Install the callbacks via the state machine. Use multi state support to avoid
custom list handling for the multiple instances.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: netdev@vger.kernel.org
Cc: rt@linutronix.de
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20161103145021.28528-10-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 include/net/flow.h         |  1 +
 include/net/flowcache.h    |  2 +-
 net/core/flow.c            | 60 ++++++++++++++++++++--------------------------
 net/xfrm/xfrm_policy.c     |  1 +
 5 files changed, 30 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 394eb7ed53be..86b940f19df8 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -56,6 +56,7 @@ enum cpuhp_state {
 	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
 	CPUHP_SH_SH3X_PREPARE,
 	CPUHP_BLK_MQ_PREPARE,
+	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/include/net/flow.h b/include/net/flow.h
index 035aa7716967..2e386bd6ee63 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -239,6 +239,7 @@ struct flow_cache_object *flow_cache_lookup(struct net *net,
 					    void *ctx);
 int flow_cache_init(struct net *net);
 void flow_cache_fini(struct net *net);
+void flow_cache_hp_init(void);
 
 void flow_cache_flush(struct net *net);
 void flow_cache_flush_deferred(struct net *net);
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
index c8f665ec6e0d..9caf3bfc8d2d 100644
--- a/include/net/flowcache.h
+++ b/include/net/flowcache.h
@@ -17,7 +17,7 @@ struct flow_cache_percpu {
 struct flow_cache {
 	u32				hash_shift;
 	struct flow_cache_percpu __percpu *percpu;
-	struct notifier_block		hotcpu_notifier;
+	struct hlist_node		node;
 	int				low_watermark;
 	int				high_watermark;
 	struct timer_list		rnd_timer;
diff --git a/net/core/flow.c b/net/core/flow.c
index 3937b1b68d5b..841fd7f87b30 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -419,28 +419,20 @@ static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 	return 0;
 }
 
-static int flow_cache_cpu(struct notifier_block *nfb,
-			  unsigned long action,
-			  void *hcpu)
+static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
 {
-	struct flow_cache *fc = container_of(nfb, struct flow_cache,
-						hotcpu_notifier);
-	int res, cpu = (unsigned long) hcpu;
+	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
+
+	return flow_cache_cpu_prepare(fc, cpu);
+}
+
+static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		res = flow_cache_cpu_prepare(fc, cpu);
-		if (res)
-			return notifier_from_errno(res);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		__flow_cache_shrink(fc, fcp, 0);
-		break;
-	}
-	return NOTIFY_OK;
+	__flow_cache_shrink(fc, fcp, 0);
+	return 0;
 }
 
 int flow_cache_init(struct net *net)
@@ -467,18 +459,8 @@ int flow_cache_init(struct net *net)
 	if (!fc->percpu)
 		return -ENOMEM;
 
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(i) {
-		if (flow_cache_cpu_prepare(fc, i))
-			goto err;
-	}
-	fc->hotcpu_notifier = (struct notifier_block){
-		.notifier_call = flow_cache_cpu,
-	};
-	__register_hotcpu_notifier(&fc->hotcpu_notifier);
-
-	cpu_notifier_register_done();
+	if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
+		goto err;
 
 	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
 		    (unsigned long) fc);
@@ -494,8 +476,6 @@ err:
 		fcp->hash_table = NULL;
 	}
 
-	cpu_notifier_register_done();
-
 	free_percpu(fc->percpu);
 	fc->percpu = NULL;
 
@@ -509,7 +489,8 @@ void flow_cache_fini(struct net *net)
 	struct flow_cache *fc = &net->xfrm.flow_cache_global;
 
 	del_timer_sync(&fc->rnd_timer);
-	unregister_hotcpu_notifier(&fc->hotcpu_notifier);
+
+	cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
 
 	for_each_possible_cpu(i) {
 		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
@@ -521,3 +502,14 @@ void flow_cache_fini(struct net *net)
 	fc->percpu = NULL;
 }
 EXPORT_SYMBOL(flow_cache_fini);
+
+void __init flow_cache_hp_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
+				      "net/flow:prepare",
+				      flow_cache_cpu_up_prep,
+				      flow_cache_cpu_dead);
+	WARN_ON(ret < 0);
+}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index fd6986634e6f..4a8eff11bdad 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3111,6 +3111,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
+	flow_cache_hp_init();
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
-- 
cgit v1.2.3


From 38643a0e691ec947d311eb2db011b289cf95014e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 3 Nov 2016 15:50:09 +0100
Subject: drivers base/topology: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs. No functional change

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161103145021.28528-14-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/base/topology.c    | 42 +++++-------------------------------------
 include/linux/cpuhotplug.h |  1 +
 2 files changed, 6 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index df3c97cb4c99..d6ec1c546f5b 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -118,51 +118,19 @@ static int topology_add_dev(unsigned int cpu)
 	return sysfs_create_group(&dev->kobj, &topology_attr_group);
 }
 
-static void topology_remove_dev(unsigned int cpu)
+static int topology_remove_dev(unsigned int cpu)
 {
 	struct device *dev = get_cpu_device(cpu);
 
 	sysfs_remove_group(&dev->kobj, &topology_attr_group);
-}
-
-static int topology_cpu_callback(struct notifier_block *nfb,
-				 unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	int rc = 0;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		rc = topology_add_dev(cpu);
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		topology_remove_dev(cpu);
-		break;
-	}
-	return notifier_from_errno(rc);
+	return 0;
 }
 
 static int topology_sysfs_init(void)
 {
-	int cpu;
-	int rc = 0;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu) {
-		rc = topology_add_dev(cpu);
-		if (rc)
-			goto out;
-	}
-	__hotcpu_notifier(topology_cpu_callback, 0);
-
-out:
-	cpu_notifier_register_done();
-	return rc;
+	return cpuhp_setup_state(CPUHP_TOPOLOGY_PREPARE,
+				 "base/topology:prepare", topology_add_dev,
+				 topology_remove_dev);
 }
 
 device_initcall(topology_sysfs_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 86b940f19df8..3410d83cc2e2 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -57,6 +57,7 @@ enum cpuhp_state {
 	CPUHP_SH_SH3X_PREPARE,
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
+	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 9484ab1bf4464faae695321dd4fa66365beda74e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 10 Nov 2016 10:26:50 +1100
Subject: dax: Introduce IOMAP_FAULT flag

Introduce a flag telling iomap operations whether they are handling a
fault or other IO. That may influence behavior wrt inode size and
similar things.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/dax.c              | 4 ++--
 fs/iomap.c            | 5 +++--
 include/linux/iomap.h | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 281e91a63367..28af41b9da3a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1266,7 +1266,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
 	sector_t sector;
 	struct iomap iomap = { 0 };
-	unsigned flags = 0;
+	unsigned flags = IOMAP_FAULT;
 	int error, major = 0;
 	int locked_status = 0;
 	void *entry;
@@ -1467,7 +1467,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	unsigned long pmd_addr = address & PMD_MASK;
 	bool write = flags & FAULT_FLAG_WRITE;
-	unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
+	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
 	struct inode *inode = mapping->host;
 	int result = VM_FAULT_FALLBACK;
 	struct iomap iomap = { 0 };
diff --git a/fs/iomap.c b/fs/iomap.c
index 013d1d36fbbf..51a02573405e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -468,8 +468,9 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	offset = page_offset(page);
 	while (length > 0) {
-		ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
-				ops, page, iomap_page_mkwrite_actor);
+		ret = iomap_apply(inode, offset, length,
+				IOMAP_WRITE | IOMAP_FAULT, ops, page,
+				iomap_page_mkwrite_actor);
 		if (unlikely(ret <= 0))
 			goto out_unlock;
 		offset += ret;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e63e288dee83..b9e7b8ec8c1d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -44,6 +44,7 @@ struct iomap {
  */
 #define IOMAP_WRITE		(1 << 0)
 #define IOMAP_ZERO		(1 << 1)
+#define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 
 struct iomap_ops {
 	/*
-- 
cgit v1.2.3


From 1ababeba4a21f3dba3da3523c670b207fb2feb62 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Tue, 8 Nov 2016 14:57:39 +0100
Subject: ipv6: implement dataplane support for rthdr type 4 (Segment Routing
 Header)

Implement minimal support for processing of SR-enabled packets
as described in
https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02.

This patch implements the following operations:
- Intermediate segment endpoint: incrementation of active segment and rerouting.
- Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH
  and routing of inner packet.
- Cleanup flag support for SR-inlined packets: removal of SRH if we are the
  penultimate segment endpoint.

A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled
packets. Default is deny.

This patch does not provide support for HMAC-signed packets.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      |   1 +
 include/linux/seg6.h      |   6 ++
 include/net/seg6.h        |  36 ++++++++++
 include/uapi/linux/ipv6.h |   2 +
 include/uapi/linux/seg6.h |  54 ++++++++++++++
 net/ipv6/addrconf.c       |  10 +++
 net/ipv6/exthdrs.c        | 175 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 284 insertions(+)
 create mode 100644 include/linux/seg6.h
 create mode 100644 include/net/seg6.h
 create mode 100644 include/uapi/linux/seg6.h

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1afb6e8d35c3..68d3f71f0abf 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -64,6 +64,7 @@ struct ipv6_devconf {
 	} stable_secret;
 	__s32		use_oif_addrs_only;
 	__s32		keep_addr_on_down;
+	__s32		seg6_enabled;
 
 	struct ctl_table_header *sysctl_header;
 };
diff --git a/include/linux/seg6.h b/include/linux/seg6.h
new file mode 100644
index 000000000000..7a66d2b4c5a6
--- /dev/null
+++ b/include/linux/seg6.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_H
+#define _LINUX_SEG6_H
+
+#include <uapi/linux/seg6.h>
+
+#endif
diff --git a/include/net/seg6.h b/include/net/seg6.h
new file mode 100644
index 000000000000..4dd52a7e95f1
--- /dev/null
+++ b/include/net/seg6.h
@@ -0,0 +1,36 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_H
+#define _NET_SEG6_H
+
+static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
+				     __be32 to)
+{
+	__be32 diff[] = { ~from, to };
+
+	skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
+}
+
+static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from,
+				      __be32 *to)
+{
+	__be32 diff[] = {
+		~from[0], ~from[1], ~from[2], ~from[3],
+		to[0], to[1], to[2], to[3],
+	};
+
+	skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
+}
+
+#endif
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 8c2772340c3f..7ff1d654e333 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -39,6 +39,7 @@ struct in6_ifreq {
 #define IPV6_SRCRT_STRICT	0x01	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_0	0	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
+#define IPV6_SRCRT_TYPE_4	4	/* Segment Routing with IPv6 */
 
 /*
  *	routing header
@@ -178,6 +179,7 @@ enum {
 	DEVCONF_DROP_UNSOLICITED_NA,
 	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
+	DEVCONF_SEG6_ENABLED,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
new file mode 100644
index 000000000000..c396a8052f73
--- /dev/null
+++ b/include/uapi/linux/seg6.h
@@ -0,0 +1,54 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+	__u8	nexthdr;
+	__u8	hdrlen;
+	__u8	type;
+	__u8	segments_left;
+	__u8	first_segment;
+	__u8	flag_1;
+	__u8	flag_2;
+	__u8	reserved;
+
+	struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_CLEANUP	(1 << 7)
+#define SR6_FLAG1_PROTECTED	(1 << 6)
+#define SR6_FLAG1_OAM		(1 << 5)
+#define SR6_FLAG1_ALERT		(1 << 4)
+#define SR6_FLAG1_HMAC		(1 << 3)
+
+#define SR6_TLV_INGRESS		1
+#define SR6_TLV_EGRESS		2
+#define SR6_TLV_OPAQUE		3
+#define SR6_TLV_PADDING		4
+#define SR6_TLV_HMAC		5
+
+#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP)
+#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+	__u8 type;
+	__u8 len;
+	__u8 data[0];
+};
+
+#endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 060dd9922018..2ac6cb460af0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -238,6 +238,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.use_oif_addrs_only	= 0,
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
+	.seg6_enabled		= 0,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -284,6 +285,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.use_oif_addrs_only	= 0,
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
+	.seg6_enabled		= 0,
 };
 
 /* Check if a valid qdisc is available */
@@ -4944,6 +4946,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
 	array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
 	array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
+	array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6035,6 +6038,13 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.proc_handler	= proc_dointvec,
 
 	},
+	{
+		.procname	= "seg6_enabled",
+		.data		= &ipv6_devconf.seg6_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 139ceb68bd37..b8ba3961ff8a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -47,6 +47,8 @@
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 #include <net/xfrm.h>
 #endif
+#include <linux/seg6.h>
+#include <net/seg6.h>
 
 #include <linux/uaccess.h>
 
@@ -286,6 +288,175 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	return -1;
 }
 
+static void seg6_update_csum(struct sk_buff *skb)
+{
+	struct ipv6_sr_hdr *hdr;
+	struct in6_addr *addr;
+	__be32 from, to;
+
+	/* srh is at transport offset and seg_left is already decremented
+	 * but daddr is not yet updated with next segment
+	 */
+
+	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+	addr = hdr->segments + hdr->segments_left;
+
+	hdr->segments_left++;
+	from = *(__be32 *)hdr;
+
+	hdr->segments_left--;
+	to = *(__be32 *)hdr;
+
+	/* update skb csum with diff resulting from seg_left decrement */
+
+	update_csum_diff4(skb, from, to);
+
+	/* compute csum diff between current and next segment and update */
+
+	update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr),
+			   (__be32 *)addr);
+}
+
+static int ipv6_srh_rcv(struct sk_buff *skb)
+{
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct net *net = dev_net(skb->dev);
+	struct ipv6_sr_hdr *hdr;
+	struct inet6_dev *idev;
+	struct in6_addr *addr;
+	bool cleanup = false;
+	int accept_seg6;
+
+	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+	idev = __in6_dev_get(skb->dev);
+
+	accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
+	if (accept_seg6 > idev->cnf.seg6_enabled)
+		accept_seg6 = idev->cnf.seg6_enabled;
+
+	if (!accept_seg6) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+looped_back:
+	if (hdr->segments_left > 0) {
+		if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
+		    sr_has_cleanup(hdr))
+			cleanup = true;
+	} else {
+		if (hdr->nexthdr == NEXTHDR_IPV6) {
+			int offset = (hdr->hdrlen + 1) << 3;
+
+			skb_postpull_rcsum(skb, skb_network_header(skb),
+					   skb_network_header_len(skb));
+
+			if (!pskb_pull(skb, offset)) {
+				kfree_skb(skb);
+				return -1;
+			}
+			skb_postpull_rcsum(skb, skb_transport_header(skb),
+					   offset);
+
+			skb_reset_network_header(skb);
+			skb_reset_transport_header(skb);
+			skb->encapsulation = 0;
+
+			__skb_tunnel_rx(skb, skb->dev, net);
+
+			netif_rx(skb);
+			return -1;
+		}
+
+		opt->srcrt = skb_network_header_len(skb);
+		opt->lastopt = opt->srcrt;
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
+
+		return 1;
+	}
+
+	if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
+		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+				IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+					IPSTATS_MIB_OUTDISCARDS);
+			kfree_skb(skb);
+			return -1;
+		}
+	}
+
+	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+	hdr->segments_left--;
+	addr = hdr->segments + hdr->segments_left;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		seg6_update_csum(skb);
+
+	ipv6_hdr(skb)->daddr = *addr;
+
+	if (cleanup) {
+		int srhlen = (hdr->hdrlen + 1) << 3;
+		int nh = hdr->nexthdr;
+
+		skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen);
+		memmove(skb_network_header(skb) + srhlen,
+			skb_network_header(skb),
+			(unsigned char *)hdr - skb_network_header(skb));
+		skb->network_header += srhlen;
+		ipv6_hdr(skb)->nexthdr = nh;
+		ipv6_hdr(skb)->payload_len = htons(skb->len -
+						   sizeof(struct ipv6hdr));
+		skb_push_rcsum(skb, sizeof(struct ipv6hdr));
+	}
+
+	skb_dst_drop(skb);
+
+	ip6_route_input(skb);
+
+	if (skb_dst(skb)->error) {
+		dst_input(skb);
+		return -1;
+	}
+
+	if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
+			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+					IPSTATS_MIB_INHDRERRORS);
+			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+				    ICMPV6_EXC_HOPLIMIT, 0);
+			kfree_skb(skb);
+			return -1;
+		}
+		ipv6_hdr(skb)->hop_limit--;
+
+		/* be sure that srh is still present before reinjecting */
+		if (!cleanup) {
+			skb_pull(skb, sizeof(struct ipv6hdr));
+			goto looped_back;
+		}
+		skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+		IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+	}
+
+	dst_input(skb);
+
+	return -1;
+}
+
 /********************************
   Routing header.
  ********************************/
@@ -326,6 +497,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 		return -1;
 	}
 
+	/* segment routing */
+	if (hdr->type == IPV6_SRCRT_TYPE_4)
+		return ipv6_srh_rcv(skb);
+
 looped_back:
 	if (hdr->segments_left == 0) {
 		switch (hdr->type) {
-- 
cgit v1.2.3


From 915d7e5e5930b4f01d0971d93b9b25ed17d221aa Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Tue, 8 Nov 2016 14:57:40 +0100
Subject: ipv6: sr: add code base for control plane support of SR-IPv6

This patch adds the necessary hooks and structures to provide support
for SR-IPv6 control plane, essentially the Generic Netlink commands
that will be used for userspace control over the Segment Routing
kernel structures.

The genetlink commands provide control over two different structures:
tunnel source and HMAC data. The tunnel source is the source address
that will be used by default when encapsulating packets into an
outer IPv6 header + SRH. If the tunnel source is set to :: then an
address of the outgoing interface will be selected as the source.

The HMAC commands currently just return ENOTSUPP and will be implemented
in a future patch.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/seg6_genl.h      |   6 ++
 include/net/netns/ipv6.h       |   1 +
 include/net/seg6.h             |  16 +++
 include/uapi/linux/seg6_genl.h |  32 ++++++
 net/ipv6/Makefile              |   2 +-
 net/ipv6/af_inet6.c            |   9 +-
 net/ipv6/seg6.c                | 214 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 278 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/seg6_genl.h
 create mode 100644 include/uapi/linux/seg6_genl.h
 create mode 100644 net/ipv6/seg6.c

(limited to 'include/linux')

diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h
new file mode 100644
index 000000000000..d6c3fb4f3734
--- /dev/null
+++ b/include/linux/seg6_genl.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_GENL_H
+#define _LINUX_SEG6_GENL_H
+
+#include <uapi/linux/seg6_genl.h>
+
+#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 10d0848f5b8a..de7745e2edcc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -85,6 +85,7 @@ struct netns_ipv6 {
 #endif
 	atomic_t		dev_addr_genid;
 	atomic_t		fib6_sernum;
+	struct seg6_pernet_data *seg6_data;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 4dd52a7e95f1..7c7b8ed39661 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -14,6 +14,9 @@
 #ifndef _NET_SEG6_H
 #define _NET_SEG6_H
 
+#include <linux/net.h>
+#include <linux/ipv6.h>
+
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
 {
@@ -33,4 +36,17 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from,
 	skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
 }
 
+struct seg6_pernet_data {
+	struct mutex lock;
+	struct in6_addr __rcu *tun_src;
+};
+
+static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
+{
+	return net->ipv6.seg6_data;
+}
+
+extern int seg6_init(void);
+extern void seg6_exit(void);
+
 #endif
diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h
new file mode 100644
index 000000000000..fcf1c60d7df3
--- /dev/null
+++ b/include/uapi/linux/seg6_genl.h
@@ -0,0 +1,32 @@
+#ifndef _UAPI_LINUX_SEG6_GENL_H
+#define _UAPI_LINUX_SEG6_GENL_H
+
+#define SEG6_GENL_NAME		"SEG6"
+#define SEG6_GENL_VERSION	0x1
+
+enum {
+	SEG6_ATTR_UNSPEC,
+	SEG6_ATTR_DST,
+	SEG6_ATTR_DSTLEN,
+	SEG6_ATTR_HMACKEYID,
+	SEG6_ATTR_SECRET,
+	SEG6_ATTR_SECRETLEN,
+	SEG6_ATTR_ALGID,
+	SEG6_ATTR_HMACINFO,
+	__SEG6_ATTR_MAX,
+};
+
+#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1)
+
+enum {
+	SEG6_CMD_UNSPEC,
+	SEG6_CMD_SETHMAC,
+	SEG6_CMD_DUMPHMAC,
+	SEG6_CMD_SET_TUNSRC,
+	SEG6_CMD_GET_TUNSRC,
+	__SEG6_CMD_MAX,
+};
+
+#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1)
+
+#endif
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index c174ccb340a1..c92010d62afc 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-		udp_offload.o
+		udp_offload.o seg6.o
 
 ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c86911b63f8a..d424f3a3737a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -61,6 +61,7 @@
 #include <net/ip6_tunnel.h>
 #endif
 #include <net/calipso.h>
+#include <net/seg6.h>
 
 #include <asm/uaccess.h>
 #include <linux/mroute6.h>
@@ -991,6 +992,10 @@ static int __init inet6_init(void)
 	if (err)
 		goto calipso_fail;
 
+	err = seg6_init();
+	if (err)
+		goto seg6_fail;
+
 #ifdef CONFIG_SYSCTL
 	err = ipv6_sysctl_register();
 	if (err)
@@ -1001,8 +1006,10 @@ out:
 
 #ifdef CONFIG_SYSCTL
 sysctl_fail:
-	calipso_exit();
+	seg6_exit();
 #endif
+seg6_fail:
+	calipso_exit();
 calipso_fail:
 	pingv6_exit();
 pingv6_fail:
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
new file mode 100644
index 000000000000..e246b0ba12ac
--- /dev/null
+++ b/net/ipv6/seg6.c
@@ -0,0 +1,214 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *	  modify it under the terms of the GNU General Public License
+ *	  as published by the Free Software Foundation; either version
+ *	  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <linux/seg6.h>
+#include <linux/seg6_genl.h>
+
+static struct genl_family seg6_genl_family;
+
+static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
+	[SEG6_ATTR_DST]				= { .type = NLA_BINARY,
+		.len = sizeof(struct in6_addr) },
+	[SEG6_ATTR_DSTLEN]			= { .type = NLA_S32, },
+	[SEG6_ATTR_HMACKEYID]		= { .type = NLA_U32, },
+	[SEG6_ATTR_SECRET]			= { .type = NLA_BINARY, },
+	[SEG6_ATTR_SECRETLEN]		= { .type = NLA_U8, },
+	[SEG6_ATTR_ALGID]			= { .type = NLA_U8, },
+	[SEG6_ATTR_HMACINFO]		= { .type = NLA_NESTED, },
+};
+
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+	return -ENOTSUPP;
+}
+
+static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct in6_addr *val, *t_old, *t_new;
+	struct seg6_pernet_data *sdata;
+
+	sdata = seg6_pernet(net);
+
+	if (!info->attrs[SEG6_ATTR_DST])
+		return -EINVAL;
+
+	val = nla_data(info->attrs[SEG6_ATTR_DST]);
+	t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
+
+	mutex_lock(&sdata->lock);
+
+	t_old = sdata->tun_src;
+	rcu_assign_pointer(sdata->tun_src, t_new);
+
+	mutex_unlock(&sdata->lock);
+
+	synchronize_net();
+	kfree(t_old);
+
+	return 0;
+}
+
+static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct in6_addr *tun_src;
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC);
+	if (!hdr)
+		goto free_msg;
+
+	rcu_read_lock();
+	tun_src = rcu_dereference(seg6_pernet(net)->tun_src);
+
+	if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src))
+		goto nla_put_failure;
+
+	rcu_read_unlock();
+
+	genlmsg_end(msg, hdr);
+	genlmsg_reply(msg, info);
+
+	return 0;
+
+nla_put_failure:
+	rcu_read_unlock();
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -ENOMEM;
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return -ENOTSUPP;
+}
+
+static int __net_init seg6_net_init(struct net *net)
+{
+	struct seg6_pernet_data *sdata;
+
+	sdata = kzalloc(sizeof(*sdata), GFP_KERNEL);
+	if (!sdata)
+		return -ENOMEM;
+
+	mutex_init(&sdata->lock);
+
+	sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL);
+	if (!sdata->tun_src) {
+		kfree(sdata);
+		return -ENOMEM;
+	}
+
+	net->ipv6.seg6_data = sdata;
+
+	return 0;
+}
+
+static void __net_exit seg6_net_exit(struct net *net)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+	kfree(sdata->tun_src);
+	kfree(sdata);
+}
+
+static struct pernet_operations ip6_segments_ops = {
+	.init = seg6_net_init,
+	.exit = seg6_net_exit,
+};
+
+static const struct genl_ops seg6_genl_ops[] = {
+	{
+		.cmd	= SEG6_CMD_SETHMAC,
+		.doit	= seg6_genl_sethmac,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_DUMPHMAC,
+		.dumpit	= seg6_genl_dumphmac,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_SET_TUNSRC,
+		.doit	= seg6_genl_set_tunsrc,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_GET_TUNSRC,
+		.doit	= seg6_genl_get_tunsrc,
+		.policy = seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_family seg6_genl_family __ro_after_init = {
+	.hdrsize	= 0,
+	.name		= SEG6_GENL_NAME,
+	.version	= SEG6_GENL_VERSION,
+	.maxattr	= SEG6_ATTR_MAX,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.ops		= seg6_genl_ops,
+	.n_ops		= ARRAY_SIZE(seg6_genl_ops),
+	.module		= THIS_MODULE,
+};
+
+int __init seg6_init(void)
+{
+	int err = -ENOMEM;
+
+	err = genl_register_family(&seg6_genl_family);
+	if (err)
+		goto out;
+
+	err = register_pernet_subsys(&ip6_segments_ops);
+	if (err)
+		goto out_unregister_genl;
+
+	pr_info("Segment Routing with IPv6\n");
+
+out:
+	return err;
+out_unregister_genl:
+	genl_unregister_family(&seg6_genl_family);
+	goto out;
+}
+
+void seg6_exit(void)
+{
+	unregister_pernet_subsys(&ip6_segments_ops);
+	genl_unregister_family(&seg6_genl_family);
+}
-- 
cgit v1.2.3


From 6c8702c60b88651072460f3f4026c7dfe2521d12 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Tue, 8 Nov 2016 14:57:41 +0100
Subject: ipv6: sr: add support for SRH encapsulation and injection with
 lwtunnels

This patch creates a new type of interfaceless lightweight tunnel (SEG6),
enabling the encapsulation and injection of SRH within locally emitted
packets and forwarded packets.

>From a configuration viewpoint, a seg6 tunnel would be configured as follows:

  ip -6 ro ad fc00::1/128 encap seg6 mode encap segs fc42::1,fc42::2,fc42::3 dev eth0

Any packet whose destination address is fc00::1 would thus be encapsulated
within an outer IPv6 header containing the SRH with three segments, and would
actually be routed to the first segment of the list. If `mode inline' was
specified instead of `mode encap', then the SRH would be directly inserted
after the IPv6 header without outer encapsulation.

The inline mode is only available if CONFIG_IPV6_SEG6_INLINE is enabled. This
feature was made configurable because direct header insertion may break
several mechanisms such as PMTUD or IPSec AH.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/seg6_iptunnel.h      |   6 +
 include/net/seg6.h                 |   6 +
 include/uapi/linux/lwtunnel.h      |   1 +
 include/uapi/linux/seg6_iptunnel.h |  44 ++++
 net/core/lwtunnel.c                |   2 +
 net/ipv6/Kconfig                   |  12 ++
 net/ipv6/Makefile                  |   2 +-
 net/ipv6/seg6.c                    |  44 ++++
 net/ipv6/seg6_iptunnel.c           | 410 +++++++++++++++++++++++++++++++++++++
 9 files changed, 526 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/seg6_iptunnel.h
 create mode 100644 include/uapi/linux/seg6_iptunnel.h
 create mode 100644 net/ipv6/seg6_iptunnel.c

(limited to 'include/linux')

diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h
new file mode 100644
index 000000000000..5377cf6a5a02
--- /dev/null
+++ b/include/linux/seg6_iptunnel.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_IPTUNNEL_H
+#define _LINUX_SEG6_IPTUNNEL_H
+
+#include <uapi/linux/seg6_iptunnel.h>
+
+#endif
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 7c7b8ed39661..ff5da0ce83e9 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -16,6 +16,8 @@
 
 #include <linux/net.h>
 #include <linux/ipv6.h>
+#include <net/lwtunnel.h>
+#include <linux/seg6.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
@@ -48,5 +50,9 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
 
 extern int seg6_init(void);
 extern void seg6_exit(void);
+extern int seg6_iptunnel_init(void);
+extern void seg6_iptunnel_exit(void);
+
+extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
 
 #endif
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index a478fe80e203..453cc6215bfd 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -9,6 +9,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_IP,
 	LWTUNNEL_ENCAP_ILA,
 	LWTUNNEL_ENCAP_IP6,
+	LWTUNNEL_ENCAP_SEG6,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
new file mode 100644
index 000000000000..0f7dbd280a9c
--- /dev/null
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -0,0 +1,44 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H
+#define _UAPI_LINUX_SEG6_IPTUNNEL_H
+
+enum {
+	SEG6_IPTUNNEL_UNSPEC,
+	SEG6_IPTUNNEL_SRH,
+	__SEG6_IPTUNNEL_MAX,
+};
+#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1)
+
+struct seg6_iptunnel_encap {
+	int mode;
+	struct ipv6_sr_hdr srh[0];
+};
+
+#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3))
+
+enum {
+	SEG6_IPTUN_MODE_INLINE,
+	SEG6_IPTUN_MODE_ENCAP,
+};
+
+static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
+{
+	int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
+
+	return ((tuninfo->srh->hdrlen + 1) << 3) +
+	       (encap * sizeof(struct ipv6hdr));
+}
+
+#endif
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 88fd64250b02..03976e939818 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
 		return "MPLS";
 	case LWTUNNEL_ENCAP_ILA:
 		return "ILA";
+	case LWTUNNEL_ENCAP_SEG6:
+		return "SEG6";
 	case LWTUNNEL_ENCAP_IP6:
 	case LWTUNNEL_ENCAP_IP:
 	case LWTUNNEL_ENCAP_NONE:
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2343e4f2e0bf..1123a001d729 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -289,4 +289,16 @@ config IPV6_PIMSM_V2
 	  Support for IPv6 PIM multicast routing protocol PIM-SMv2.
 	  If unsure, say N.
 
+config IPV6_SEG6_INLINE
+	bool "IPv6: direct Segment Routing Header insertion "
+	depends on IPV6
+	---help---
+	  Support for direct insertion of the Segment Routing Header,
+	  also known as inline mode. Be aware that direct insertion of
+	  extension headers (as opposed to encapsulation) may break
+	  multiple mechanisms such as PMTUD or IPSec AH. Use this feature
+	  only if you know exactly what you are doing.
+
+	  If unsure, say N.
+
 endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index c92010d62afc..59ee92fb3689 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-		udp_offload.o seg6.o
+		udp_offload.o seg6.o seg6_iptunnel.o
 
 ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index e246b0ba12ac..9c78053e67e0 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -26,6 +26,43 @@
 #include <linux/seg6.h>
 #include <linux/seg6_genl.h>
 
+bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+{
+	int trailing;
+	unsigned int tlv_offset;
+
+	if (srh->type != IPV6_SRCRT_TYPE_4)
+		return false;
+
+	if (((srh->hdrlen + 1) << 3) != len)
+		return false;
+
+	if (srh->segments_left != srh->first_segment)
+		return false;
+
+	tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
+
+	trailing = len - tlv_offset;
+	if (trailing < 0)
+		return false;
+
+	while (trailing) {
+		struct sr6_tlv *tlv;
+		unsigned int tlv_len;
+
+		tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
+		tlv_len = sizeof(*tlv) + tlv->len;
+
+		trailing -= tlv_len;
+		if (trailing < 0)
+			return false;
+
+		tlv_offset += tlv_len;
+	}
+
+	return true;
+}
+
 static struct genl_family seg6_genl_family;
 
 static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
@@ -198,10 +235,16 @@ int __init seg6_init(void)
 	if (err)
 		goto out_unregister_genl;
 
+	err = seg6_iptunnel_init();
+	if (err)
+		goto out_unregister_pernet;
+
 	pr_info("Segment Routing with IPv6\n");
 
 out:
 	return err;
+out_unregister_pernet:
+	unregister_pernet_subsys(&ip6_segments_ops);
 out_unregister_genl:
 	genl_unregister_family(&seg6_genl_family);
 	goto out;
@@ -209,6 +252,7 @@ out_unregister_genl:
 
 void seg6_exit(void)
 {
+	seg6_iptunnel_exit();
 	unregister_pernet_subsys(&ip6_segments_ops);
 	genl_unregister_family(&seg6_genl_family);
 }
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
new file mode 100644
index 000000000000..39762b2fa7a2
--- /dev/null
+++ b/net/ipv6/seg6_iptunnel.c
@@ -0,0 +1,410 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *        modify it under the terms of the GNU General Public License
+ *        as published by the Free Software Foundation; either version
+ *        2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_iptunnel.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#ifdef CONFIG_DST_CACHE
+#include <net/dst_cache.h>
+#endif
+
+struct seg6_lwt {
+#ifdef CONFIG_DST_CACHE
+	struct dst_cache cache;
+#endif
+	struct seg6_iptunnel_encap tuninfo[0];
+};
+
+static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+	return (struct seg6_lwt *)lwt->data;
+}
+
+static inline struct seg6_iptunnel_encap *
+seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
+{
+	return seg6_lwt_lwtunnel(lwt)->tuninfo;
+}
+
+static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
+	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
+};
+
+int nla_put_srh(struct sk_buff *skb, int attrtype,
+		struct seg6_iptunnel_encap *tuninfo)
+{
+	struct seg6_iptunnel_encap *data;
+	struct nlattr *nla;
+	int len;
+
+	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
+
+	nla = nla_reserve(skb, attrtype, len);
+	if (!nla)
+		return -EMSGSIZE;
+
+	data = nla_data(nla);
+	memcpy(data, tuninfo, len);
+
+	return 0;
+}
+
+static void set_tun_src(struct net *net, struct net_device *dev,
+			struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+	struct in6_addr *tun_src;
+
+	rcu_read_lock();
+
+	tun_src = rcu_dereference(sdata->tun_src);
+
+	if (!ipv6_addr_any(tun_src)) {
+		memcpy(saddr, tun_src, sizeof(struct in6_addr));
+	} else {
+		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
+				   saddr);
+	}
+
+	rcu_read_unlock();
+}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	struct ipv6hdr *hdr, *inner_hdr;
+	struct ipv6_sr_hdr *isrh;
+	int hdrlen, tot_len, err;
+
+	hdrlen = (osrh->hdrlen + 1) << 3;
+	tot_len = hdrlen + sizeof(*hdr);
+
+	err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+	if (unlikely(err))
+		return err;
+
+	inner_hdr = ipv6_hdr(skb);
+
+	skb_push(skb, tot_len);
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+	hdr = ipv6_hdr(skb);
+
+	/* inherit tc, flowlabel and hlim
+	 * hlim will be decremented in ip6_forward() afterwards and
+	 * decapsulation will overwrite inner hlim with outer hlim
+	 */
+	ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+		     ip6_flowlabel(inner_hdr));
+	hdr->hop_limit = inner_hdr->hop_limit;
+	hdr->nexthdr = NEXTHDR_ROUTING;
+
+	isrh = (void *)hdr + sizeof(*hdr);
+	memcpy(isrh, osrh, hdrlen);
+
+	isrh->nexthdr = NEXTHDR_IPV6;
+
+	hdr->daddr = isrh->segments[isrh->first_segment];
+	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+
+	skb_postpush_rcsum(skb, hdr, tot_len);
+
+	return 0;
+}
+
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+#ifdef CONFIG_IPV6_SEG6_INLINE
+static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+	struct ipv6hdr *hdr, *oldhdr;
+	struct ipv6_sr_hdr *isrh;
+	int hdrlen, err;
+
+	hdrlen = (osrh->hdrlen + 1) << 3;
+
+	err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+	if (unlikely(err))
+		return err;
+
+	oldhdr = ipv6_hdr(skb);
+
+	skb_pull(skb, sizeof(struct ipv6hdr));
+	skb_postpull_rcsum(skb, skb_network_header(skb),
+			   sizeof(struct ipv6hdr));
+
+	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	hdr = ipv6_hdr(skb);
+
+	memmove(hdr, oldhdr, sizeof(*hdr));
+
+	isrh = (void *)hdr + sizeof(*hdr);
+	memcpy(isrh, osrh, hdrlen);
+
+	isrh->nexthdr = hdr->nexthdr;
+	hdr->nexthdr = NEXTHDR_ROUTING;
+
+	isrh->segments[0] = hdr->daddr;
+	hdr->daddr = isrh->segments[isrh->first_segment];
+
+	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
+
+	return 0;
+}
+#endif
+
+static int seg6_do_srh(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct seg6_iptunnel_encap *tinfo;
+	int err = 0;
+
+	tinfo = seg6_encap_lwtunnel(dst->lwtstate);
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	switch (tinfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+	case SEG6_IPTUN_MODE_INLINE:
+		err = seg6_do_srh_inline(skb, tinfo->srh);
+		skb_reset_inner_headers(skb);
+		break;
+#endif
+	case SEG6_IPTUN_MODE_ENCAP:
+		err = seg6_do_srh_encap(skb, tinfo->srh);
+		break;
+	}
+
+	if (err)
+		return err;
+
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+	skb_set_inner_protocol(skb, skb->protocol);
+
+	return 0;
+}
+
+int seg6_input(struct sk_buff *skb)
+{
+	int err;
+
+	err = seg6_do_srh(skb);
+	if (unlikely(err)) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	skb_dst_drop(skb);
+	ip6_route_input(skb);
+
+	return dst_input(skb);
+}
+
+int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	struct dst_entry *orig_dst = skb_dst(skb);
+	struct dst_entry *dst = NULL;
+	struct seg6_lwt *slwt;
+	int err = -EINVAL;
+
+	err = seg6_do_srh(skb);
+	if (unlikely(err))
+		goto drop;
+
+	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+#ifdef CONFIG_DST_CACHE
+	dst = dst_cache_get(&slwt->cache);
+#endif
+
+	if (unlikely(!dst)) {
+		struct ipv6hdr *hdr = ipv6_hdr(skb);
+		struct flowi6 fl6;
+
+		fl6.daddr = hdr->daddr;
+		fl6.saddr = hdr->saddr;
+		fl6.flowlabel = ip6_flowinfo(hdr);
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_proto = hdr->nexthdr;
+
+		dst = ip6_route_output(net, NULL, &fl6);
+		if (dst->error) {
+			err = dst->error;
+			dst_release(dst);
+			goto drop;
+		}
+
+#ifdef CONFIG_DST_CACHE
+		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+#endif
+	}
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	return dst_output(net, sk, skb);
+drop:
+	kfree_skb(skb);
+	return err;
+}
+
+static int seg6_build_state(struct net_device *dev, struct nlattr *nla,
+			    unsigned int family, const void *cfg,
+			    struct lwtunnel_state **ts)
+{
+	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
+	struct seg6_iptunnel_encap *tuninfo;
+	struct lwtunnel_state *newts;
+	int tuninfo_len, min_size;
+	struct seg6_lwt *slwt;
+	int err;
+
+	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
+			       seg6_iptunnel_policy);
+
+	if (err < 0)
+		return err;
+
+	if (!tb[SEG6_IPTUNNEL_SRH])
+		return -EINVAL;
+
+	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
+	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
+
+	/* tuninfo must contain at least the iptunnel encap structure,
+	 * the SRH and one segment
+	 */
+	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
+		   sizeof(struct in6_addr);
+	if (tuninfo_len < min_size)
+		return -EINVAL;
+
+	switch (tuninfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+	case SEG6_IPTUN_MODE_INLINE:
+		break;
+#endif
+	case SEG6_IPTUN_MODE_ENCAP:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* verify that SRH is consistent */
+	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
+		return -EINVAL;
+
+	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
+	if (!newts)
+		return -ENOMEM;
+
+	slwt = seg6_lwt_lwtunnel(newts);
+
+#ifdef CONFIG_DST_CACHE
+	err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+	if (err) {
+		kfree(newts);
+		return err;
+	}
+#endif
+
+	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
+
+	newts->type = LWTUNNEL_ENCAP_SEG6;
+	newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+			LWTUNNEL_STATE_INPUT_REDIRECT;
+	newts->headroom = seg6_lwt_headroom(tuninfo);
+
+	*ts = newts;
+
+	return 0;
+}
+
+#ifdef CONFIG_DST_CACHE
+static void seg6_destroy_state(struct lwtunnel_state *lwt)
+{
+	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
+}
+#endif
+
+static int seg6_fill_encap_info(struct sk_buff *skb,
+				struct lwtunnel_state *lwtstate)
+{
+	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
+
+	if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
+
+	return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
+}
+
+static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
+	struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
+	int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
+
+	if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
+		return 1;
+
+	return memcmp(a_hdr, b_hdr, len);
+}
+
+static const struct lwtunnel_encap_ops seg6_iptun_ops = {
+	.build_state = seg6_build_state,
+#ifdef CONFIG_DST_CACHE
+	.destroy_state = seg6_destroy_state,
+#endif
+	.output = seg6_output,
+	.input = seg6_input,
+	.fill_encap = seg6_fill_encap_info,
+	.get_encap_size = seg6_encap_nlsize,
+	.cmp_encap = seg6_encap_cmp,
+};
+
+int __init seg6_iptunnel_init(void)
+{
+	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
+
+void seg6_iptunnel_exit(void)
+{
+	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
-- 
cgit v1.2.3


From bf355b8d2c30a289232042cacc1cfaea4923936c Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Tue, 8 Nov 2016 14:57:42 +0100
Subject: ipv6: sr: add core files for SR HMAC support

This patch adds the necessary functions to compute and check the HMAC signature
of an SR-enabled packet. Two HMAC algorithms are supported: hmac(sha1) and
hmac(sha256).

In order to avoid dynamic memory allocation for each HMAC computation,
a per-cpu ring buffer is allocated for this purpose.

A new per-interface sysctl called seg6_require_hmac is added, allowing a
user-defined policy for processing HMAC-signed SR-enabled packets.
A value of -1 means that the HMAC field will always be ignored.
A value of 0 means that if an HMAC field is present, its validity will
be enforced (the packet is dropped is the signature is incorrect).
Finally, a value of 1 means that any SR-enabled packet that does not
contain an HMAC signature or whose signature is incorrect will be dropped.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h           |   3 +
 include/linux/seg6_hmac.h      |   6 +
 include/net/seg6.h             |   4 +
 include/net/seg6_hmac.h        |  62 ++++++
 include/uapi/linux/ipv6.h      |   1 +
 include/uapi/linux/seg6_hmac.h |  21 ++
 net/ipv6/Kconfig               |  12 +
 net/ipv6/Makefile              |   1 +
 net/ipv6/addrconf.c            |  18 ++
 net/ipv6/seg6_hmac.c           | 484 +++++++++++++++++++++++++++++++++++++++++
 10 files changed, 612 insertions(+)
 create mode 100644 include/linux/seg6_hmac.h
 create mode 100644 include/net/seg6_hmac.h
 create mode 100644 include/uapi/linux/seg6_hmac.h
 create mode 100644 net/ipv6/seg6_hmac.c

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 68d3f71f0abf..93756585521f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -65,6 +65,9 @@ struct ipv6_devconf {
 	__s32		use_oif_addrs_only;
 	__s32		keep_addr_on_down;
 	__s32		seg6_enabled;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	__s32		seg6_require_hmac;
+#endif
 
 	struct ctl_table_header *sysctl_header;
 };
diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h
new file mode 100644
index 000000000000..da437ebdc6cd
--- /dev/null
+++ b/include/linux/seg6_hmac.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_HMAC_H
+#define _LINUX_SEG6_HMAC_H
+
+#include <uapi/linux/seg6_hmac.h>
+
+#endif
diff --git a/include/net/seg6.h b/include/net/seg6.h
index ff5da0ce83e9..4e0357517d79 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -18,6 +18,7 @@
 #include <linux/ipv6.h>
 #include <net/lwtunnel.h>
 #include <linux/seg6.h>
+#include <linux/rhashtable.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
@@ -41,6 +42,9 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from,
 struct seg6_pernet_data {
 	struct mutex lock;
 	struct in6_addr __rcu *tun_src;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	struct rhashtable hmac_infos;
+#endif
 };
 
 static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
new file mode 100644
index 000000000000..69c3a106056b
--- /dev/null
+++ b/include/net/seg6_hmac.h
@@ -0,0 +1,62 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_HMAC_H
+#define _NET_SEG6_HMAC_H
+
+#include <net/flow.h>
+#include <net/ip6_fib.h>
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+#include <net/seg6.h>
+#include <linux/seg6_hmac.h>
+#include <linux/rhashtable.h>
+
+#define SEG6_HMAC_MAX_DIGESTSIZE	160
+#define SEG6_HMAC_RING_SIZE		256
+
+struct seg6_hmac_info {
+	struct rhash_head node;
+	struct rcu_head rcu;
+
+	u32 hmackeyid;
+	char secret[SEG6_HMAC_SECRET_LEN];
+	u8 slen;
+	u8 alg_id;
+};
+
+struct seg6_hmac_algo {
+	u8 alg_id;
+	char name[64];
+	struct crypto_shash * __percpu *tfms;
+	struct shash_desc * __percpu *shashs;
+};
+
+extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo,
+			     struct ipv6_sr_hdr *hdr, struct in6_addr *saddr,
+			     u8 *output);
+extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key);
+extern int seg6_hmac_info_add(struct net *net, u32 key,
+			      struct seg6_hmac_info *hinfo);
+extern int seg6_hmac_info_del(struct net *net, u32 key);
+extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
+			  struct ipv6_sr_hdr *srh);
+extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
+extern int seg6_hmac_init(void);
+extern void seg6_hmac_exit(void);
+extern int seg6_hmac_net_init(struct net *net);
+extern void seg6_hmac_net_exit(struct net *net);
+
+#endif
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 7ff1d654e333..53561be1ac21 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -180,6 +180,7 @@ enum {
 	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
 	DEVCONF_SEG6_ENABLED,
+	DEVCONF_SEG6_REQUIRE_HMAC,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h
new file mode 100644
index 000000000000..b652dfd51bc5
--- /dev/null
+++ b/include/uapi/linux/seg6_hmac.h
@@ -0,0 +1,21 @@
+#ifndef _UAPI_LINUX_SEG6_HMAC_H
+#define _UAPI_LINUX_SEG6_HMAC_H
+
+#include <linux/seg6.h>
+
+#define SEG6_HMAC_SECRET_LEN	64
+#define SEG6_HMAC_FIELD_LEN	32
+
+struct sr6_tlv_hmac {
+	struct sr6_tlv tlvhdr;
+	__u16 reserved;
+	__be32 hmackeyid;
+	__u8 hmac[SEG6_HMAC_FIELD_LEN];
+};
+
+enum {
+	SEG6_HMAC_ALGO_SHA1 = 1,
+	SEG6_HMAC_ALGO_SHA256 = 2,
+};
+
+#endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 1123a001d729..0f00811a785f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -301,4 +301,16 @@ config IPV6_SEG6_INLINE
 
 	  If unsure, say N.
 
+config IPV6_SEG6_HMAC
+	bool "IPv6: Segment Routing HMAC support"
+	depends on IPV6
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	---help---
+	  Support for HMAC signature generation and verification
+	  of SR-enabled packets.
+
+	  If unsure, say N.
+
 endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 59ee92fb3689..129cad2ba960 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 obj-$(CONFIG_IPV6_FOU) += fou6.o
+obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 
 obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
 obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2ac6cb460af0..86219c0a0104 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -239,6 +239,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
 	.seg6_enabled		= 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	.seg6_require_hmac	= 0,
+#endif
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -286,6 +289,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
 	.seg6_enabled		= 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	.seg6_require_hmac	= 0,
+#endif
 };
 
 /* Check if a valid qdisc is available */
@@ -4947,6 +4953,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
 	array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
 	array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+#endif
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6045,6 +6054,15 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	{
+		.procname	= "seg6_require_hmac",
+		.data		= &ipv6_devconf.seg6_require_hmac,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
new file mode 100644
index 000000000000..ef1c8a46e7ac
--- /dev/null
+++ b/net/ipv6/seg6_hmac.c
@@ -0,0 +1,484 @@
+/*
+ *  SR-IPv6 implementation -- HMAC functions
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/mroute6.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+#include <linux/cryptohash.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <net/seg6_hmac.h>
+#include <linux/random.h>
+
+static char * __percpu *hmac_ring;
+
+static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+	const struct seg6_hmac_info *hinfo = obj;
+
+	return (hinfo->hmackeyid != *(__u32 *)arg->key);
+}
+
+static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo)
+{
+	kfree_rcu(hinfo, rcu);
+}
+
+static void seg6_free_hi(void *ptr, void *arg)
+{
+	struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr;
+
+	if (hinfo)
+		seg6_hinfo_release(hinfo);
+}
+
+static const struct rhashtable_params rht_params = {
+	.head_offset		= offsetof(struct seg6_hmac_info, node),
+	.key_offset		= offsetof(struct seg6_hmac_info, hmackeyid),
+	.key_len		= sizeof(u32),
+	.automatic_shrinking	= true,
+	.obj_cmpfn		= seg6_hmac_cmpfn,
+};
+
+static struct seg6_hmac_algo hmac_algos[] = {
+	{
+		.alg_id = SEG6_HMAC_ALGO_SHA1,
+		.name = "hmac(sha1)",
+	},
+	{
+		.alg_id = SEG6_HMAC_ALGO_SHA256,
+		.name = "hmac(sha256)",
+	},
+};
+
+static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
+{
+	struct sr6_tlv_hmac *tlv;
+
+	if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5)
+		return NULL;
+
+	if (!sr_has_hmac(srh))
+		return NULL;
+
+	tlv = (struct sr6_tlv_hmac *)
+	      ((char *)srh + ((srh->hdrlen + 1) << 3) - 40);
+
+	if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38)
+		return NULL;
+
+	return tlv;
+}
+
+static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
+{
+	struct seg6_hmac_algo *algo;
+	int i, alg_count;
+
+	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+	for (i = 0; i < alg_count; i++) {
+		algo = &hmac_algos[i];
+		if (algo->alg_id == alg_id)
+			return algo;
+	}
+
+	return NULL;
+}
+
+static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize,
+		     u8 *output, int outlen)
+{
+	struct seg6_hmac_algo *algo;
+	struct crypto_shash *tfm;
+	struct shash_desc *shash;
+	int ret, dgsize;
+
+	algo = __hmac_get_algo(hinfo->alg_id);
+	if (!algo)
+		return -ENOENT;
+
+	tfm = *this_cpu_ptr(algo->tfms);
+
+	dgsize = crypto_shash_digestsize(tfm);
+	if (dgsize > outlen) {
+		pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n",
+			 dgsize, outlen);
+		return -ENOMEM;
+	}
+
+	ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen);
+	if (ret < 0) {
+		pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret);
+		goto failed;
+	}
+
+	shash = *this_cpu_ptr(algo->shashs);
+	shash->tfm = tfm;
+
+	ret = crypto_shash_digest(shash, text, psize, output);
+	if (ret < 0) {
+		pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret);
+		goto failed;
+	}
+
+	return dgsize;
+
+failed:
+	return ret;
+}
+
+int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
+		      struct in6_addr *saddr, u8 *output)
+{
+	__be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid);
+	u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE];
+	int plen, i, dgsize, wrsize;
+	char *ring, *off;
+
+	/* a 160-byte buffer for digest output allows to store highest known
+	 * hash function (RadioGatun) with up to 1216 bits
+	 */
+
+	/* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */
+	plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;
+
+	/* this limit allows for 14 segments */
+	if (plen >= SEG6_HMAC_RING_SIZE)
+		return -EMSGSIZE;
+
+	/* Let's build the HMAC text on the ring buffer. The text is composed
+	 * as follows, in order:
+	 *
+	 * 1. Source IPv6 address (128 bits)
+	 * 2. first_segment value (8 bits)
+	 * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0)
+	 * 4. HMAC Key ID (32 bits)
+	 * 5. All segments in the segments list (n * 128 bits)
+	 */
+
+	local_bh_disable();
+	ring = *this_cpu_ptr(hmac_ring);
+	off = ring;
+
+	/* source address */
+	memcpy(off, saddr, 16);
+	off += 16;
+
+	/* first_segment value */
+	*off++ = hdr->first_segment;
+
+	/* cleanup flag */
+	*off++ = !!(sr_has_cleanup(hdr)) << 7;
+
+	/* HMAC Key ID */
+	memcpy(off, &hmackeyid, 4);
+	off += 4;
+
+	/* all segments in the list */
+	for (i = 0; i < hdr->first_segment + 1; i++) {
+		memcpy(off, hdr->segments + i, 16);
+		off += 16;
+	}
+
+	dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
+			   SEG6_HMAC_MAX_DIGESTSIZE);
+	local_bh_enable();
+
+	if (dgsize < 0)
+		return dgsize;
+
+	wrsize = SEG6_HMAC_FIELD_LEN;
+	if (wrsize > dgsize)
+		wrsize = dgsize;
+
+	memset(output, 0, SEG6_HMAC_FIELD_LEN);
+	memcpy(output, tmp_out, wrsize);
+
+	return 0;
+}
+EXPORT_SYMBOL(seg6_hmac_compute);
+
+/* checks if an incoming SR-enabled packet's HMAC status matches
+ * the incoming policy.
+ *
+ * called with rcu_read_lock()
+ */
+bool seg6_hmac_validate_skb(struct sk_buff *skb)
+{
+	u8 hmac_output[SEG6_HMAC_FIELD_LEN];
+	struct net *net = dev_net(skb->dev);
+	struct seg6_hmac_info *hinfo;
+	struct sr6_tlv_hmac *tlv;
+	struct ipv6_sr_hdr *srh;
+	struct inet6_dev *idev;
+
+	idev = __in6_dev_get(skb->dev);
+
+	srh = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+	tlv = seg6_get_tlv_hmac(srh);
+
+	/* mandatory check but no tlv */
+	if (idev->cnf.seg6_require_hmac > 0 && !tlv)
+		return false;
+
+	/* no check */
+	if (idev->cnf.seg6_require_hmac < 0)
+		return true;
+
+	/* check only if present */
+	if (idev->cnf.seg6_require_hmac == 0 && !tlv)
+		return true;
+
+	/* now, seg6_require_hmac >= 0 && tlv */
+
+	hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+	if (!hinfo)
+		return false;
+
+	if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
+		return false;
+
+	if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(seg6_hmac_validate_skb);
+
+/* called with rcu_read_lock() */
+struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+	struct seg6_hmac_info *hinfo;
+
+	hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+
+	return hinfo;
+}
+EXPORT_SYMBOL(seg6_hmac_info_lookup);
+
+int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+	int err;
+
+	err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node,
+					    rht_params);
+
+	return err;
+}
+EXPORT_SYMBOL(seg6_hmac_info_add);
+
+int seg6_hmac_info_del(struct net *net, u32 key)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+	struct seg6_hmac_info *hinfo;
+	int err = -ENOENT;
+
+	hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+	if (!hinfo)
+		goto out;
+
+	err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node,
+				     rht_params);
+	if (err)
+		goto out;
+
+	seg6_hinfo_release(hinfo);
+
+out:
+	return err;
+}
+EXPORT_SYMBOL(seg6_hmac_info_del);
+
+int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
+		   struct ipv6_sr_hdr *srh)
+{
+	struct seg6_hmac_info *hinfo;
+	struct sr6_tlv_hmac *tlv;
+	int err = -ENOENT;
+
+	tlv = seg6_get_tlv_hmac(srh);
+	if (!tlv)
+		return -EINVAL;
+
+	rcu_read_lock();
+
+	hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+	if (!hinfo)
+		goto out;
+
+	memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN);
+	err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac);
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(seg6_push_hmac);
+
+static int seg6_hmac_init_ring(void)
+{
+	int i;
+
+	hmac_ring = alloc_percpu(char *);
+
+	if (!hmac_ring)
+		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL);
+
+		if (!ring)
+			return -ENOMEM;
+
+		*per_cpu_ptr(hmac_ring, i) = ring;
+	}
+
+	return 0;
+}
+
+static int seg6_hmac_init_algo(void)
+{
+	struct seg6_hmac_algo *algo;
+	struct crypto_shash *tfm;
+	struct shash_desc *shash;
+	int i, alg_count, cpu;
+
+	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+
+	for (i = 0; i < alg_count; i++) {
+		struct crypto_shash **p_tfm;
+		int shsize;
+
+		algo = &hmac_algos[i];
+		algo->tfms = alloc_percpu(struct crypto_shash *);
+		if (!algo->tfms)
+			return -ENOMEM;
+
+		for_each_possible_cpu(cpu) {
+			tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
+			if (IS_ERR(tfm))
+				return PTR_ERR(tfm);
+			p_tfm = per_cpu_ptr(algo->tfms, cpu);
+			*p_tfm = tfm;
+		}
+
+		p_tfm = this_cpu_ptr(algo->tfms);
+		tfm = *p_tfm;
+
+		shsize = sizeof(*shash) + crypto_shash_descsize(tfm);
+
+		algo->shashs = alloc_percpu(struct shash_desc *);
+		if (!algo->shashs)
+			return -ENOMEM;
+
+		for_each_possible_cpu(cpu) {
+			shash = kzalloc(shsize, GFP_KERNEL);
+			if (!shash)
+				return -ENOMEM;
+			*per_cpu_ptr(algo->shashs, cpu) = shash;
+		}
+	}
+
+	return 0;
+}
+
+int __init seg6_hmac_init(void)
+{
+	int ret;
+
+	ret = seg6_hmac_init_ring();
+	if (ret < 0)
+		goto out;
+
+	ret = seg6_hmac_init_algo();
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(seg6_hmac_init);
+
+int __net_init seg6_hmac_net_init(struct net *net)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+	rhashtable_init(&sdata->hmac_infos, &rht_params);
+
+	return 0;
+}
+EXPORT_SYMBOL(seg6_hmac_net_init);
+
+void seg6_hmac_exit(void)
+{
+	struct seg6_hmac_algo *algo = NULL;
+	int i, alg_count, cpu;
+
+	for_each_possible_cpu(i) {
+		char *ring = *per_cpu_ptr(hmac_ring, i);
+
+		kfree(ring);
+	}
+	free_percpu(hmac_ring);
+
+	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+	for (i = 0; i < alg_count; i++) {
+		algo = &hmac_algos[i];
+		for_each_possible_cpu(cpu) {
+			struct crypto_shash *tfm;
+			struct shash_desc *shash;
+
+			shash = *per_cpu_ptr(algo->shashs, cpu);
+			kfree(shash);
+			tfm = *per_cpu_ptr(algo->tfms, cpu);
+			crypto_free_shash(tfm);
+		}
+		free_percpu(algo->tfms);
+		free_percpu(algo->shashs);
+	}
+}
+EXPORT_SYMBOL(seg6_hmac_exit);
+
+void __net_exit seg6_hmac_net_exit(struct net *net)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+	rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL);
+}
+EXPORT_SYMBOL(seg6_hmac_net_exit);
-- 
cgit v1.2.3


From 149d6ad83663b4820ca09c9d40b1eea7f5c22c2b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Nov 2016 11:07:28 -0800
Subject: net: napi_hash_add() is no longer exported

There are no more users except from net/core/dev.c
napi_hash_add() can now be static.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 -----------
 net/core/dev.c            |  3 +--
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 66fd61c681d9..d64135a0ab71 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -467,17 +467,6 @@ static inline void napi_complete(struct napi_struct *n)
 	return napi_complete_done(n, 0);
 }
 
-/**
- *	napi_hash_add - add a NAPI to global hashtable
- *	@napi: NAPI context
- *
- * Generate a new napi_id and store a @napi under it in napi_hash.
- * Used for busy polling (CONFIG_NET_RX_BUSY_POLL).
- * Note: This is normally automatically done from netif_napi_add(),
- * so might disappear in a future Linux version.
- */
-void napi_hash_add(struct napi_struct *napi);
-
 /**
  *	napi_hash_del - remove a NAPI from global table
  *	@napi: NAPI context
diff --git a/net/core/dev.c b/net/core/dev.c
index c9837fa08dfc..7385c1a152fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5017,7 +5017,7 @@ EXPORT_SYMBOL(sk_busy_loop);
 
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
-void napi_hash_add(struct napi_struct *napi)
+static void napi_hash_add(struct napi_struct *napi)
 {
 	if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
 	    test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
@@ -5037,7 +5037,6 @@ void napi_hash_add(struct napi_struct *napi)
 
 	spin_unlock(&napi_hash_lock);
 }
-EXPORT_SYMBOL_GPL(napi_hash_add);
 
 /* Warning : caller is responsible to make sure rcu grace period
  * is respected before freeing memory containing @napi
-- 
cgit v1.2.3


From d8d26354191399627bac9cf0da0667b0f5178686 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Tue, 8 Nov 2016 22:49:16 +0100
Subject: ptp: Introduce a high resolution frequency adjustment method.

The internal PTP Hardware Clock (PHC) interface limits the resolution for
frequency adjustments to one part per billion.  However, some hardware
devices allow finer adjustment, and making use of the increased resolution
improves synchronization measurably on such devices.

This patch adds an alternative method that allows finer frequency tuning
by passing the scaled ppm value to PHC drivers.  This value comes from
user space, and it has a resolution of about 0.015 ppb.  We also deprecate
the older method, anticipating its removal once existing drivers have been
converted over.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Suggested-by: Ulrik De Bie <ulrik.debie-os@e2big.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 5 ++++-
 include/linux/ptp_clock_kernel.h | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 86280b7e41f3..9c13381b6966 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -153,7 +153,10 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
 		s32 ppb = scaled_ppm_to_ppb(tx->freq);
 		if (ppb > ops->max_adj || ppb < -ops->max_adj)
 			return -ERANGE;
-		err = ops->adjfreq(ops, ppb);
+		if (ops->adjfine)
+			err = ops->adjfine(ops, tx->freq);
+		else
+			err = ops->adjfreq(ops, ppb);
 		ptp->dialed_frequency = tx->freq;
 	} else if (tx->modes == 0) {
 		tx->freq = ptp->dialed_frequency;
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 5ad54fc66cf0..b76d47aba564 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -58,7 +58,14 @@ struct system_device_crosststamp;
  *
  * clock operations
  *
+ * @adjfine:  Adjusts the frequency of the hardware clock.
+ *            parameter scaled_ppm: Desired frequency offset from
+ *            nominal frequency in parts per million, but with a
+ *            16 bit binary fractional field.
+ *
  * @adjfreq:  Adjusts the frequency of the hardware clock.
+ *            This method is deprecated.  New drivers should implement
+ *            the @adjfine method instead.
  *            parameter delta: Desired frequency offset from nominal frequency
  *            in parts per billion
  *
@@ -108,6 +115,7 @@ struct ptp_clock_info {
 	int n_pins;
 	int pps;
 	struct ptp_pin_desc *pin_config;
+	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
-- 
cgit v1.2.3


From 7bdc9650f03604b06ba7434fab694e8ae8ca782d Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 19 Oct 2016 19:40:02 -0700
Subject: remoteproc: Introduce subdevices

A subdevice is an abstract entity that can be used to tie actions to the
booting and shutting down of a remote processor. The subdevice object is
expected to be embedded in concrete implementations, allowing for a
variety of use cases to be implemented.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 72 ++++++++++++++++++++++++++++++++++++
 include/linux/remoteproc.h           | 22 +++++++++++
 2 files changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index b1860949d106..b5e314fe1f4c 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -736,6 +736,34 @@ static int rproc_handle_resources(struct rproc *rproc, int len,
 	return ret;
 }
 
+static int rproc_probe_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+	int ret;
+
+	list_for_each_entry(subdev, &rproc->subdevs, node) {
+		ret = subdev->probe(subdev);
+		if (ret)
+			goto unroll_registration;
+	}
+
+	return 0;
+
+unroll_registration:
+	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
+		subdev->remove(subdev);
+
+	return ret;
+}
+
+static void rproc_remove_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+
+	list_for_each_entry(subdev, &rproc->subdevs, node)
+		subdev->remove(subdev);
+}
+
 /**
  * rproc_resource_cleanup() - clean up and free all acquired resources
  * @rproc: rproc handle
@@ -878,12 +906,22 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 		goto clean_up_resources;
 	}
 
+	/* probe any subdevices for the remote processor */
+	ret = rproc_probe_subdevices(rproc);
+	if (ret) {
+		dev_err(dev, "failed to probe subdevices for %s: %d\n",
+			rproc->name, ret);
+		goto stop_rproc;
+	}
+
 	rproc->state = RPROC_RUNNING;
 
 	dev_info(dev, "remote processor %s is now up\n", rproc->name);
 
 	return 0;
 
+stop_rproc:
+	rproc->ops->stop(rproc);
 clean_up_resources:
 	rproc_resource_cleanup(rproc);
 clean_up:
@@ -1121,6 +1159,9 @@ void rproc_shutdown(struct rproc *rproc)
 	if (!atomic_dec_and_test(&rproc->power))
 		goto out;
 
+	/* remove any subdevices for the remote processor */
+	rproc_remove_subdevices(rproc);
+
 	/* power off the remote processor */
 	ret = rproc->ops->stop(rproc);
 	if (ret) {
@@ -1372,6 +1413,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	INIT_LIST_HEAD(&rproc->mappings);
 	INIT_LIST_HEAD(&rproc->traces);
 	INIT_LIST_HEAD(&rproc->rvdevs);
+	INIT_LIST_HEAD(&rproc->subdevs);
 
 	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
 	init_completion(&rproc->crash_comp);
@@ -1458,6 +1500,36 @@ int rproc_del(struct rproc *rproc)
 }
 EXPORT_SYMBOL(rproc_del);
 
+/**
+ * rproc_add_subdev() - add a subdevice to a remoteproc
+ * @rproc: rproc handle to add the subdevice to
+ * @subdev: subdev handle to register
+ * @probe: function to call when the rproc boots
+ * @remove: function to call when the rproc shuts down
+ */
+void rproc_add_subdev(struct rproc *rproc,
+		      struct rproc_subdev *subdev,
+		      int (*probe)(struct rproc_subdev *subdev),
+		      void (*remove)(struct rproc_subdev *subdev))
+{
+	subdev->probe = probe;
+	subdev->remove = remove;
+
+	list_add_tail(&subdev->node, &rproc->subdevs);
+}
+EXPORT_SYMBOL(rproc_add_subdev);
+
+/**
+ * rproc_remove_subdev() - remove a subdevice from a remoteproc
+ * @rproc: rproc handle to remove the subdevice from
+ * @subdev: subdev handle, previously registered with rproc_add_subdev()
+ */
+void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev)
+{
+	list_del(&subdev->node);
+}
+EXPORT_SYMBOL(rproc_remove_subdev);
+
 /**
  * rproc_report_crash() - rproc crash reporter function
  * @rproc: remote processor
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 940e4cf2ac48..f6d5e66854e4 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -400,6 +400,7 @@ enum rproc_crash_type {
  * @firmware_loading_complete: marks e/o asynchronous firmware loading
  * @bootaddr: address of first instruction to boot rproc with (optional)
  * @rvdevs: list of remote virtio devices
+ * @subdevs: list of subdevices, to following the running state
  * @notifyids: idr for dynamically assigning rproc-wide unique notify ids
  * @index: index of this rproc device
  * @crash_handler: workqueue for handling a crash
@@ -431,6 +432,7 @@ struct rproc {
 	struct completion firmware_loading_complete;
 	u32 bootaddr;
 	struct list_head rvdevs;
+	struct list_head subdevs;
 	struct idr notifyids;
 	int index;
 	struct work_struct crash_handler;
@@ -444,6 +446,19 @@ struct rproc {
 	bool auto_boot;
 };
 
+/**
+ * struct rproc_subdev - subdevice tied to a remoteproc
+ * @node: list node related to the rproc subdevs list
+ * @probe: probe function, called as the rproc is started
+ * @remove: remove function, called as the rproc is stopped
+ */
+struct rproc_subdev {
+	struct list_head node;
+
+	int (*probe)(struct rproc_subdev *subdev);
+	void (*remove)(struct rproc_subdev *subdev);
+};
+
 /* we currently support only two vrings per rvdev */
 
 #define RVDEV_NUM_VRINGS 2
@@ -511,4 +526,11 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 	return rvdev->rproc;
 }
 
+void rproc_add_subdev(struct rproc *rproc,
+		      struct rproc_subdev *subdev,
+		      int (*probe)(struct rproc_subdev *subdev),
+		      void (*remove)(struct rproc_subdev *subdev));
+
+void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
+
 #endif /* REMOTEPROC_H */
-- 
cgit v1.2.3


From c97db7cc7778e34a53b42d58c766f0ec0e30d580 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Sep 2016 14:57:19 +0800
Subject: base: soc: Introduce soc_device_match() interface

We keep running into cases where device drivers want to know the exact
version of the a SoC they are currently running on. In the past, this has
usually been done through a vendor specific API that can be called by a
driver, or by directly accessing some kind of version register that is
not part of the device itself but that belongs to a global register area
of the chip.

Common reasons for doing this include:

- A machine is not using devicetree or similar for passing data about
  on-chip devices, but just announces their presence using boot-time
  platform devices, and the machine code itself does not care about the
  revision.

- There is existing firmware or boot loaders with existing DT binaries
  with generic compatible strings that do not identify the particular
  revision of each device, but the driver knows which SoC revisions
  include which part.

- A prerelease version of a chip has some quirks and we are using the same
  version of the bootloader and the DT blob on both the prerelease and the
  final version. An update of the DT binding seems inappropriate because
  that would involve maintaining multiple copies of the dts and/or
  bootloader.

This patch introduces the soc_device_match() interface that is meant to
work like of_match_node() but instead of identifying the version of a
device, it identifies the SoC itself using a vendor-agnostic interface.

Unlike of_match_node(), we do not do an exact string compare but instead
use glob_match() to allow wildcards in strings.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/Kconfig    |  1 +
 drivers/base/soc.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sys_soc.h |  3 +++
 3 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fdf44cac08e6..991b21e1f89b 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -235,6 +235,7 @@ config GENERIC_CPU_AUTOPROBE
 
 config SOC_BUS
 	bool
+	select GLOB
 
 source "drivers/base/regmap/Kconfig"
 
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 028cef377fd4..04ee597fc3a3 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/sys_soc.h>
 #include <linux/err.h>
+#include <linux/glob.h>
 
 static DEFINE_IDA(soc_ida);
 
@@ -168,3 +169,68 @@ static int __init soc_bus_register(void)
 	return bus_register(&soc_bus_type);
 }
 core_initcall(soc_bus_register);
+
+static int soc_device_match_one(struct device *dev, void *arg)
+{
+	struct soc_device *soc_dev = container_of(dev, struct soc_device, dev);
+	const struct soc_device_attribute *match = arg;
+
+	if (match->machine &&
+	    !glob_match(match->machine, soc_dev->attr->machine))
+		return 0;
+
+	if (match->family &&
+	    !glob_match(match->family, soc_dev->attr->family))
+		return 0;
+
+	if (match->revision &&
+	    !glob_match(match->revision, soc_dev->attr->revision))
+		return 0;
+
+	if (match->soc_id &&
+	    !glob_match(match->soc_id, soc_dev->attr->soc_id))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * soc_device_match - identify the SoC in the machine
+ * @matches: zero-terminated array of possible matches
+ *
+ * returns the first matching entry of the argument array, or NULL
+ * if none of them match.
+ *
+ * This function is meant as a helper in place of of_match_node()
+ * in cases where either no device tree is available or the information
+ * in a device node is insufficient to identify a particular variant
+ * by its compatible strings or other properties. For new devices,
+ * the DT binding should always provide unique compatible strings
+ * that allow the use of of_match_node() instead.
+ *
+ * The calling function can use the .data entry of the
+ * soc_device_attribute to pass a structure or function pointer for
+ * each entry.
+ */
+const struct soc_device_attribute *soc_device_match(
+	const struct soc_device_attribute *matches)
+{
+	int ret = 0;
+
+	if (!matches)
+		return NULL;
+
+	while (!ret) {
+		if (!(matches->machine || matches->family ||
+		      matches->revision || matches->soc_id))
+			break;
+		ret = bus_for_each_dev(&soc_bus_type, NULL, (void *)matches,
+				       soc_device_match_one);
+		if (!ret)
+			matches++;
+		else
+			return matches;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(soc_device_match);
diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h
index 2739ccb69571..9f5eb06f9fd8 100644
--- a/include/linux/sys_soc.h
+++ b/include/linux/sys_soc.h
@@ -13,6 +13,7 @@ struct soc_device_attribute {
 	const char *family;
 	const char *revision;
 	const char *soc_id;
+	const void *data;
 };
 
 /**
@@ -34,4 +35,6 @@ void soc_device_unregister(struct soc_device *soc_dev);
  */
 struct device *soc_device_to_device(struct soc_device *soc);
 
+const struct soc_device_attribute *soc_device_match(
+	const struct soc_device_attribute *matches);
 #endif /* __SOC_BUS_H */
-- 
cgit v1.2.3


From da65a1589dacc7ec44ea0557a14d70a39d991f32 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 26 Oct 2016 15:13:15 +0200
Subject: base: soc: Provide a dummy implementation of soc_device_match()

Provide a dummy implementation of soc_device_match(), to allow compiling
drivers that may be used on SoCs both with and without CONFIG_SOC_BUS,
and for compile testing.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/linux/sys_soc.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h
index 9f5eb06f9fd8..bed223b70217 100644
--- a/include/linux/sys_soc.h
+++ b/include/linux/sys_soc.h
@@ -35,6 +35,12 @@ void soc_device_unregister(struct soc_device *soc_dev);
  */
 struct device *soc_device_to_device(struct soc_device *soc);
 
+#ifdef CONFIG_SOC_BUS
 const struct soc_device_attribute *soc_device_match(
 	const struct soc_device_attribute *matches);
+#else
+static inline const struct soc_device_attribute *soc_device_match(
+	const struct soc_device_attribute *matches) { return NULL; }
+#endif
+
 #endif /* __SOC_BUS_H */
-- 
cgit v1.2.3


From 2da16a6948ca8f025e2c226ea4fc32baa6b90f27 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 10 Nov 2016 11:17:25 +0100
Subject: netfilter: ipset: Remove extra whitespaces in ip_set.h

Remove unnecessary whitespaces.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 83b9a2e0d8d4..5b1fd090f34b 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -336,14 +336,15 @@ ip_set_update_counter(struct ip_set_counter *counter,
 
 static inline void
 ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
-		      const struct ip_set_ext *ext,
-		      struct ip_set_ext *mext, u32 flags)
+		   const struct ip_set_ext *ext,
+		   struct ip_set_ext *mext, u32 flags)
 {
-		mext->skbmark = skbinfo->skbmark;
-		mext->skbmarkmask = skbinfo->skbmarkmask;
-		mext->skbprio = skbinfo->skbprio;
-		mext->skbqueue = skbinfo->skbqueue;
+	mext->skbmark = skbinfo->skbmark;
+	mext->skbmarkmask = skbinfo->skbmarkmask;
+	mext->skbprio = skbinfo->skbprio;
+	mext->skbqueue = skbinfo->skbqueue;
 }
+
 static inline bool
 ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 {
-- 
cgit v1.2.3


From da9fbfa76f32a031cb70b11e9fa650e30c85d040 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 10 Nov 2016 11:24:15 +0100
Subject: netfilter: ipset: Mark some helper args as const.

Mark some of the helpers arguments as const.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         | 4 ++--
 include/linux/netfilter/ipset/ip_set_comment.h | 2 +-
 include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 5b1fd090f34b..524467f933bf 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -346,7 +346,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
 }
 
 static inline bool
-ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
 {
 	/* Send nonzero parameters only */
 	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
@@ -373,7 +373,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
 }
 
 static inline bool
-ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
 {
 	return nla_put_net64(skb, IPSET_ATTR_BYTES,
 			     cpu_to_be64(ip_set_get_bytes(counter)),
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 8d0248525957..bae5c7609be2 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -43,7 +43,7 @@ ip_set_init_comment(struct ip_set_comment *comment,
 
 /* Used only when dumping a set, protected by rcu_read_lock_bh() */
 static inline int
-ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
 	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
 
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 1d6a935c1ac5..bfb3531fd88a 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb)
 }
 
 static inline bool
-ip_set_timeout_expired(unsigned long *t)
+ip_set_timeout_expired(const unsigned long *t)
 {
 	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
 }
@@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
 }
 
 static inline u32
-ip_set_timeout_get(unsigned long *timeout)
+ip_set_timeout_get(const unsigned long *timeout)
 {
 	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
 		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
-- 
cgit v1.2.3


From 7ffea37957b900422ce8b82e9651f7a0a6fac733 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 10 Nov 2016 11:31:03 +0100
Subject: netfilter: ipset: Headers file cleanup

Group counter helper functions together.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 42 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 524467f933bf..1ea28e30a6dd 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -334,6 +334,27 @@ ip_set_update_counter(struct ip_set_counter *counter,
 	}
 }
 
+static inline bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+	return nla_put_net64(skb, IPSET_ATTR_BYTES,
+			     cpu_to_be64(ip_set_get_bytes(counter)),
+			     IPSET_ATTR_PAD) ||
+	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
+			     cpu_to_be64(ip_set_get_packets(counter)),
+			     IPSET_ATTR_PAD);
+}
+
+static inline void
+ip_set_init_counter(struct ip_set_counter *counter,
+		    const struct ip_set_ext *ext)
+{
+	if (ext->bytes != ULLONG_MAX)
+		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
+	if (ext->packets != ULLONG_MAX)
+		atomic64_set(&(counter)->packets, (long long)(ext->packets));
+}
+
 static inline void
 ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
 		   const struct ip_set_ext *ext,
@@ -372,27 +393,6 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
 	skbinfo->skbqueue = ext->skbqueue;
 }
 
-static inline bool
-ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
-{
-	return nla_put_net64(skb, IPSET_ATTR_BYTES,
-			     cpu_to_be64(ip_set_get_bytes(counter)),
-			     IPSET_ATTR_PAD) ||
-	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
-			     cpu_to_be64(ip_set_get_packets(counter)),
-			     IPSET_ATTR_PAD);
-}
-
-static inline void
-ip_set_init_counter(struct ip_set_counter *counter,
-		    const struct ip_set_ext *ext)
-{
-	if (ext->bytes != ULLONG_MAX)
-		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
-	if (ext->packets != ULLONG_MAX)
-		atomic64_set(&(counter)->packets, (long long)(ext->packets));
-}
-
 /* Netlink CB args */
 enum {
 	IPSET_CB_NET = 0,	/* net namespace */
-- 
cgit v1.2.3


From bec810d973003b30bc477146904af6bd93fd2df8 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 5 May 2015 17:13:28 +0200
Subject: netfilter: ipset: Improve skbinfo get/init helpers

Use struct ip_set_skbinfo in struct ip_set_ext instead of open
coded fields and assign structure members in get/init helpers
instead of copying members one by one. Explicitly note that
struct ip_set_skbinfo must be padded to prevent non-aligned
access in the extension blob.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 30 +++++++++++-------------------
 net/netfilter/ipset/ip_set_core.c      | 12 ++++++------
 net/netfilter/xt_set.c                 | 12 +++++++-----
 3 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 1ea28e30a6dd..780262124632 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -92,17 +92,6 @@ struct ip_set_ext_type {
 
 extern const struct ip_set_ext_type ip_set_extensions[];
 
-struct ip_set_ext {
-	u64 packets;
-	u64 bytes;
-	u32 timeout;
-	u32 skbmark;
-	u32 skbmarkmask;
-	u32 skbprio;
-	u16 skbqueue;
-	char *comment;
-};
-
 struct ip_set_counter {
 	atomic64_t bytes;
 	atomic64_t packets;
@@ -122,6 +111,15 @@ struct ip_set_skbinfo {
 	u32 skbmarkmask;
 	u32 skbprio;
 	u16 skbqueue;
+	u16 __pad;
+};
+
+struct ip_set_ext {
+	struct ip_set_skbinfo skbinfo;
+	u64 packets;
+	u64 bytes;
+	char *comment;
+	u32 timeout;
 };
 
 struct ip_set;
@@ -360,10 +358,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
 		   const struct ip_set_ext *ext,
 		   struct ip_set_ext *mext, u32 flags)
 {
-	mext->skbmark = skbinfo->skbmark;
-	mext->skbmarkmask = skbinfo->skbmarkmask;
-	mext->skbprio = skbinfo->skbprio;
-	mext->skbqueue = skbinfo->skbqueue;
+	mext->skbinfo = *skbinfo;
 }
 
 static inline bool
@@ -387,10 +382,7 @@ static inline void
 ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
 		    const struct ip_set_ext *ext)
 {
-	skbinfo->skbmark = ext->skbmark;
-	skbinfo->skbmarkmask = ext->skbmarkmask;
-	skbinfo->skbprio = ext->skbprio;
-	skbinfo->skbqueue = ext->skbqueue;
+	*skbinfo = ext->skbinfo;
 }
 
 /* Netlink CB args */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3f1b945a24d5..bfacccff7196 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
 		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
-		ext->skbmark = fullmark >> 32;
-		ext->skbmarkmask = fullmark & 0xffffffff;
+		ext->skbinfo.skbmark = fullmark >> 32;
+		ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
 	}
 	if (tb[IPSET_ATTR_SKBPRIO]) {
 		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
-		ext->skbprio = be32_to_cpu(nla_get_be32(
-					    tb[IPSET_ATTR_SKBPRIO]));
+		ext->skbinfo.skbprio =
+			be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
 	}
 	if (tb[IPSET_ATTR_SKBQUEUE]) {
 		if (!SET_WITH_SKBINFO(set))
 			return -IPSET_ERR_SKBINFO;
-		ext->skbqueue = be16_to_cpu(nla_get_be16(
-					    tb[IPSET_ATTR_SKBQUEUE]));
+		ext->skbinfo.skbqueue =
+			be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 1bfede7be418..64285702afd5 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -423,6 +423,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 
 /* Revision 3 target */
 
+#define MOPT(opt, member)	((opt).ext.skbinfo.member)
+
 static unsigned int
 set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 		if (!ret)
 			return XT_CONTINUE;
 		if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
-			skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
-				    ^ (map_opt.ext.skbmark);
+			skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask))
+				    ^ MOPT(map_opt, skbmark);
 		if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
-			skb->priority = map_opt.ext.skbprio;
+			skb->priority = MOPT(map_opt, skbprio);
 		if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
 		    skb->dev &&
-		    skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
-			skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+		    skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue))
+			skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue));
 	}
 	return XT_CONTINUE;
 }
-- 
cgit v1.2.3


From 1d0d6bd61d495d271b9774a15fbea93e4875474b Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 6 May 2015 07:27:28 +0200
Subject: netfilter: ipset: Use kmalloc() in comment extension helper

Allocate memory with kmalloc() rather than kzalloc(): the string
is immediately initialized so it is unnecessary to zero out
the allocated memory area.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_comment.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index bae5c7609be2..5444b1bbe656 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -34,7 +34,7 @@ ip_set_init_comment(struct ip_set_comment *comment,
 		return;
 	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
 		len = IPSET_MAX_COMMENT_SIZE;
-	c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+	c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
 	if (unlikely(!c))
 		return;
 	strlcpy(c->str, ext->comment, len + 1);
-- 
cgit v1.2.3


From 57982edc2739b4473868e7579c0185270468bae1 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 10 Oct 2016 21:34:56 +0200
Subject: netfilter: ipset: Split extensions into separate files

Cleanup to separate all extensions into individual files.

Ported from a patch proposed by Sergey Popovich <popovich_sergei@mail.ua>.

Suggested-by: Sergey Popovich <popovich_sergei@mail.ua>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         | 95 +-------------------------
 include/linux/netfilter/ipset/ip_set_counter.h | 75 ++++++++++++++++++++
 include/linux/netfilter/ipset/ip_set_skbinfo.h | 46 +++++++++++++
 3 files changed, 123 insertions(+), 93 deletions(-)
 create mode 100644 include/linux/netfilter/ipset/ip_set_counter.h
 create mode 100644 include/linux/netfilter/ipset/ip_set_skbinfo.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 780262124632..b5bd0fb3d07b 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -292,99 +292,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 	return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
 }
 
-static inline void
-ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
-{
-	atomic64_add((long long)bytes, &(counter)->bytes);
-}
-
-static inline void
-ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
-{
-	atomic64_add((long long)packets, &(counter)->packets);
-}
-
-static inline u64
-ip_set_get_bytes(const struct ip_set_counter *counter)
-{
-	return (u64)atomic64_read(&(counter)->bytes);
-}
-
-static inline u64
-ip_set_get_packets(const struct ip_set_counter *counter)
-{
-	return (u64)atomic64_read(&(counter)->packets);
-}
-
-static inline void
-ip_set_update_counter(struct ip_set_counter *counter,
-		      const struct ip_set_ext *ext,
-		      struct ip_set_ext *mext, u32 flags)
-{
-	if (ext->packets != ULLONG_MAX &&
-	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
-		ip_set_add_bytes(ext->bytes, counter);
-		ip_set_add_packets(ext->packets, counter);
-	}
-	if (flags & IPSET_FLAG_MATCH_COUNTERS) {
-		mext->packets = ip_set_get_packets(counter);
-		mext->bytes = ip_set_get_bytes(counter);
-	}
-}
-
-static inline bool
-ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
-{
-	return nla_put_net64(skb, IPSET_ATTR_BYTES,
-			     cpu_to_be64(ip_set_get_bytes(counter)),
-			     IPSET_ATTR_PAD) ||
-	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
-			     cpu_to_be64(ip_set_get_packets(counter)),
-			     IPSET_ATTR_PAD);
-}
-
-static inline void
-ip_set_init_counter(struct ip_set_counter *counter,
-		    const struct ip_set_ext *ext)
-{
-	if (ext->bytes != ULLONG_MAX)
-		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
-	if (ext->packets != ULLONG_MAX)
-		atomic64_set(&(counter)->packets, (long long)(ext->packets));
-}
-
-static inline void
-ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
-		   const struct ip_set_ext *ext,
-		   struct ip_set_ext *mext, u32 flags)
-{
-	mext->skbinfo = *skbinfo;
-}
-
-static inline bool
-ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
-{
-	/* Send nonzero parameters only */
-	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
-		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
-					  skbinfo->skbmarkmask),
-			      IPSET_ATTR_PAD)) ||
-	       (skbinfo->skbprio &&
-		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-			      cpu_to_be32(skbinfo->skbprio))) ||
-	       (skbinfo->skbqueue &&
-		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-			     cpu_to_be16(skbinfo->skbqueue)));
-}
-
-static inline void
-ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
-		    const struct ip_set_ext *ext)
-{
-	*skbinfo = ext->skbinfo;
-}
-
 /* Netlink CB args */
 enum {
 	IPSET_CB_NET = 0,	/* net namespace */
@@ -539,6 +446,8 @@ bitmap_bytes(u32 a, u32 b)
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_comment.h>
+#include <linux/netfilter/ipset/ip_set_counter.h>
+#include <linux/netfilter/ipset/ip_set_skbinfo.h>
 
 int
 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h
new file mode 100644
index 000000000000..bb6fba480118
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_counter.h
@@ -0,0 +1,75 @@
+#ifndef _IP_SET_COUNTER_H
+#define _IP_SET_COUNTER_H
+
+/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static inline void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static inline u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static inline u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->packets);
+}
+
+static inline void
+ip_set_update_counter(struct ip_set_counter *counter,
+		      const struct ip_set_ext *ext,
+		      struct ip_set_ext *mext, u32 flags)
+{
+	if (ext->packets != ULLONG_MAX &&
+	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+		ip_set_add_bytes(ext->bytes, counter);
+		ip_set_add_packets(ext->packets, counter);
+	}
+	if (flags & IPSET_FLAG_MATCH_COUNTERS) {
+		mext->packets = ip_set_get_packets(counter);
+		mext->bytes = ip_set_get_bytes(counter);
+	}
+}
+
+static inline bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+	return nla_put_net64(skb, IPSET_ATTR_BYTES,
+			     cpu_to_be64(ip_set_get_bytes(counter)),
+			     IPSET_ATTR_PAD) ||
+	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
+			     cpu_to_be64(ip_set_get_packets(counter)),
+			     IPSET_ATTR_PAD);
+}
+
+static inline void
+ip_set_init_counter(struct ip_set_counter *counter,
+		    const struct ip_set_ext *ext)
+{
+	if (ext->bytes != ULLONG_MAX)
+		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
+	if (ext->packets != ULLONG_MAX)
+		atomic64_set(&(counter)->packets, (long long)(ext->packets));
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IP_SET_COUNTER_H */
diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h
new file mode 100644
index 000000000000..29d7ef2bc3fa
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h
@@ -0,0 +1,46 @@
+#ifndef _IP_SET_SKBINFO_H
+#define _IP_SET_SKBINFO_H
+
+/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		   const struct ip_set_ext *ext,
+		   struct ip_set_ext *mext, u32 flags)
+{
+	mext->skbinfo = *skbinfo;
+}
+
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+	/* Send nonzero parameters only */
+	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					  skbinfo->skbmarkmask),
+			      IPSET_ATTR_PAD)) ||
+	       (skbinfo->skbprio &&
+		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			      cpu_to_be32(skbinfo->skbprio))) ||
+	       (skbinfo->skbqueue &&
+		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			      cpu_to_be16(skbinfo->skbqueue)));
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	*skbinfo = ext->skbinfo;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IP_SET_SKBINFO_H */
-- 
cgit v1.2.3


From 837a90eab67edfa464dcc0ddef193449d23da408 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 10 Oct 2016 21:52:51 +0200
Subject: netfilter: ipset: Regroup ip_set_put_extensions and add extern

Cleanup: group ip_set_put_extensions and ip_set_get_extensions
together and add missing extern.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index b5bd0fb3d07b..7a218eb74887 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -331,6 +331,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
 			      size_t len, size_t align);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
+extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+				 const void *e, bool active);
 
 static inline int
 ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr)
@@ -449,10 +451,6 @@ bitmap_bytes(u32 a, u32 b)
 #include <linux/netfilter/ipset/ip_set_counter.h>
 #include <linux/netfilter/ipset/ip_set_skbinfo.h>
 
-int
-ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
-		      const void *e, bool active);
-
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
 	  .timeout = ip_set_adt_opt_timeout(opt, set) }
-- 
cgit v1.2.3


From 702b71e7c666a1c9be9d49e8cd173f0d4d1e859f Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 10 Oct 2016 22:07:41 +0200
Subject: netfilter: ipset: Add element count to all set types header

It is better to list the set elements for all set types, thus the
header information is uniform. Element counts are therefore added
to the bitmap and list types.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h        |  2 ++
 include/linux/netfilter/ipset/ip_set_bitmap.h |  2 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h       | 10 +++++++++-
 net/netfilter/ipset/ip_set_hash_gen.h         | 21 ++++++++++-----------
 net/netfilter/ipset/ip_set_list_set.c         |  6 +++++-
 5 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 7a218eb74887..4671d740610f 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -250,6 +250,8 @@ struct ip_set {
 	u8 flags;
 	/* Default timeout value, if enabled */
 	u32 timeout;
+	/* Number of elements (vs timeout) */
+	u32 elements;
 	/* Element data size */
 	size_t dsize;
 	/* Offsets to extensions in elements */
diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h
index 5e4662a71e01..366d6c0ea04f 100644
--- a/include/linux/netfilter/ipset/ip_set_bitmap.h
+++ b/include/linux/netfilter/ipset/ip_set_bitmap.h
@@ -6,8 +6,8 @@
 #define IPSET_BITMAP_MAX_RANGE	0x0000FFFF
 
 enum {
+	IPSET_ADD_STORE_PLAIN_TIMEOUT = -1,
 	IPSET_ADD_FAILED = 1,
-	IPSET_ADD_STORE_PLAIN_TIMEOUT,
 	IPSET_ADD_START_STORED_TIMEOUT,
 };
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 4f07b90f8ef4..1810d1c06e3d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -83,6 +83,7 @@ mtype_flush(struct ip_set *set)
 	if (set->extensions & IPSET_EXT_DESTROY)
 		mtype_ext_cleanup(set);
 	memset(map->members, 0, map->memsize);
+	set->elements = 0;
 }
 
 /* Calculate the actual memory size of the set data */
@@ -105,7 +106,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
@@ -149,6 +151,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	if (ret == IPSET_ADD_FAILED) {
 		if (SET_WITH_TIMEOUT(set) &&
 		    ip_set_timeout_expired(ext_timeout(x, set))) {
+			set->elements--;
 			ret = 0;
 		} else if (!(flags & IPSET_FLAG_EXIST)) {
 			set_bit(e->id, map->members);
@@ -157,6 +160,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		/* Element is re-added, cleanup extensions */
 		ip_set_ext_destroy(set, x);
 	}
+	if (ret > 0)
+		set->elements--;
 
 	if (SET_WITH_TIMEOUT(set))
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
@@ -174,6 +179,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	/* Activate element */
 	set_bit(e->id, map->members);
+	set->elements++;
 
 	return 0;
 }
@@ -190,6 +196,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		return -IPSET_ERR_EXIST;
 
 	ip_set_ext_destroy(set, x);
+	set->elements--;
 	if (SET_WITH_TIMEOUT(set) &&
 	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return -IPSET_ERR_EXIST;
@@ -285,6 +292,7 @@ mtype_gc(unsigned long ul_set)
 			if (ip_set_timeout_expired(ext_timeout(x, set))) {
 				clear_bit(id, map->members);
 				ip_set_ext_destroy(set, x);
+				set->elements--;
 			}
 		}
 	spin_unlock_bh(&set->lock);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f5acfb9709c9..6e967f198d1e 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -275,7 +275,6 @@ htable_bits(u32 hashsize)
 struct htype {
 	struct htable __rcu *table; /* the hash table */
 	u32 maxelem;		/* max elements in the hash */
-	u32 elements;		/* current element (vs timeout) */
 	u32 initval;		/* random jhash init value */
 #ifdef IP_SET_HASH_WITH_MARKMASK
 	u32 markmask;		/* markmask value for mark mask to store */
@@ -400,7 +399,7 @@ mtype_flush(struct ip_set *set)
 #ifdef IP_SET_HASH_WITH_NETS
 	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 #endif
-	h->elements = 0;
+	set->elements = 0;
 }
 
 /* Destroy the hashtable part of the set */
@@ -506,7 +505,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 						nets_length, k);
 #endif
 				ip_set_ext_destroy(set, data);
-				h->elements--;
+				set->elements--;
 				d++;
 			}
 		}
@@ -715,11 +714,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	bool deleted = false, forceadd = false, reuse = false;
 	u32 key, multi = 0;
 
-	if (h->elements >= h->maxelem) {
+	if (set->elements >= h->maxelem) {
 		if (SET_WITH_TIMEOUT(set))
 			/* FIXME: when set is full, we slow down here */
 			mtype_expire(set, h, NLEN(set->family), set->dsize);
-		if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+		if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
 			forceadd = true;
 	}
 
@@ -732,7 +731,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 				pr_warn("Set %s is full, maxelem %u reached\n",
 					set->name, h->maxelem);
 			return -IPSET_ERR_HASH_FULL;
-		} else if (h->elements >= h->maxelem) {
+		} else if (set->elements >= h->maxelem) {
 			goto set_full;
 		}
 		old = NULL;
@@ -781,11 +780,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 					NLEN(set->family), i);
 #endif
 			ip_set_ext_destroy(set, data);
-			h->elements--;
+			set->elements--;
 		}
 		goto copy_data;
 	}
-	if (h->elements >= h->maxelem)
+	if (set->elements >= h->maxelem)
 		goto set_full;
 	/* Create a new slot */
 	if (n->pos >= n->size) {
@@ -810,7 +809,7 @@ copy_elem:
 	j = n->pos++;
 	data = ahash_data(n, j, set->dsize);
 copy_data:
-	h->elements++;
+	set->elements++;
 #ifdef IP_SET_HASH_WITH_NETS
 	for (i = 0; i < IPSET_NET_COUNT; i++)
 		mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
@@ -883,7 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		smp_mb__after_atomic();
 		if (i + 1 == n->pos)
 			n->pos--;
-		h->elements--;
+		set->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
 		for (j = 0; j < IPSET_NET_COUNT; j++)
 			mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
@@ -1084,7 +1083,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 #endif
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
-	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(h->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 462b0b1870e2..c45516695934 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+	set->elements--;
 	list_del_rcu(&e->list);
 	call_rcu(&e->rcu, __list_set_del_rcu);
 }
@@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		list_add_rcu(&e->list, &prev->list);
 	else
 		list_add_tail_rcu(&e->list, &map->members);
+	set->elements++;
 
 	return 0;
 }
@@ -419,6 +421,7 @@ list_set_flush(struct ip_set *set)
 
 	list_for_each_entry_safe(e, n, &map->members, list)
 		list_set_del(set, e);
+	set->elements = 0;
 }
 
 static void
@@ -471,7 +474,8 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
-- 
cgit v1.2.3


From 9e41f26a505cca04b7122e65053cf6447007ea79 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 10 Nov 2016 12:05:34 +0100
Subject: netfilter: ipset: Count non-static extension memory for userspace

Non-static (i.e. comment) extension was not counted into the memory
size. A new internal counter is introduced for this. In the case of
the hash types the sizes of the arrays are counted there as well so
that we can avoid to scan the whole set when just the header data
is requested.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         |  8 ++++++--
 include/linux/netfilter/ipset/ip_set_comment.h |  7 +++++--
 net/netfilter/ipset/ip_set_bitmap_gen.h        |  5 +++--
 net/netfilter/ipset/ip_set_core.c              |  2 +-
 net/netfilter/ipset/ip_set_hash_gen.h          | 26 ++++++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c          |  5 +++--
 6 files changed, 32 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4671d740610f..8e42253e5d4d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -79,10 +79,12 @@ enum ip_set_ext_id {
 	IPSET_EXT_ID_MAX,
 };
 
+struct ip_set;
+
 /* Extension type */
 struct ip_set_ext_type {
 	/* Destroy extension private data (can be NULL) */
-	void (*destroy)(void *ext);
+	void (*destroy)(struct ip_set *set, void *ext);
 	enum ip_set_extension type;
 	enum ipset_cadt_flags flag;
 	/* Size and minimal alignment */
@@ -252,6 +254,8 @@ struct ip_set {
 	u32 timeout;
 	/* Number of elements (vs timeout) */
 	u32 elements;
+	/* Size of the dynamic extensions (vs timeout) */
+	size_t ext_size;
 	/* Element data size */
 	size_t dsize;
 	/* Offsets to extensions in elements */
@@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data)
 	 */
 	if (SET_WITH_COMMENT(set))
 		ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
-			ext_comment(data, set));
+			set, ext_comment(data, set));
 }
 
 static inline int
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 5444b1bbe656..8e2bab1e8e90 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb)
  * The kadt functions don't use the comment extensions in any way.
  */
 static inline void
-ip_set_init_comment(struct ip_set_comment *comment,
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
 		    const struct ip_set_ext *ext)
 {
 	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
 	size_t len = ext->comment ? strlen(ext->comment) : 0;
 
 	if (unlikely(c)) {
+		set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
 		kfree_rcu(c, rcu);
 		rcu_assign_pointer(comment->c, NULL);
 	}
@@ -38,6 +39,7 @@ ip_set_init_comment(struct ip_set_comment *comment,
 	if (unlikely(!c))
 		return;
 	strlcpy(c->str, ext->comment, len + 1);
+	set->ext_size += sizeof(*c) + strlen(c->str) + 1;
 	rcu_assign_pointer(comment->c, c);
 }
 
@@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
  * of the set data anymore.
  */
 static inline void
-ip_set_comment_free(struct ip_set_comment *comment)
+ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
 {
 	struct ip_set_comment_rcu *c;
 
 	c = rcu_dereference_protected(comment->c, 1);
 	if (unlikely(!c))
 		return;
+	set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
 	kfree_rcu(c, rcu);
 	rcu_assign_pointer(comment->c, NULL);
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 1810d1c06e3d..f8ea26cafa30 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -84,6 +84,7 @@ mtype_flush(struct ip_set *set)
 		mtype_ext_cleanup(set);
 	memset(map->members, 0, map->memsize);
 	set->elements = 0;
+	set->ext_size = 0;
 }
 
 /* Calculate the actual memory size of the set data */
@@ -99,7 +100,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
-	size_t memsize = mtype_memsize(map, set->dsize);
+	size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
@@ -173,7 +174,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	if (SET_WITH_COUNTER(set))
 		ip_set_init_counter(ext_counter(x, set), ext);
 	if (SET_WITH_COMMENT(set))
-		ip_set_init_comment(ext_comment(x, set), ext);
+		ip_set_init_comment(set, ext_comment(x, set), ext);
 	if (SET_WITH_SKBINFO(set))
 		ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bfacccff7196..23345d2d136a 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
-typedef void (*destroyer)(void *);
+typedef void (*destroyer)(struct ip_set *, void *);
 /* ipset data extension types, in size order */
 
 const struct ip_set_ext_type ip_set_extensions[] = {
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6e967f198d1e..0746405a1d14 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -343,21 +343,13 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 /* Calculate the actual memory size of the set data */
 static size_t
 mtype_ahash_memsize(const struct htype *h, const struct htable *t,
-		    u8 nets_length, size_t dsize)
+		    u8 nets_length)
 {
-	u32 i;
-	struct hbucket *n;
 	size_t memsize = sizeof(*h) + sizeof(*t);
 
 #ifdef IP_SET_HASH_WITH_NETS
 	memsize += sizeof(struct net_prefixes) * nets_length;
 #endif
-	for (i = 0; i < jhash_size(t->htable_bits); i++) {
-		n = rcu_dereference_bh(hbucket(t, i));
-		if (!n)
-			continue;
-		memsize += sizeof(struct hbucket) + n->size * dsize;
-	}
 
 	return memsize;
 }
@@ -400,6 +392,7 @@ mtype_flush(struct ip_set *set)
 	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 #endif
 	set->elements = 0;
+	set->ext_size = 0;
 }
 
 /* Destroy the hashtable part of the set */
@@ -531,6 +524,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 				d++;
 			}
 			tmp->pos = d;
+			set->ext_size -= AHASH_INIT_SIZE * dsize;
 			rcu_assign_pointer(hbucket(t, i), tmp);
 			kfree_rcu(n, rcu);
 		}
@@ -562,7 +556,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	struct htype *h = set->data;
 	struct htable *t, *orig;
 	u8 htable_bits;
-	size_t dsize = set->dsize;
+	size_t extsize, dsize = set->dsize;
 #ifdef IP_SET_HASH_WITH_NETS
 	u8 flags;
 	struct mtype_elem *tmp;
@@ -605,6 +599,7 @@ retry:
 	/* There can't be another parallel resizing, but dumping is possible */
 	atomic_set(&orig->ref, 1);
 	atomic_inc(&orig->uref);
+	extsize = 0;
 	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
 		 set->name, orig->htable_bits, htable_bits, orig);
 	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -635,6 +630,7 @@ retry:
 					goto cleanup;
 				}
 				m->size = AHASH_INIT_SIZE;
+				extsize = sizeof(*m) + AHASH_INIT_SIZE * dsize;
 				RCU_INIT_POINTER(hbucket(t, key), m);
 			} else if (m->pos >= m->size) {
 				struct hbucket *ht;
@@ -654,6 +650,7 @@ retry:
 				memcpy(ht, m, sizeof(struct hbucket) +
 					      m->size * dsize);
 				ht->size = m->size + AHASH_INIT_SIZE;
+				extsize += AHASH_INIT_SIZE * dsize;
 				kfree(m);
 				m = ht;
 				RCU_INIT_POINTER(hbucket(t, key), ht);
@@ -667,6 +664,7 @@ retry:
 		}
 	}
 	rcu_assign_pointer(h->table, t);
+	set->ext_size = extsize;
 
 	spin_unlock_bh(&set->lock);
 
@@ -740,6 +738,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		if (!n)
 			return -ENOMEM;
 		n->size = AHASH_INIT_SIZE;
+		set->ext_size += sizeof(*n) + AHASH_INIT_SIZE * set->dsize;
 		goto copy_elem;
 	}
 	for (i = 0; i < n->pos; i++) {
@@ -803,6 +802,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		memcpy(n, old, sizeof(struct hbucket) +
 		       old->size * set->dsize);
 		n->size = old->size + AHASH_INIT_SIZE;
+		set->ext_size += AHASH_INIT_SIZE * set->dsize;
 	}
 
 copy_elem:
@@ -823,7 +823,7 @@ overwrite_extensions:
 	if (SET_WITH_COUNTER(set))
 		ip_set_init_counter(ext_counter(data, set), ext);
 	if (SET_WITH_COMMENT(set))
-		ip_set_init_comment(ext_comment(data, set), ext);
+		ip_set_init_comment(set, ext_comment(data, set), ext);
 	if (SET_WITH_SKBINFO(set))
 		ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
 	/* Must come last for the case when timed out entry is reused */
@@ -895,6 +895,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 				k++;
 		}
 		if (n->pos == 0 && k == 0) {
+			set->ext_size -= sizeof(*n) + n->size * dsize;
 			rcu_assign_pointer(hbucket(t, key), NULL);
 			kfree_rcu(n, rcu);
 		} else if (k >= AHASH_INIT_SIZE) {
@@ -913,6 +914,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 				k++;
 			}
 			tmp->pos = k;
+			set->ext_size -= AHASH_INIT_SIZE * dsize;
 			rcu_assign_pointer(hbucket(t, key), tmp);
 			kfree_rcu(n, rcu);
 		}
@@ -1061,7 +1063,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 
 	rcu_read_lock_bh();
 	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family)) + set->ext_size;
 	htable_bits = t->htable_bits;
 	rcu_read_unlock_bh();
 
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index c45516695934..dede343a662b 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -228,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
 	if (SET_WITH_COUNTER(set))
 		ip_set_init_counter(ext_counter(e, set), ext);
 	if (SET_WITH_COMMENT(set))
-		ip_set_init_comment(ext_comment(e, set), ext);
+		ip_set_init_comment(set, ext_comment(e, set), ext);
 	if (SET_WITH_SKBINFO(set))
 		ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
 	/* Update timeout last */
@@ -422,6 +422,7 @@ list_set_flush(struct ip_set *set)
 	list_for_each_entry_safe(e, n, &map->members, list)
 		list_set_del(set, e);
 	set->elements = 0;
+	set->ext_size = 0;
 }
 
 static void
@@ -467,7 +468,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct list_set *map = set->data;
 	struct nlattr *nested;
-	size_t memsize = list_set_memsize(map, set->dsize);
+	size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
-- 
cgit v1.2.3


From 39a842e22c1bf3ec3dce36e01fe8ba8ee66c80c8 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Tue, 1 Nov 2016 14:14:22 -0500
Subject: of/overlay: add of overlay notifications

This patch add of overlay notifications.

When DT overlays are being added, some drivers/subsystems
need to see device tree overlays before the changes go into
the live tree.

This is distinct from reconfig notifiers that are
post-apply or post-remove and which issue very granular
notifications without providing access to the context
of a whole overlay.

The following 4 notificatons are issued:
  OF_OVERLAY_PRE_APPLY
  OF_OVERLAY_POST_APPLY
  OF_OVERLAY_PRE_REMOVE
  OF_OVERLAY_POST_REMOVE

In the case of pre-apply notification, if the notifier
returns error, the overlay will be rejected.

This patch exports two functions for registering/unregistering
notifications:
  of_overlay_notifier_register(struct notifier_block *nb)
  of_overlay_notifier_unregister(struct notifier_block *nb)

The of_mutex is held during these notifications. The
notification data includes pointers to the overlay target
and the overlay:

struct of_overlay_notify_data {
       struct device_node *overlay;
       struct device_node *target;
};

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/overlay.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/of.h   | 25 +++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 318dbb51e7a2..0d4cda7050e0 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -58,6 +58,41 @@ struct of_overlay {
 static int of_overlay_apply_one(struct of_overlay *ov,
 		struct device_node *target, const struct device_node *overlay);
 
+static BLOCKING_NOTIFIER_HEAD(of_overlay_chain);
+
+int of_overlay_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&of_overlay_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_overlay_notifier_register);
+
+int of_overlay_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&of_overlay_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_overlay_notifier_unregister);
+
+static int of_overlay_notify(struct of_overlay *ov,
+			     enum of_overlay_notify_action action)
+{
+	struct of_overlay_notify_data nd;
+	int i, ret;
+
+	for (i = 0; i < ov->count; i++) {
+		struct of_overlay_info *ovinfo = &ov->ovinfo_tab[i];
+
+		nd.target = ovinfo->target;
+		nd.overlay = ovinfo->overlay;
+
+		ret = blocking_notifier_call_chain(&of_overlay_chain,
+						   action, &nd);
+		if (ret)
+			return notifier_to_errno(ret);
+	}
+
+	return 0;
+}
+
 static int of_overlay_apply_single_property(struct of_overlay *ov,
 		struct device_node *target, struct property *prop)
 {
@@ -368,6 +403,13 @@ int of_overlay_create(struct device_node *tree)
 		goto err_free_idr;
 	}
 
+	err = of_overlay_notify(ov, OF_OVERLAY_PRE_APPLY);
+	if (err < 0) {
+		pr_err("%s: Pre-apply notifier failed (err=%d)\n",
+		       __func__, err);
+		goto err_free_idr;
+	}
+
 	/* apply the overlay */
 	err = of_overlay_apply(ov);
 	if (err)
@@ -382,6 +424,8 @@ int of_overlay_create(struct device_node *tree)
 	/* add to the tail of the overlay list */
 	list_add_tail(&ov->node, &ov_list);
 
+	of_overlay_notify(ov, OF_OVERLAY_POST_APPLY);
+
 	mutex_unlock(&of_mutex);
 
 	return id;
@@ -498,9 +542,10 @@ int of_overlay_destroy(int id)
 		goto out;
 	}
 
-
+	of_overlay_notify(ov, OF_OVERLAY_PRE_REMOVE);
 	list_del(&ov->node);
 	__of_changeset_revert(&ov->cset);
+	of_overlay_notify(ov, OF_OVERLAY_POST_REMOVE);
 	of_free_overlay_info(ov);
 	idr_remove(&ov_idr, id);
 	of_changeset_destroy(&ov->cset);
diff --git a/include/linux/of.h b/include/linux/of.h
index 299aeb192727..d72f01009297 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -1266,6 +1266,18 @@ static inline bool of_device_is_system_power_controller(const struct device_node
  * Overlay support
  */
 
+enum of_overlay_notify_action {
+	OF_OVERLAY_PRE_APPLY,
+	OF_OVERLAY_POST_APPLY,
+	OF_OVERLAY_PRE_REMOVE,
+	OF_OVERLAY_POST_REMOVE,
+};
+
+struct of_overlay_notify_data {
+	struct device_node *overlay;
+	struct device_node *target;
+};
+
 #ifdef CONFIG_OF_OVERLAY
 
 /* ID based overlays; the API for external users */
@@ -1273,6 +1285,9 @@ int of_overlay_create(struct device_node *tree);
 int of_overlay_destroy(int id);
 int of_overlay_destroy_all(void);
 
+int of_overlay_notifier_register(struct notifier_block *nb);
+int of_overlay_notifier_unregister(struct notifier_block *nb);
+
 #else
 
 static inline int of_overlay_create(struct device_node *tree)
@@ -1290,6 +1305,16 @@ static inline int of_overlay_destroy_all(void)
 	return -ENOTSUPP;
 }
 
+static inline int of_overlay_notifier_register(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int of_overlay_notifier_unregister(struct notifier_block *nb)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From 9dce0287a60d72656a787b075f1b9162ff3cb142 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Tue, 1 Nov 2016 14:14:23 -0500
Subject: fpga: add method to get fpga manager from device

The intent is to provide a non-DT method of getting
ahold of a FPGA manager to do some FPGA programming.

This patch refactors of_fpga_mgr_get() to reuse most of it
while adding a new method fpga_mgr_get() for getting a
pointer to a fpga manager struct, given the device.

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/fpga/fpga-mgr.txt |  6 ++--
 drivers/fpga/fpga-mgr.c         | 76 +++++++++++++++++++++++++++++------------
 include/linux/fpga/fpga-mgr.h   |  2 ++
 3 files changed, 60 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/fpga/fpga-mgr.txt b/Documentation/fpga/fpga-mgr.txt
index ce3e84fa9023..d056d691e8fd 100644
--- a/Documentation/fpga/fpga-mgr.txt
+++ b/Documentation/fpga/fpga-mgr.txt
@@ -38,11 +38,13 @@ To get/put a reference to a FPGA manager:
 -----------------------------------------
 
 	struct fpga_manager *of_fpga_mgr_get(struct device_node *node);
+	struct fpga_manager *fpga_mgr_get(struct device *dev);
+
+Given a DT node or device, get an exclusive reference to a FPGA manager.
 
 	void fpga_mgr_put(struct fpga_manager *mgr);
 
-Given a DT node, get an exclusive reference to a FPGA manager or release
-the reference.
+Release the reference.
 
 
 To register or unregister the low level FPGA-specific driver:
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index 953dc9195937..b690e65d55fe 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -39,7 +39,8 @@ static struct class *fpga_mgr_class;
  * Step the low level fpga manager through the device-specific steps of getting
  * an FPGA ready to be configured, writing the image to it, then doing whatever
  * post-configuration steps necessary.  This code assumes the caller got the
- * mgr pointer from of_fpga_mgr_get() and checked that it is not an error code.
+ * mgr pointer from of_fpga_mgr_get() or fpga_mgr_get() and checked that it is
+ * not an error code.
  *
  * Return: 0 on success, negative error code otherwise.
  */
@@ -99,7 +100,8 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load);
  * Request an FPGA image using the firmware class, then write out to the FPGA.
  * Update the state before each step to provide info on what step failed if
  * there is a failure.  This code assumes the caller got the mgr pointer
- * from of_fpga_mgr_get() and checked that it is not an error code.
+ * from of_fpga_mgr_get() or fpga_mgr_get() and checked that it is not an error
+ * code.
  *
  * Return: 0 on success, negative error code otherwise.
  */
@@ -181,30 +183,11 @@ static struct attribute *fpga_mgr_attrs[] = {
 };
 ATTRIBUTE_GROUPS(fpga_mgr);
 
-static int fpga_mgr_of_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
-/**
- * of_fpga_mgr_get - get an exclusive reference to a fpga mgr
- * @node:	device node
- *
- * Given a device node, get an exclusive reference to a fpga mgr.
- *
- * Return: fpga manager struct or IS_ERR() condition containing error code.
- */
-struct fpga_manager *of_fpga_mgr_get(struct device_node *node)
+struct fpga_manager *__fpga_mgr_get(struct device *dev)
 {
 	struct fpga_manager *mgr;
-	struct device *dev;
 	int ret = -ENODEV;
 
-	dev = class_find_device(fpga_mgr_class, NULL, node,
-				fpga_mgr_of_node_match);
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
 	mgr = to_fpga_manager(dev);
 	if (!mgr)
 		goto err_dev;
@@ -226,6 +209,55 @@ err_dev:
 	put_device(dev);
 	return ERR_PTR(ret);
 }
+
+static int fpga_mgr_dev_match(struct device *dev, const void *data)
+{
+	return dev->parent == data;
+}
+
+/**
+ * fpga_mgr_get - get an exclusive reference to a fpga mgr
+ * @dev:	parent device that fpga mgr was registered with
+ *
+ * Given a device, get an exclusive reference to a fpga mgr.
+ *
+ * Return: fpga manager struct or IS_ERR() condition containing error code.
+ */
+struct fpga_manager *fpga_mgr_get(struct device *dev)
+{
+	struct device *mgr_dev = class_find_device(fpga_mgr_class, NULL, dev,
+						   fpga_mgr_dev_match);
+	if (!mgr_dev)
+		return ERR_PTR(-ENODEV);
+
+	return __fpga_mgr_get(mgr_dev);
+}
+EXPORT_SYMBOL_GPL(fpga_mgr_get);
+
+static int fpga_mgr_of_node_match(struct device *dev, const void *data)
+{
+	return dev->of_node == data;
+}
+
+/**
+ * of_fpga_mgr_get - get an exclusive reference to a fpga mgr
+ * @node:	device node
+ *
+ * Given a device node, get an exclusive reference to a fpga mgr.
+ *
+ * Return: fpga manager struct or IS_ERR() condition containing error code.
+ */
+struct fpga_manager *of_fpga_mgr_get(struct device_node *node)
+{
+	struct device *dev;
+
+	dev = class_find_device(fpga_mgr_class, NULL, node,
+				fpga_mgr_of_node_match);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	return __fpga_mgr_get(dev);
+}
 EXPORT_SYMBOL_GPL(of_fpga_mgr_get);
 
 /**
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 0940bf45e2f2..957b5ac9428a 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -117,6 +117,8 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
 
 struct fpga_manager *of_fpga_mgr_get(struct device_node *node);
 
+struct fpga_manager *fpga_mgr_get(struct device *dev);
+
 void fpga_mgr_put(struct fpga_manager *mgr);
 
 int fpga_mgr_register(struct device *dev, const char *name,
-- 
cgit v1.2.3


From 1df2865f8dd9d56cb76aa7aa1298921e7bece2af Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Tue, 1 Nov 2016 14:14:26 -0500
Subject: fpga-mgr: add fpga image information struct

This patch adds a minor change in the FPGA Manager API
to hold information that is specific to an FPGA image
file.  This change is expected to bring little, if any,
pain.  The socfpga and zynq drivers are fixed up in
this patch.

An FPGA image file will have particulars that affect how the
image is programmed to the FPGA.  One example is that
current 'flags' currently has one bit which shows whether the
FPGA image was built for full reconfiguration or partial
reconfiguration.  Another example is timeout values for
enabling or disabling the bridges in the FPGA.  As the
complexity of the FPGA design increases, the bridges in the
FPGA may take longer times to enable or disable.

This patch adds a new 'struct fpga_image_info', moves the
current 'u32 flags' to it.  Two other image-specific u32's
are added for the bridge enable/disable timeouts.  The FPGA
Manager API functions are changed, replacing the 'u32 flag'
parameter with a pointer to struct fpga_image_info.
Subsequent patches fix the existing low level FPGA manager
drivers.

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/fpga-mgr.c       | 17 +++++++++--------
 drivers/fpga/socfpga.c        |  7 ++++---
 drivers/fpga/zynq-fpga.c      | 10 ++++++----
 include/linux/fpga/fpga-mgr.h | 23 +++++++++++++++++++----
 4 files changed, 38 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index b690e65d55fe..79ce2eea44db 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -32,7 +32,7 @@ static struct class *fpga_mgr_class;
 /**
  * fpga_mgr_buf_load - load fpga from image in buffer
  * @mgr:	fpga manager
- * @flags:	flags setting fpga confuration modes
+ * @info:	fpga image specific information
  * @buf:	buffer contain fpga image
  * @count:	byte count of buf
  *
@@ -44,8 +44,8 @@ static struct class *fpga_mgr_class;
  *
  * Return: 0 on success, negative error code otherwise.
  */
-int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf,
-		      size_t count)
+int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
+		      const char *buf, size_t count)
 {
 	struct device *dev = &mgr->dev;
 	int ret;
@@ -56,7 +56,7 @@ int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf,
 	 * ready to receive an FPGA image.
 	 */
 	mgr->state = FPGA_MGR_STATE_WRITE_INIT;
-	ret = mgr->mops->write_init(mgr, flags, buf, count);
+	ret = mgr->mops->write_init(mgr, info, buf, count);
 	if (ret) {
 		dev_err(dev, "Error preparing FPGA for writing\n");
 		mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR;
@@ -79,7 +79,7 @@ int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf,
 	 * steps to finish and set the FPGA into operating mode.
 	 */
 	mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE;
-	ret = mgr->mops->write_complete(mgr, flags);
+	ret = mgr->mops->write_complete(mgr, info);
 	if (ret) {
 		dev_err(dev, "Error after writing image data to FPGA\n");
 		mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR;
@@ -94,7 +94,7 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load);
 /**
  * fpga_mgr_firmware_load - request firmware and load to fpga
  * @mgr:	fpga manager
- * @flags:	flags setting fpga confuration modes
+ * @info:	fpga image specific information
  * @image_name:	name of image file on the firmware search path
  *
  * Request an FPGA image using the firmware class, then write out to the FPGA.
@@ -105,7 +105,8 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load);
  *
  * Return: 0 on success, negative error code otherwise.
  */
-int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
+int fpga_mgr_firmware_load(struct fpga_manager *mgr,
+			   struct fpga_image_info *info,
 			   const char *image_name)
 {
 	struct device *dev = &mgr->dev;
@@ -123,7 +124,7 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
 		return ret;
 	}
 
-	ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size);
+	ret = fpga_mgr_buf_load(mgr, info, fw->data, fw->size);
 
 	release_firmware(fw);
 
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index 27d2ff28132c..b6672e66cda6 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -407,13 +407,14 @@ static int socfpga_fpga_reset(struct fpga_manager *mgr)
 /*
  * Prepare the FPGA to receive the configuration data.
  */
-static int socfpga_fpga_ops_configure_init(struct fpga_manager *mgr, u32 flags,
+static int socfpga_fpga_ops_configure_init(struct fpga_manager *mgr,
+					   struct fpga_image_info *info,
 					   const char *buf, size_t count)
 {
 	struct socfpga_fpga_priv *priv = mgr->priv;
 	int ret;
 
-	if (flags & FPGA_MGR_PARTIAL_RECONFIG) {
+	if (info->flags & FPGA_MGR_PARTIAL_RECONFIG) {
 		dev_err(&mgr->dev, "Partial reconfiguration not supported.\n");
 		return -EINVAL;
 	}
@@ -478,7 +479,7 @@ static int socfpga_fpga_ops_configure_write(struct fpga_manager *mgr,
 }
 
 static int socfpga_fpga_ops_configure_complete(struct fpga_manager *mgr,
-					       u32 flags)
+					       struct fpga_image_info *info)
 {
 	struct socfpga_fpga_priv *priv = mgr->priv;
 	u32 status;
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index c2fb4120bd62..249682e92502 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -175,7 +175,8 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, u32 flags,
+static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
+				    struct fpga_image_info *info,
 				    const char *buf, size_t count)
 {
 	struct zynq_fpga_priv *priv;
@@ -189,7 +190,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, u32 flags,
 		return err;
 
 	/* don't globally reset PL if we're doing partial reconfig */
-	if (!(flags & FPGA_MGR_PARTIAL_RECONFIG)) {
+	if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) {
 		/* assert AXI interface resets */
 		regmap_write(priv->slcr, SLCR_FPGA_RST_CTRL_OFFSET,
 			     FPGA_RST_ALL_MASK);
@@ -343,7 +344,8 @@ out_free:
 	return err;
 }
 
-static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr, u32 flags)
+static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr,
+					struct fpga_image_info *info)
 {
 	struct zynq_fpga_priv *priv = mgr->priv;
 	int err;
@@ -364,7 +366,7 @@ static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr, u32 flags)
 		return err;
 
 	/* for the partial reconfig case we didn't touch the level shifters */
-	if (!(flags & FPGA_MGR_PARTIAL_RECONFIG)) {
+	if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) {
 		/* enable level shifters from PL to PS */
 		regmap_write(priv->slcr, SLCR_LVL_SHFTR_EN_OFFSET,
 			     LVL_SHFTR_ENABLE_PL_TO_PS);
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 957b5ac9428a..55803186e0ea 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -68,6 +68,18 @@ enum fpga_mgr_states {
  */
 #define FPGA_MGR_PARTIAL_RECONFIG	BIT(0)
 
+/**
+ * struct fpga_image_info - information specific to a FPGA image
+ * @flags: boolean flags as defined above
+ * @enable_timeout_us: maximum time to enable traffic through bridge (uSec)
+ * @disable_timeout_us: maximum time to disable traffic through bridge (uSec)
+ */
+struct fpga_image_info {
+	u32 flags;
+	u32 enable_timeout_us;
+	u32 disable_timeout_us;
+};
+
 /**
  * struct fpga_manager_ops - ops for low level fpga manager drivers
  * @state: returns an enum value of the FPGA's state
@@ -82,10 +94,12 @@ enum fpga_mgr_states {
  */
 struct fpga_manager_ops {
 	enum fpga_mgr_states (*state)(struct fpga_manager *mgr);
-	int (*write_init)(struct fpga_manager *mgr, u32 flags,
+	int (*write_init)(struct fpga_manager *mgr,
+			  struct fpga_image_info *info,
 			  const char *buf, size_t count);
 	int (*write)(struct fpga_manager *mgr, const char *buf, size_t count);
-	int (*write_complete)(struct fpga_manager *mgr, u32 flags);
+	int (*write_complete)(struct fpga_manager *mgr,
+			      struct fpga_image_info *info);
 	void (*fpga_remove)(struct fpga_manager *mgr);
 };
 
@@ -109,10 +123,11 @@ struct fpga_manager {
 
 #define to_fpga_manager(d) container_of(d, struct fpga_manager, dev)
 
-int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags,
+int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
 		      const char *buf, size_t count);
 
-int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
+int fpga_mgr_firmware_load(struct fpga_manager *mgr,
+			   struct fpga_image_info *info,
 			   const char *image_name);
 
 struct fpga_manager *of_fpga_mgr_get(struct device_node *node);
-- 
cgit v1.2.3


From 21aeda950c5f84a8351b862816d832120b217a9b Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Tue, 1 Nov 2016 14:14:28 -0500
Subject: fpga: add fpga bridge framework

This framework adds API functions for enabling/
disabling FPGA bridges under kernel control.

This allows the Linux kernel to disable FPGA bridges
during FPGA reprogramming and to enable FPGA bridges
when FPGA reprogramming is done.  This framework is
be manufacturer-agnostic, allowing it to be used in
interfaces that use the FPGA Manager Framework to
reprogram FPGA's.

The functions are:
* of_fpga_bridge_get
* fpga_bridge_put
   Get/put an exclusive reference to a FPGA bridge.

* fpga_bridge_enable
* fpga_bridge_disable
   Enable/Disable traffic through a bridge.

* fpga_bridge_register
* fpga_bridge_unregister
   Register/unregister a device-specific low level FPGA
   Bridge driver.

Get an exclusive reference to a bridge and add it to a list:
* fpga_bridge_get_to_list

To enable/disable/put a set of bridges that are on a list:
* fpga_bridges_enable
* fpga_bridges_disable
* fpga_bridges_put

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/Kconfig             |   7 +
 drivers/fpga/Makefile            |   3 +
 drivers/fpga/fpga-bridge.c       | 395 +++++++++++++++++++++++++++++++++++++++
 include/linux/fpga/fpga-bridge.h |  60 ++++++
 4 files changed, 465 insertions(+)
 create mode 100644 drivers/fpga/fpga-bridge.c
 create mode 100644 include/linux/fpga/fpga-bridge.h

(limited to 'include/linux')

diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index cd84934774cc..9b20f45c85bf 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -26,6 +26,13 @@ config FPGA_MGR_ZYNQ_FPGA
 	help
 	  FPGA manager driver support for Xilinx Zynq FPGAs.
 
+config FPGA_BRIDGE
+	tristate "FPGA Bridge Framework"
+	depends on OF
+	help
+	  Say Y here if you want to support bridges connected between host
+	  processors and FPGAs or between FPGAs.
+
 endif # FPGA
 
 endmenu
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 8d83fc6b1613..4baef0022d4c 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -8,3 +8,6 @@ obj-$(CONFIG_FPGA)			+= fpga-mgr.o
 # FPGA Manager Drivers
 obj-$(CONFIG_FPGA_MGR_SOCFPGA)		+= socfpga.o
 obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA)	+= zynq-fpga.o
+
+# FPGA Bridge Drivers
+obj-$(CONFIG_FPGA_BRIDGE)		+= fpga-bridge.o
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
new file mode 100644
index 000000000000..33ee83e6373c
--- /dev/null
+++ b/drivers/fpga/fpga-bridge.c
@@ -0,0 +1,395 @@
+/*
+ * FPGA Bridge Framework Driver
+ *
+ *  Copyright (C) 2013-2016 Altera Corporation, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/fpga/fpga-bridge.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+static DEFINE_IDA(fpga_bridge_ida);
+static struct class *fpga_bridge_class;
+
+/* Lock for adding/removing bridges to linked lists*/
+spinlock_t bridge_list_lock;
+
+static int fpga_bridge_of_node_match(struct device *dev, const void *data)
+{
+	return dev->of_node == data;
+}
+
+/**
+ * fpga_bridge_enable - Enable transactions on the bridge
+ *
+ * @bridge: FPGA bridge
+ *
+ * Return: 0 for success, error code otherwise.
+ */
+int fpga_bridge_enable(struct fpga_bridge *bridge)
+{
+	dev_dbg(&bridge->dev, "enable\n");
+
+	if (bridge->br_ops && bridge->br_ops->enable_set)
+		return bridge->br_ops->enable_set(bridge, 1);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_enable);
+
+/**
+ * fpga_bridge_disable - Disable transactions on the bridge
+ *
+ * @bridge: FPGA bridge
+ *
+ * Return: 0 for success, error code otherwise.
+ */
+int fpga_bridge_disable(struct fpga_bridge *bridge)
+{
+	dev_dbg(&bridge->dev, "disable\n");
+
+	if (bridge->br_ops && bridge->br_ops->enable_set)
+		return bridge->br_ops->enable_set(bridge, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_disable);
+
+/**
+ * of_fpga_bridge_get - get an exclusive reference to a fpga bridge
+ *
+ * @np: node pointer of a FPGA bridge
+ * @info: fpga image specific information
+ *
+ * Return fpga_bridge struct if successful.
+ * Return -EBUSY if someone already has a reference to the bridge.
+ * Return -ENODEV if @np is not a FPGA Bridge.
+ */
+struct fpga_bridge *of_fpga_bridge_get(struct device_node *np,
+				       struct fpga_image_info *info)
+
+{
+	struct device *dev;
+	struct fpga_bridge *bridge;
+	int ret = -ENODEV;
+
+	dev = class_find_device(fpga_bridge_class, NULL, np,
+				fpga_bridge_of_node_match);
+	if (!dev)
+		goto err_dev;
+
+	bridge = to_fpga_bridge(dev);
+	if (!bridge)
+		goto err_dev;
+
+	bridge->info = info;
+
+	if (!mutex_trylock(&bridge->mutex)) {
+		ret = -EBUSY;
+		goto err_dev;
+	}
+
+	if (!try_module_get(dev->parent->driver->owner))
+		goto err_ll_mod;
+
+	dev_dbg(&bridge->dev, "get\n");
+
+	return bridge;
+
+err_ll_mod:
+	mutex_unlock(&bridge->mutex);
+err_dev:
+	put_device(dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(of_fpga_bridge_get);
+
+/**
+ * fpga_bridge_put - release a reference to a bridge
+ *
+ * @bridge: FPGA bridge
+ */
+void fpga_bridge_put(struct fpga_bridge *bridge)
+{
+	dev_dbg(&bridge->dev, "put\n");
+
+	bridge->info = NULL;
+	module_put(bridge->dev.parent->driver->owner);
+	mutex_unlock(&bridge->mutex);
+	put_device(&bridge->dev);
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_put);
+
+/**
+ * fpga_bridges_enable - enable bridges in a list
+ * @bridge_list: list of FPGA bridges
+ *
+ * Enable each bridge in the list.  If list is empty, do nothing.
+ *
+ * Return 0 for success or empty bridge list; return error code otherwise.
+ */
+int fpga_bridges_enable(struct list_head *bridge_list)
+{
+	struct fpga_bridge *bridge;
+	struct list_head *node;
+	int ret;
+
+	list_for_each(node, bridge_list) {
+		bridge = list_entry(node, struct fpga_bridge, node);
+		ret = fpga_bridge_enable(bridge);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fpga_bridges_enable);
+
+/**
+ * fpga_bridges_disable - disable bridges in a list
+ *
+ * @bridge_list: list of FPGA bridges
+ *
+ * Disable each bridge in the list.  If list is empty, do nothing.
+ *
+ * Return 0 for success or empty bridge list; return error code otherwise.
+ */
+int fpga_bridges_disable(struct list_head *bridge_list)
+{
+	struct fpga_bridge *bridge;
+	struct list_head *node;
+	int ret;
+
+	list_for_each(node, bridge_list) {
+		bridge = list_entry(node, struct fpga_bridge, node);
+		ret = fpga_bridge_disable(bridge);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fpga_bridges_disable);
+
+/**
+ * fpga_bridges_put - put bridges
+ *
+ * @bridge_list: list of FPGA bridges
+ *
+ * For each bridge in the list, put the bridge and remove it from the list.
+ * If list is empty, do nothing.
+ */
+void fpga_bridges_put(struct list_head *bridge_list)
+{
+	struct fpga_bridge *bridge;
+	struct list_head *node, *next;
+	unsigned long flags;
+
+	list_for_each_safe(node, next, bridge_list) {
+		bridge = list_entry(node, struct fpga_bridge, node);
+
+		fpga_bridge_put(bridge);
+
+		spin_lock_irqsave(&bridge_list_lock, flags);
+		list_del(&bridge->node);
+		spin_unlock_irqrestore(&bridge_list_lock, flags);
+	}
+}
+EXPORT_SYMBOL_GPL(fpga_bridges_put);
+
+/**
+ * fpga_bridges_get_to_list - get a bridge, add it to a list
+ *
+ * @np: node pointer of a FPGA bridge
+ * @info: fpga image specific information
+ * @bridge_list: list of FPGA bridges
+ *
+ * Get an exclusive reference to the bridge and and it to the list.
+ *
+ * Return 0 for success, error code from of_fpga_bridge_get() othewise.
+ */
+int fpga_bridge_get_to_list(struct device_node *np,
+			    struct fpga_image_info *info,
+			    struct list_head *bridge_list)
+{
+	struct fpga_bridge *bridge;
+	unsigned long flags;
+
+	bridge = of_fpga_bridge_get(np, info);
+	if (IS_ERR(bridge))
+		return PTR_ERR(bridge);
+
+	spin_lock_irqsave(&bridge_list_lock, flags);
+	list_add(&bridge->node, bridge_list);
+	spin_unlock_irqrestore(&bridge_list_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_get_to_list);
+
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct fpga_bridge *bridge = to_fpga_bridge(dev);
+
+	return sprintf(buf, "%s\n", bridge->name);
+}
+
+static ssize_t state_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct fpga_bridge *bridge = to_fpga_bridge(dev);
+	int enable = 1;
+
+	if (bridge->br_ops && bridge->br_ops->enable_show)
+		enable = bridge->br_ops->enable_show(bridge);
+
+	return sprintf(buf, "%s\n", enable ? "enabled" : "disabled");
+}
+
+static DEVICE_ATTR_RO(name);
+static DEVICE_ATTR_RO(state);
+
+static struct attribute *fpga_bridge_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_state.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(fpga_bridge);
+
+/**
+ * fpga_bridge_register - register a fpga bridge driver
+ * @dev:	FPGA bridge device from pdev
+ * @name:	FPGA bridge name
+ * @br_ops:	pointer to structure of fpga bridge ops
+ * @priv:	FPGA bridge private data
+ *
+ * Return: 0 for success, error code otherwise.
+ */
+int fpga_bridge_register(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv)
+{
+	struct fpga_bridge *bridge;
+	int id, ret = 0;
+
+	if (!name || !strlen(name)) {
+		dev_err(dev, "Attempt to register with no name!\n");
+		return -EINVAL;
+	}
+
+	bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
+	if (!bridge)
+		return -ENOMEM;
+
+	id = ida_simple_get(&fpga_bridge_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		ret = id;
+		goto error_kfree;
+	}
+
+	mutex_init(&bridge->mutex);
+	INIT_LIST_HEAD(&bridge->node);
+
+	bridge->name = name;
+	bridge->br_ops = br_ops;
+	bridge->priv = priv;
+
+	device_initialize(&bridge->dev);
+	bridge->dev.class = fpga_bridge_class;
+	bridge->dev.parent = dev;
+	bridge->dev.of_node = dev->of_node;
+	bridge->dev.id = id;
+	dev_set_drvdata(dev, bridge);
+
+	ret = dev_set_name(&bridge->dev, "br%d", id);
+	if (ret)
+		goto error_device;
+
+	ret = device_add(&bridge->dev);
+	if (ret)
+		goto error_device;
+
+	of_platform_populate(dev->of_node, NULL, NULL, dev);
+
+	dev_info(bridge->dev.parent, "fpga bridge [%s] registered\n",
+		 bridge->name);
+
+	return 0;
+
+error_device:
+	ida_simple_remove(&fpga_bridge_ida, id);
+error_kfree:
+	kfree(bridge);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_register);
+
+/**
+ * fpga_bridge_unregister - unregister a fpga bridge driver
+ * @dev: FPGA bridge device from pdev
+ */
+void fpga_bridge_unregister(struct device *dev)
+{
+	struct fpga_bridge *bridge = dev_get_drvdata(dev);
+
+	/*
+	 * If the low level driver provides a method for putting bridge into
+	 * a desired state upon unregister, do it.
+	 */
+	if (bridge->br_ops && bridge->br_ops->fpga_bridge_remove)
+		bridge->br_ops->fpga_bridge_remove(bridge);
+
+	device_unregister(&bridge->dev);
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_unregister);
+
+static void fpga_bridge_dev_release(struct device *dev)
+{
+	struct fpga_bridge *bridge = to_fpga_bridge(dev);
+
+	ida_simple_remove(&fpga_bridge_ida, bridge->dev.id);
+	kfree(bridge);
+}
+
+static int __init fpga_bridge_dev_init(void)
+{
+	spin_lock_init(&bridge_list_lock);
+
+	fpga_bridge_class = class_create(THIS_MODULE, "fpga_bridge");
+	if (IS_ERR(fpga_bridge_class))
+		return PTR_ERR(fpga_bridge_class);
+
+	fpga_bridge_class->dev_groups = fpga_bridge_groups;
+	fpga_bridge_class->dev_release = fpga_bridge_dev_release;
+
+	return 0;
+}
+
+static void __exit fpga_bridge_dev_exit(void)
+{
+	class_destroy(fpga_bridge_class);
+	ida_destroy(&fpga_bridge_ida);
+}
+
+MODULE_DESCRIPTION("FPGA Bridge Driver");
+MODULE_AUTHOR("Alan Tull <atull@opensource.altera.com>");
+MODULE_LICENSE("GPL v2");
+
+subsys_initcall(fpga_bridge_dev_init);
+module_exit(fpga_bridge_dev_exit);
diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
new file mode 100644
index 000000000000..dba6e3c697c7
--- /dev/null
+++ b/include/linux/fpga/fpga-bridge.h
@@ -0,0 +1,60 @@
+#include <linux/device.h>
+#include <linux/fpga/fpga-mgr.h>
+
+#ifndef _LINUX_FPGA_BRIDGE_H
+#define _LINUX_FPGA_BRIDGE_H
+
+struct fpga_bridge;
+
+/**
+ * struct fpga_bridge_ops - ops for low level FPGA bridge drivers
+ * @enable_show: returns the FPGA bridge's status
+ * @enable_set: set a FPGA bridge as enabled or disabled
+ * @fpga_bridge_remove: set FPGA into a specific state during driver remove
+ */
+struct fpga_bridge_ops {
+	int (*enable_show)(struct fpga_bridge *bridge);
+	int (*enable_set)(struct fpga_bridge *bridge, bool enable);
+	void (*fpga_bridge_remove)(struct fpga_bridge *bridge);
+};
+
+/**
+ * struct fpga_bridge - FPGA bridge structure
+ * @name: name of low level FPGA bridge
+ * @dev: FPGA bridge device
+ * @mutex: enforces exclusive reference to bridge
+ * @br_ops: pointer to struct of FPGA bridge ops
+ * @info: fpga image specific information
+ * @node: FPGA bridge list node
+ * @priv: low level driver private date
+ */
+struct fpga_bridge {
+	const char *name;
+	struct device dev;
+	struct mutex mutex; /* for exclusive reference to bridge */
+	const struct fpga_bridge_ops *br_ops;
+	struct fpga_image_info *info;
+	struct list_head node;
+	void *priv;
+};
+
+#define to_fpga_bridge(d) container_of(d, struct fpga_bridge, dev)
+
+struct fpga_bridge *of_fpga_bridge_get(struct device_node *node,
+				       struct fpga_image_info *info);
+void fpga_bridge_put(struct fpga_bridge *bridge);
+int fpga_bridge_enable(struct fpga_bridge *bridge);
+int fpga_bridge_disable(struct fpga_bridge *bridge);
+
+int fpga_bridges_enable(struct list_head *bridge_list);
+int fpga_bridges_disable(struct list_head *bridge_list);
+void fpga_bridges_put(struct list_head *bridge_list);
+int fpga_bridge_get_to_list(struct device_node *np,
+			    struct fpga_image_info *info,
+			    struct list_head *bridge_list);
+
+int fpga_bridge_register(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv);
+void fpga_bridge_unregister(struct device *dev);
+
+#endif /* _LINUX_FPGA_BRIDGE_H */
-- 
cgit v1.2.3


From 0fa20cdfcc1f68847cdfc47824476301eedc8297 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Tue, 1 Nov 2016 14:14:29 -0500
Subject: fpga: fpga-region: device tree control for FPGA

FPGA Regions support programming FPGA under control of the Device
Tree.

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/Kconfig          |   7 +
 drivers/fpga/Makefile         |   3 +
 drivers/fpga/fpga-region.c    | 603 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/fpga/fpga-mgr.h |   2 +
 4 files changed, 615 insertions(+)
 create mode 100644 drivers/fpga/fpga-region.c

(limited to 'include/linux')

diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index 9b20f45c85bf..e0e1257e17e2 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -13,6 +13,13 @@ config FPGA
 
 if FPGA
 
+config FPGA_REGION
+	tristate "FPGA Region"
+	depends on OF && FPGA_BRIDGE
+	help
+	  FPGA Regions allow loading FPGA images under control of
+	  the Device Tree.
+
 config FPGA_MGR_SOCFPGA
 	tristate "Altera SOCFPGA FPGA Manager"
 	depends on ARCH_SOCFPGA
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 4baef0022d4c..8d746c342533 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -11,3 +11,6 @@ obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA)	+= zynq-fpga.o
 
 # FPGA Bridge Drivers
 obj-$(CONFIG_FPGA_BRIDGE)		+= fpga-bridge.o
+
+# High Level Interfaces
+obj-$(CONFIG_FPGA_REGION)		+= fpga-region.o
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
new file mode 100644
index 000000000000..3222fdbad75a
--- /dev/null
+++ b/drivers/fpga/fpga-region.c
@@ -0,0 +1,603 @@
+/*
+ * FPGA Region - Device Tree support for FPGA programming under Linux
+ *
+ *  Copyright (C) 2013-2016 Altera Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fpga/fpga-bridge.h>
+#include <linux/fpga/fpga-mgr.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * struct fpga_region - FPGA Region structure
+ * @dev: FPGA Region device
+ * @mutex: enforces exclusive reference to region
+ * @bridge_list: list of FPGA bridges specified in region
+ * @info: fpga image specific information
+ */
+struct fpga_region {
+	struct device dev;
+	struct mutex mutex; /* for exclusive reference to region */
+	struct list_head bridge_list;
+	struct fpga_image_info *info;
+};
+
+#define to_fpga_region(d) container_of(d, struct fpga_region, dev)
+
+static DEFINE_IDA(fpga_region_ida);
+static struct class *fpga_region_class;
+
+static const struct of_device_id fpga_region_of_match[] = {
+	{ .compatible = "fpga-region", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, fpga_region_of_match);
+
+static int fpga_region_of_node_match(struct device *dev, const void *data)
+{
+	return dev->of_node == data;
+}
+
+/**
+ * fpga_region_find - find FPGA region
+ * @np: device node of FPGA Region
+ * Caller will need to put_device(&region->dev) when done.
+ * Returns FPGA Region struct or NULL
+ */
+static struct fpga_region *fpga_region_find(struct device_node *np)
+{
+	struct device *dev;
+
+	dev = class_find_device(fpga_region_class, NULL, np,
+				fpga_region_of_node_match);
+	if (!dev)
+		return NULL;
+
+	return to_fpga_region(dev);
+}
+
+/**
+ * fpga_region_get - get an exclusive reference to a fpga region
+ * @region: FPGA Region struct
+ *
+ * Caller should call fpga_region_put() when done with region.
+ *
+ * Return fpga_region struct if successful.
+ * Return -EBUSY if someone already has a reference to the region.
+ * Return -ENODEV if @np is not a FPGA Region.
+ */
+static struct fpga_region *fpga_region_get(struct fpga_region *region)
+{
+	struct device *dev = &region->dev;
+
+	if (!mutex_trylock(&region->mutex)) {
+		dev_dbg(dev, "%s: FPGA Region already in use\n", __func__);
+		return ERR_PTR(-EBUSY);
+	}
+
+	get_device(dev);
+	of_node_get(dev->of_node);
+	if (!try_module_get(dev->parent->driver->owner)) {
+		of_node_put(dev->of_node);
+		put_device(dev);
+		mutex_unlock(&region->mutex);
+		return ERR_PTR(-ENODEV);
+	}
+
+	dev_dbg(&region->dev, "get\n");
+
+	return region;
+}
+
+/**
+ * fpga_region_put - release a reference to a region
+ *
+ * @region: FPGA region
+ */
+static void fpga_region_put(struct fpga_region *region)
+{
+	struct device *dev = &region->dev;
+
+	dev_dbg(&region->dev, "put\n");
+
+	module_put(dev->parent->driver->owner);
+	of_node_put(dev->of_node);
+	put_device(dev);
+	mutex_unlock(&region->mutex);
+}
+
+/**
+ * fpga_region_get_manager - get exclusive reference for FPGA manager
+ * @region: FPGA region
+ *
+ * Get FPGA Manager from "fpga-mgr" property or from ancestor region.
+ *
+ * Caller should call fpga_mgr_put() when done with manager.
+ *
+ * Return: fpga manager struct or IS_ERR() condition containing error code.
+ */
+static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region)
+{
+	struct device *dev = &region->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node  *mgr_node;
+	struct fpga_manager *mgr;
+
+	of_node_get(np);
+	while (np) {
+		if (of_device_is_compatible(np, "fpga-region")) {
+			mgr_node = of_parse_phandle(np, "fpga-mgr", 0);
+			if (mgr_node) {
+				mgr = of_fpga_mgr_get(mgr_node);
+				of_node_put(np);
+				return mgr;
+			}
+		}
+		np = of_get_next_parent(np);
+	}
+	of_node_put(np);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * fpga_region_get_bridges - create a list of bridges
+ * @region: FPGA region
+ * @overlay: device node of the overlay
+ *
+ * Create a list of bridges including the parent bridge and the bridges
+ * specified by "fpga-bridges" property.  Note that the
+ * fpga_bridges_enable/disable/put functions are all fine with an empty list
+ * if that happens.
+ *
+ * Caller should call fpga_bridges_put(&region->bridge_list) when
+ * done with the bridges.
+ *
+ * Return 0 for success (even if there are no bridges specified)
+ * or -EBUSY if any of the bridges are in use.
+ */
+static int fpga_region_get_bridges(struct fpga_region *region,
+				   struct device_node *overlay)
+{
+	struct device *dev = &region->dev;
+	struct device_node *region_np = dev->of_node;
+	struct device_node *br, *np, *parent_br = NULL;
+	int i, ret;
+
+	/* If parent is a bridge, add to list */
+	ret = fpga_bridge_get_to_list(region_np->parent, region->info,
+				      &region->bridge_list);
+	if (ret == -EBUSY)
+		return ret;
+
+	if (!ret)
+		parent_br = region_np->parent;
+
+	/* If overlay has a list of bridges, use it. */
+	if (of_parse_phandle(overlay, "fpga-bridges", 0))
+		np = overlay;
+	else
+		np = region_np;
+
+	for (i = 0; ; i++) {
+		br = of_parse_phandle(np, "fpga-bridges", i);
+		if (!br)
+			break;
+
+		/* If parent bridge is in list, skip it. */
+		if (br == parent_br)
+			continue;
+
+		/* If node is a bridge, get it and add to list */
+		ret = fpga_bridge_get_to_list(br, region->info,
+					      &region->bridge_list);
+
+		/* If any of the bridges are in use, give up */
+		if (ret == -EBUSY) {
+			fpga_bridges_put(&region->bridge_list);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * fpga_region_program_fpga - program FPGA
+ * @region: FPGA region
+ * @firmware_name: name of FPGA image firmware file
+ * @overlay: device node of the overlay
+ * Program an FPGA using information in the device tree.
+ * Function assumes that there is a firmware-name property.
+ * Return 0 for success or negative error code.
+ */
+static int fpga_region_program_fpga(struct fpga_region *region,
+				    const char *firmware_name,
+				    struct device_node *overlay)
+{
+	struct fpga_manager *mgr;
+	int ret;
+
+	region = fpga_region_get(region);
+	if (IS_ERR(region)) {
+		pr_err("failed to get fpga region\n");
+		return PTR_ERR(region);
+	}
+
+	mgr = fpga_region_get_manager(region);
+	if (IS_ERR(mgr)) {
+		pr_err("failed to get fpga region manager\n");
+		return PTR_ERR(mgr);
+	}
+
+	ret = fpga_region_get_bridges(region, overlay);
+	if (ret) {
+		pr_err("failed to get fpga region bridges\n");
+		goto err_put_mgr;
+	}
+
+	ret = fpga_bridges_disable(&region->bridge_list);
+	if (ret) {
+		pr_err("failed to disable region bridges\n");
+		goto err_put_br;
+	}
+
+	ret = fpga_mgr_firmware_load(mgr, region->info, firmware_name);
+	if (ret) {
+		pr_err("failed to load fpga image\n");
+		goto err_put_br;
+	}
+
+	ret = fpga_bridges_enable(&region->bridge_list);
+	if (ret) {
+		pr_err("failed to enable region bridges\n");
+		goto err_put_br;
+	}
+
+	fpga_mgr_put(mgr);
+	fpga_region_put(region);
+
+	return 0;
+
+err_put_br:
+	fpga_bridges_put(&region->bridge_list);
+err_put_mgr:
+	fpga_mgr_put(mgr);
+	fpga_region_put(region);
+
+	return ret;
+}
+
+/**
+ * child_regions_with_firmware
+ * @overlay: device node of the overlay
+ *
+ * If the overlay adds child FPGA regions, they are not allowed to have
+ * firmware-name property.
+ *
+ * Return 0 for OK or -EINVAL if child FPGA region adds firmware-name.
+ */
+static int child_regions_with_firmware(struct device_node *overlay)
+{
+	struct device_node *child_region;
+	const char *child_firmware_name;
+	int ret = 0;
+
+	of_node_get(overlay);
+
+	child_region = of_find_matching_node(overlay, fpga_region_of_match);
+	while (child_region) {
+		if (!of_property_read_string(child_region, "firmware-name",
+					     &child_firmware_name)) {
+			ret = -EINVAL;
+			break;
+		}
+		child_region = of_find_matching_node(child_region,
+						     fpga_region_of_match);
+	}
+
+	of_node_put(child_region);
+
+	if (ret)
+		pr_err("firmware-name not allowed in child FPGA region: %s",
+		       child_region->full_name);
+
+	return ret;
+}
+
+/**
+ * fpga_region_notify_pre_apply - pre-apply overlay notification
+ *
+ * @region: FPGA region that the overlay was applied to
+ * @nd: overlay notification data
+ *
+ * Called after when an overlay targeted to a FPGA Region is about to be
+ * applied.  Function will check the properties that will be added to the FPGA
+ * region.  If the checks pass, it will program the FPGA.
+ *
+ * The checks are:
+ * The overlay must add either firmware-name or external-fpga-config property
+ * to the FPGA Region.
+ *
+ *   firmware-name        : program the FPGA
+ *   external-fpga-config : FPGA is already programmed
+ *
+ * The overlay can add other FPGA regions, but child FPGA regions cannot have a
+ * firmware-name property since those regions don't exist yet.
+ *
+ * If the overlay that breaks the rules, notifier returns an error and the
+ * overlay is rejected before it goes into the main tree.
+ *
+ * Returns 0 for success or negative error code for failure.
+ */
+static int fpga_region_notify_pre_apply(struct fpga_region *region,
+					struct of_overlay_notify_data *nd)
+{
+	const char *firmware_name = NULL;
+	struct fpga_image_info *info;
+	int ret;
+
+	info = devm_kzalloc(&region->dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	region->info = info;
+
+	/* Reject overlay if child FPGA Regions have firmware-name property */
+	ret = child_regions_with_firmware(nd->overlay);
+	if (ret)
+		return ret;
+
+	/* Read FPGA region properties from the overlay */
+	if (of_property_read_bool(nd->overlay, "partial-fpga-config"))
+		info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
+
+	if (of_property_read_bool(nd->overlay, "external-fpga-config"))
+		info->flags |= FPGA_MGR_EXTERNAL_CONFIG;
+
+	of_property_read_string(nd->overlay, "firmware-name", &firmware_name);
+
+	of_property_read_u32(nd->overlay, "region-unfreeze-timeout-us",
+			     &info->enable_timeout_us);
+
+	of_property_read_u32(nd->overlay, "region-freeze-timeout-us",
+			     &info->disable_timeout_us);
+
+	/* If FPGA was externally programmed, don't specify firmware */
+	if ((info->flags & FPGA_MGR_EXTERNAL_CONFIG) && firmware_name) {
+		pr_err("error: specified firmware and external-fpga-config");
+		return -EINVAL;
+	}
+
+	/* FPGA is already configured externally.  We're done. */
+	if (info->flags & FPGA_MGR_EXTERNAL_CONFIG)
+		return 0;
+
+	/* If we got this far, we should be programming the FPGA */
+	if (!firmware_name) {
+		pr_err("should specify firmware-name or external-fpga-config\n");
+		return -EINVAL;
+	}
+
+	return fpga_region_program_fpga(region, firmware_name, nd->overlay);
+}
+
+/**
+ * fpga_region_notify_post_remove - post-remove overlay notification
+ *
+ * @region: FPGA region that was targeted by the overlay that was removed
+ * @nd: overlay notification data
+ *
+ * Called after an overlay has been removed if the overlay's target was a
+ * FPGA region.
+ */
+static void fpga_region_notify_post_remove(struct fpga_region *region,
+					   struct of_overlay_notify_data *nd)
+{
+	fpga_bridges_disable(&region->bridge_list);
+	fpga_bridges_put(&region->bridge_list);
+	devm_kfree(&region->dev, region->info);
+	region->info = NULL;
+}
+
+/**
+ * of_fpga_region_notify - reconfig notifier for dynamic DT changes
+ * @nb:		notifier block
+ * @action:	notifier action
+ * @arg:	reconfig data
+ *
+ * This notifier handles programming a FPGA when a "firmware-name" property is
+ * added to a fpga-region.
+ *
+ * Returns NOTIFY_OK or error if FPGA programming fails.
+ */
+static int of_fpga_region_notify(struct notifier_block *nb,
+				 unsigned long action, void *arg)
+{
+	struct of_overlay_notify_data *nd = arg;
+	struct fpga_region *region;
+	int ret;
+
+	switch (action) {
+	case OF_OVERLAY_PRE_APPLY:
+		pr_debug("%s OF_OVERLAY_PRE_APPLY\n", __func__);
+		break;
+	case OF_OVERLAY_POST_APPLY:
+		pr_debug("%s OF_OVERLAY_POST_APPLY\n", __func__);
+		return NOTIFY_OK;       /* not for us */
+	case OF_OVERLAY_PRE_REMOVE:
+		pr_debug("%s OF_OVERLAY_PRE_REMOVE\n", __func__);
+		return NOTIFY_OK;       /* not for us */
+	case OF_OVERLAY_POST_REMOVE:
+		pr_debug("%s OF_OVERLAY_POST_REMOVE\n", __func__);
+		break;
+	default:			/* should not happen */
+		return NOTIFY_OK;
+	}
+
+	region = fpga_region_find(nd->target);
+	if (!region)
+		return NOTIFY_OK;
+
+	ret = 0;
+	switch (action) {
+	case OF_OVERLAY_PRE_APPLY:
+		ret = fpga_region_notify_pre_apply(region, nd);
+		break;
+
+	case OF_OVERLAY_POST_REMOVE:
+		fpga_region_notify_post_remove(region, nd);
+		break;
+	}
+
+	put_device(&region->dev);
+
+	if (ret)
+		return notifier_from_errno(ret);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block fpga_region_of_nb = {
+	.notifier_call = of_fpga_region_notify,
+};
+
+static int fpga_region_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct fpga_region *region;
+	int id, ret = 0;
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	id = ida_simple_get(&fpga_region_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		ret = id;
+		goto err_kfree;
+	}
+
+	mutex_init(&region->mutex);
+	INIT_LIST_HEAD(&region->bridge_list);
+
+	device_initialize(&region->dev);
+	region->dev.class = fpga_region_class;
+	region->dev.parent = dev;
+	region->dev.of_node = np;
+	region->dev.id = id;
+	dev_set_drvdata(dev, region);
+
+	ret = dev_set_name(&region->dev, "region%d", id);
+	if (ret)
+		goto err_remove;
+
+	ret = device_add(&region->dev);
+	if (ret)
+		goto err_remove;
+
+	of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
+
+	dev_info(dev, "FPGA Region probed\n");
+
+	return 0;
+
+err_remove:
+	ida_simple_remove(&fpga_region_ida, id);
+err_kfree:
+	kfree(region);
+
+	return ret;
+}
+
+static int fpga_region_remove(struct platform_device *pdev)
+{
+	struct fpga_region *region = platform_get_drvdata(pdev);
+
+	device_unregister(&region->dev);
+
+	return 0;
+}
+
+static struct platform_driver fpga_region_driver = {
+	.probe = fpga_region_probe,
+	.remove = fpga_region_remove,
+	.driver = {
+		.name	= "fpga-region",
+		.of_match_table = of_match_ptr(fpga_region_of_match),
+	},
+};
+
+static void fpga_region_dev_release(struct device *dev)
+{
+	struct fpga_region *region = to_fpga_region(dev);
+
+	ida_simple_remove(&fpga_region_ida, region->dev.id);
+	kfree(region);
+}
+
+/**
+ * fpga_region_init - init function for fpga_region class
+ * Creates the fpga_region class and registers a reconfig notifier.
+ */
+static int __init fpga_region_init(void)
+{
+	int ret;
+
+	fpga_region_class = class_create(THIS_MODULE, "fpga_region");
+	if (IS_ERR(fpga_region_class))
+		return PTR_ERR(fpga_region_class);
+
+	fpga_region_class->dev_release = fpga_region_dev_release;
+
+	ret = of_overlay_notifier_register(&fpga_region_of_nb);
+	if (ret)
+		goto err_class;
+
+	ret = platform_driver_register(&fpga_region_driver);
+	if (ret)
+		goto err_plat;
+
+	return 0;
+
+err_plat:
+	of_overlay_notifier_unregister(&fpga_region_of_nb);
+err_class:
+	class_destroy(fpga_region_class);
+	ida_destroy(&fpga_region_ida);
+	return ret;
+}
+
+static void __exit fpga_region_exit(void)
+{
+	platform_driver_unregister(&fpga_region_driver);
+	of_overlay_notifier_unregister(&fpga_region_of_nb);
+	class_destroy(fpga_region_class);
+	ida_destroy(&fpga_region_ida);
+}
+
+subsys_initcall(fpga_region_init);
+module_exit(fpga_region_exit);
+
+MODULE_DESCRIPTION("FPGA Region");
+MODULE_AUTHOR("Alan Tull <atull@opensource.altera.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 55803186e0ea..96a1a3311649 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -65,8 +65,10 @@ enum fpga_mgr_states {
 /*
  * FPGA Manager flags
  * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported
+ * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting
  */
 #define FPGA_MGR_PARTIAL_RECONFIG	BIT(0)
+#define FPGA_MGR_EXTERNAL_CONFIG	BIT(1)
 
 /**
  * struct fpga_image_info - information specific to a FPGA image
-- 
cgit v1.2.3


From fac51482577d5e05bbb0efa8d602a3c2111098bf Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Fri, 28 Oct 2016 09:45:28 +0100
Subject: drivers: base: cacheinfo: fix x86 with CONFIG_OF enabled

With CONFIG_OF enabled on x86, we get the following error on boot:
"
	Failed to find cpu0 device node
 	Unable to detect cache hierarchy from DT for CPU 0
"
and the cacheinfo fails to get populated in the corresponding sysfs
entries. This is because cache_setup_of_node looks for of_node for
setting up the shared cpu_map without checking that it's already
populated in the architecture specific callback.

In order to indicate that the shared cpu_map is already populated, this
patch introduces a boolean `cpu_map_populated` in struct cpu_cacheinfo
that can be used by the generic code to skip cache_shared_cpu_map_setup.

This patch also sets that boolean for x86.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 2 ++
 drivers/base/cacheinfo.c              | 3 +++
 include/linux/cacheinfo.h             | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index de6626c18e42..be6337156502 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu)
 		ci_leaf_init(this_leaf++, &id4_regs);
 		__cache_cpumap_setup(cpu, idx, &id4_regs);
 	}
+	this_cpu_ci->cpu_map_populated = true;
+
 	return 0;
 }
 
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index e9fd32e91668..ecde8957835a 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -106,6 +106,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 	unsigned int index;
 	int ret;
 
+	if (this_cpu_ci->cpu_map_populated)
+		return 0;
+
 	ret = cache_setup_of_node(cpu);
 	if (ret)
 		return ret;
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2189935075b4..a951fd10aaaa 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -71,6 +71,7 @@ struct cpu_cacheinfo {
 	struct cacheinfo *info_list;
 	unsigned int num_levels;
 	unsigned int num_leaves;
+	bool cpu_map_populated;
 };
 
 /*
-- 
cgit v1.2.3


From d49187e97e94e2eb613cb6fed810356972077cc3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 10 Nov 2016 07:32:33 -0800
Subject: nvme: introduce struct nvme_request

This adds a shared per-request structure for all NVMe I/O.  This structure
is embedded as the first member in all NVMe transport drivers request
private data and allows to implement common functionality between the
drivers.

The first use is to replace the current abuse of the SCSI command
passthrough fields in struct request for the NVMe command passthrough,
but it will grow a field more fields to allow implementing things
like common abort handlers in the future.

The passthrough commands are handled by having a pointer to the SQE
(struct nvme_command) in struct nvme_request, and the union of the
possible result fields, which had to be turned from an anonymous
into a named union for that purpose.  This avoids having to pass
a reference to a full CQE around and thus makes checking the result
a lot more lightweight.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c          | 28 +++++++++++++---------------
 drivers/nvme/host/fabrics.c       | 26 +++++++++++++-------------
 drivers/nvme/host/lightnvm.c      | 31 +++++++------------------------
 drivers/nvme/host/nvme.h          | 16 +++++++++++++++-
 drivers/nvme/host/pci.c           |  4 ++--
 drivers/nvme/host/rdma.c          | 11 +++--------
 drivers/nvme/target/core.c        |  8 ++++----
 drivers/nvme/target/fabrics-cmd.c | 14 +++++++-------
 drivers/nvme/target/loop.c        | 12 ++++++------
 drivers/nvme/target/nvmet.h       |  2 +-
 include/linux/nvme.h              | 10 +++++-----
 11 files changed, 76 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ef34f2f3566a..2fd632bcd975 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -221,8 +221,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
 
 	req->cmd_type = REQ_TYPE_DRV_PRIV;
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
-	req->cmd = (unsigned char *)cmd;
-	req->cmd_len = sizeof(struct nvme_command);
+	nvme_req(req)->cmd = cmd;
 
 	return req;
 }
@@ -321,7 +320,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	int ret = 0;
 
 	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
-		memcpy(cmd, req->cmd, sizeof(*cmd));
+		memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
 	else if (req_op(req) == REQ_OP_FLUSH)
 		nvme_setup_flush(ns, cmd);
 	else if (req_op(req) == REQ_OP_DISCARD)
@@ -338,7 +337,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd);
  * if the result is positive, it's an NVM Express status code
  */
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+		union nvme_result *result, void *buffer, unsigned bufflen,
 		unsigned timeout, int qid, int at_head, int flags)
 {
 	struct request *req;
@@ -349,7 +348,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-	req->special = cqe;
 
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -358,6 +356,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	}
 
 	blk_execute_rq(req->q, NULL, req, at_head);
+	if (result)
+		*result = nvme_req(req)->result;
 	ret = req->errors;
  out:
 	blk_mq_free_request(req);
@@ -379,7 +379,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		u32 *result, unsigned timeout)
 {
 	bool write = nvme_is_write(cmd);
-	struct nvme_completion cqe;
 	struct nvme_ns *ns = q->queuedata;
 	struct gendisk *disk = ns ? ns->disk : NULL;
 	struct request *req;
@@ -392,7 +391,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-	req->special = &cqe;
 
 	if (ubuffer && bufflen) {
 		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
@@ -447,7 +445,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 	blk_execute_rq(req->q, disk, req, 0);
 	ret = req->errors;
 	if (result)
-		*result = le32_to_cpu(cqe.result);
+		*result = le32_to_cpu(nvme_req(req)->result.u32);
 	if (meta && !ret && !write) {
 		if (copy_to_user(meta_buffer, meta, meta_len))
 			ret = -EFAULT;
@@ -596,7 +594,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 		      void *buffer, size_t buflen, u32 *result)
 {
 	struct nvme_command c;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	int ret;
 
 	memset(&c, 0, sizeof(c));
@@ -604,10 +602,10 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 	c.features.nsid = cpu_to_le32(nsid);
 	c.features.fid = cpu_to_le32(fid);
 
-	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, buffer, buflen, 0,
+	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
 			NVME_QID_ANY, 0, 0);
 	if (ret >= 0 && result)
-		*result = le32_to_cpu(cqe.result);
+		*result = le32_to_cpu(res.u32);
 	return ret;
 }
 
@@ -615,7 +613,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 		      void *buffer, size_t buflen, u32 *result)
 {
 	struct nvme_command c;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	int ret;
 
 	memset(&c, 0, sizeof(c));
@@ -623,10 +621,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe,
+	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
 			buffer, buflen, 0, NVME_QID_ANY, 0, 0);
 	if (ret >= 0 && result)
-		*result = le32_to_cpu(cqe.result);
+		*result = le32_to_cpu(res.u32);
 	return ret;
 }
 
@@ -1901,7 +1899,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl,
 		struct nvme_completion *cqe)
 {
 	u16 status = le16_to_cpu(cqe->status) >> 1;
-	u32 result = le32_to_cpu(cqe->result);
+	u32 result = le32_to_cpu(cqe->result.u32);
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
 		++ctrl->event_limit;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5a3f008d3480..68fb26b3bfb9 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -161,7 +161,7 @@ EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
 	struct nvme_command cmd;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	int ret;
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -169,11 +169,11 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
 	cmd.prop_get.offset = cpu_to_le32(off);
 
-	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0,
+	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
 			NVME_QID_ANY, 0, 0);
 
 	if (ret >= 0)
-		*val = le64_to_cpu(cqe.result64);
+		*val = le64_to_cpu(res.u64);
 	if (unlikely(ret != 0))
 		dev_err(ctrl->device,
 			"Property Get error: %d, offset %#x\n",
@@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read32);
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 {
 	struct nvme_command cmd;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	int ret;
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -216,11 +216,11 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	cmd.prop_get.attrib = 1;
 	cmd.prop_get.offset = cpu_to_le32(off);
 
-	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0,
+	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
 			NVME_QID_ANY, 0, 0);
 
 	if (ret >= 0)
-		*val = le64_to_cpu(cqe.result64);
+		*val = le64_to_cpu(res.u64);
 	if (unlikely(ret != 0))
 		dev_err(ctrl->device,
 			"Property Get error: %d, offset %#x\n",
@@ -368,7 +368,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 {
 	struct nvme_command cmd;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	struct nvmf_connect_data *data;
 	int ret;
 
@@ -400,16 +400,16 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
 
-	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe,
+	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res,
 			data, sizeof(*data), 0, NVME_QID_ANY, 1,
 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 	if (ret) {
-		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result),
+		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
 				       &cmd, data);
 		goto out_free_data;
 	}
 
-	ctrl->cntlid = le16_to_cpu(cqe.result16);
+	ctrl->cntlid = le16_to_cpu(res.u16);
 
 out_free_data:
 	kfree(data);
@@ -441,7 +441,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 {
 	struct nvme_command cmd;
 	struct nvmf_connect_data *data;
-	struct nvme_completion cqe;
+	union nvme_result res;
 	int ret;
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -459,11 +459,11 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
 
-	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &cqe,
+	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
 			data, sizeof(*data), 0, qid, 1,
 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 	if (ret) {
-		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result),
+		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
 				       &cmd, data);
 	}
 	kfree(data);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5e3011e31fc..442f67774ea9 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -146,14 +146,6 @@ struct nvme_nvm_command {
 	};
 };
 
-struct nvme_nvm_completion {
-	__le64	result;		/* Used by LightNVM to return ppa completions */
-	__le16	sq_head;	/* how much of this queue may be reclaimed */
-	__le16	sq_id;		/* submission queue that generated this entry */
-	__u16	command_id;	/* of the command which completed */
-	__le16	status;		/* did the command fail, and if so, why? */
-};
-
 #define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__le16			num_pairs;
@@ -481,11 +473,8 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
 static void nvme_nvm_end_io(struct request *rq, int error)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
-	struct nvme_nvm_completion *cqe = rq->special;
-
-	if (cqe)
-		rqd->ppa_status = le64_to_cpu(cqe->result);
 
+	rqd->ppa_status = nvme_req(rq)->result.u64;
 	nvm_end_io(rqd, error);
 
 	kfree(rq->cmd);
@@ -500,20 +489,18 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	struct bio *bio = rqd->bio;
 	struct nvme_nvm_command *cmd;
 
-	rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0);
-	if (IS_ERR(rq))
+	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
+	if (!cmd)
 		return -ENOMEM;
 
-	cmd = kzalloc(sizeof(struct nvme_nvm_command) +
-				sizeof(struct nvme_nvm_completion), GFP_KERNEL);
-	if (!cmd) {
-		blk_mq_free_request(rq);
+	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
+	if (IS_ERR(rq)) {
+		kfree(cmd);
 		return -ENOMEM;
 	}
+	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
 
-	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->ioprio = bio_prio(bio);
-
 	if (bio_has_data(bio))
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 
@@ -522,10 +509,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
 
-	rq->cmd = (unsigned char *)cmd;
-	rq->cmd_len = sizeof(struct nvme_nvm_command);
-	rq->special = cmd + 1;
-
 	rq->end_io_data = rqd;
 
 	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d47f5a5d18c7..5e64957a9b96 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -79,6 +79,20 @@ enum nvme_quirks {
 	NVME_QUIRK_DELAY_BEFORE_CHK_RDY		= (1 << 3),
 };
 
+/*
+ * Common request structure for NVMe passthrough.  All drivers must have
+ * this structure as the first member of their request-private data.
+ */
+struct nvme_request {
+	struct nvme_command	*cmd;
+	union nvme_result	result;
+};
+
+static inline struct nvme_request *nvme_req(struct request *req)
+{
+	return blk_mq_rq_to_pdu(req);
+}
+
 /* The below value is the specific amount of delay needed before checking
  * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
  * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
@@ -278,7 +292,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+		union nvme_result *result, void *buffer, unsigned bufflen,
 		unsigned timeout, int qid, int at_head, int flags);
 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void __user *ubuffer, unsigned bufflen, u32 *result,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0955e9d22020..de8e0505d979 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -140,6 +140,7 @@ struct nvme_queue {
  * allocated to store the PRP list.
  */
 struct nvme_iod {
+	struct nvme_request req;
 	struct nvme_queue *nvmeq;
 	int aborted;
 	int npages;		/* In the PRP list. 0 means small pool in use */
@@ -707,8 +708,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		}
 
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
-			memcpy(req->special, &cqe, sizeof(cqe));
+		nvme_req(req)->result = cqe.result;
 		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 5a8388177959..0b8a161cf881 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -66,6 +66,7 @@ struct nvme_rdma_qe {
 
 struct nvme_rdma_queue;
 struct nvme_rdma_request {
+	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
@@ -1117,13 +1118,10 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 		struct nvme_completion *cqe, struct ib_wc *wc, int tag)
 {
-	u16 status = le16_to_cpu(cqe->status);
 	struct request *rq;
 	struct nvme_rdma_request *req;
 	int ret = 0;
 
-	status >>= 1;
-
 	rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
@@ -1134,9 +1132,6 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	if (rq->cmd_type == REQ_TYPE_DRV_PRIV && rq->special)
-		memcpy(rq->special, cqe, sizeof(*cqe));
-
 	if (rq->tag == tag)
 		ret = 1;
 
@@ -1144,8 +1139,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	    wc->ex.invalidate_rkey == req->mr->rkey)
 		req->mr->need_inval = false;
 
-	blk_mq_complete_request(rq, status);
-
+	req->req.result = cqe->result;
+	blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
 	return ret;
 }
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6559d5afa7bf..c232552be2d8 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -617,7 +617,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 	if (!subsys) {
 		pr_warn("connect request for invalid subsystem %s!\n",
 			subsysnqn);
-		req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
+		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 	}
 
@@ -638,7 +638,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 
 	pr_warn("could not find controller %d for subsys %s / host %s\n",
 		cntlid, subsysnqn, hostnqn);
-	req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid);
+	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 
 out:
@@ -700,7 +700,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!subsys) {
 		pr_warn("connect request for invalid subsystem %s!\n",
 			subsysnqn);
-		req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
+		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 		goto out;
 	}
 
@@ -709,7 +709,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
 		pr_info("connect by host %s for subsystem %s not allowed\n",
 			hostnqn, subsysnqn);
-		req->rsp->result = IPO_IATTR_CONNECT_DATA(hostnqn);
+		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
 		up_read(&nvmet_config_sem);
 		goto out_put_subsystem;
 	}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 9a97ae67e656..f4088198cd0d 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -69,7 +69,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
 		}
 	}
 
-	req->rsp->result64 = cpu_to_le64(val);
+	req->rsp->result.u64 = cpu_to_le64(val);
 	nvmet_req_complete(req, status);
 }
 
@@ -125,7 +125,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	d = kmap(sg_page(req->sg)) + req->sg->offset;
 
 	/* zero out initial completion result, assign values as needed */
-	req->rsp->result = 0;
+	req->rsp->result.u32 = 0;
 
 	if (c->recfmt != 0) {
 		pr_warn("invalid connect version (%d).\n",
@@ -138,7 +138,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 		pr_warn("connect attempt for invalid controller ID %#x\n",
 			d->cntlid);
 		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
-		req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid);
+		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 		goto out;
 	}
 
@@ -155,7 +155,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 
 	pr_info("creating controller %d for NQN %s.\n",
 			ctrl->cntlid, ctrl->hostnqn);
-	req->rsp->result16 = cpu_to_le16(ctrl->cntlid);
+	req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 out:
 	kunmap(sg_page(req->sg));
@@ -173,7 +173,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	d = kmap(sg_page(req->sg)) + req->sg->offset;
 
 	/* zero out initial completion result, assign values as needed */
-	req->rsp->result = 0;
+	req->rsp->result.u32 = 0;
 
 	if (c->recfmt != 0) {
 		pr_warn("invalid connect version (%d).\n",
@@ -191,14 +191,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	if (unlikely(qid > ctrl->subsys->max_qid)) {
 		pr_warn("invalid queue id (%d)\n", qid);
 		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
-		req->rsp->result = IPO_IATTR_CONNECT_SQE(qid);
+		req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
 		goto out_ctrl_put;
 	}
 
 	status = nvmet_install_queue(ctrl, req);
 	if (status) {
 		/* pass back cntlid that had the issue of installing queue */
-		req->rsp->result16 = cpu_to_le16(ctrl->cntlid);
+		req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
 		goto out_ctrl_put;
 	}
 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index d5df77d686b2..757e21a31128 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -36,6 +36,7 @@
 	(NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
 
 struct nvme_loop_iod {
+	struct nvme_request	nvme_req;
 	struct nvme_command	cmd;
 	struct nvme_completion	rsp;
 	struct nvmet_req	req;
@@ -112,10 +113,10 @@ static void nvme_loop_complete_rq(struct request *req)
 	blk_mq_end_request(req, error);
 }
 
-static void nvme_loop_queue_response(struct nvmet_req *nvme_req)
+static void nvme_loop_queue_response(struct nvmet_req *req)
 {
 	struct nvme_loop_iod *iod =
-		container_of(nvme_req, struct nvme_loop_iod, req);
+		container_of(req, struct nvme_loop_iod, req);
 	struct nvme_completion *cqe = &iod->rsp;
 
 	/*
@@ -128,11 +129,10 @@ static void nvme_loop_queue_response(struct nvmet_req *nvme_req)
 			cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
 		nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe);
 	} else {
-		struct request *req = blk_mq_rq_from_pdu(iod);
+		struct request *rq = blk_mq_rq_from_pdu(iod);
 
-		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
-			memcpy(req->special, cqe, sizeof(*cqe));
-		blk_mq_complete_request(req, le16_to_cpu(cqe->status) >> 1);
+		iod->nvme_req.result = cqe->result;
+		blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
 	}
 }
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 76b6eedccaf9..f9c76441e8c9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -238,7 +238,7 @@ static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
 
 static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
 {
-	req->rsp->result = cpu_to_le32(result);
+	req->rsp->result.u32 = cpu_to_le32(result);
 }
 
 /*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 7676557ce357..18ce9f7cc881 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -949,11 +949,11 @@ struct nvme_completion {
 	/*
 	 * Used by Admin and Fabrics commands to return data:
 	 */
-	union {
-		__le16	result16;
-		__le32	result;
-		__le64	result64;
-	};
+	union nvme_result {
+		__le16	u16;
+		__le32	u32;
+		__le64	u64;
+	} result;
 	__le16	sq_head;	/* how much of this queue may be reclaimed */
 	__le16	sq_id;		/* submission queue that generated this entry */
 	__u16	command_id;	/* of the command which completed */
-- 
cgit v1.2.3


From cf43e6be865a582ba66ee4747ae27a0513f6bba1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Mon, 7 Nov 2016 21:32:37 -0700
Subject: block: add scalable completion tracking of requests

For legacy block, we simply track them in the request queue. For
blk-mq, we track them on a per-sw queue basis, which we can then
sum up through the hardware queues and finally to a per device
state.

The stats are tracked in, roughly, 0.1s interval windows.

Add sysfs files to display the stats.

The feature is off by default, to avoid any extra overhead. In-kernel
users of it can turn it on by setting QUEUE_FLAG_STATS in the queue
flags. We currently don't turn it on if someone just reads any of
the stats files, that is something we could add as well.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/Makefile            |   2 +-
 block/blk-core.c          |  14 ++-
 block/blk-mq-sysfs.c      |  47 +++++++++
 block/blk-mq.c            |  25 +++++
 block/blk-mq.h            |   3 +
 block/blk-stat.c          | 248 ++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-stat.h          |  42 ++++++++
 block/blk-sysfs.c         |  26 +++++
 include/linux/blk_types.h |  16 +++
 include/linux/blkdev.h    |   7 ++
 10 files changed, 427 insertions(+), 3 deletions(-)
 create mode 100644 block/blk-stat.c
 create mode 100644 block/blk-stat.h

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index 934dac73fb37..2528c596f7ec 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			blk-lib.o blk-mq.o blk-mq-tag.o \
+			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
 			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
 			badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 2deca48a4a05..216372b01624 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2464,6 +2464,11 @@ void blk_start_request(struct request *req)
 {
 	blk_dequeue_request(req);
 
+	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
+		blk_stat_set_issue_time(&req->issue_stat);
+		req->rq_flags |= RQF_STATS;
+	}
+
 	/*
 	 * We are now handing the request to the hardware, initialize
 	 * resid_len to full count and add the timeout handler.
@@ -2683,8 +2688,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
  */
 void blk_finish_request(struct request *req, int error)
 {
+	struct request_queue *q = req->q;
+
+	if (req->rq_flags & RQF_STATS)
+		blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);
+
 	if (req->rq_flags & RQF_QUEUED)
-		blk_queue_end_tag(req->q, req);
+		blk_queue_end_tag(q, req);
 
 	BUG_ON(blk_queued_rq(req));
 
@@ -2704,7 +2714,7 @@ void blk_finish_request(struct request *req, int error)
 		if (blk_bidi_rq(req))
 			__blk_put_request(req->next_rq->q, req->next_rq);
 
-		__blk_put_request(req->q, req);
+		__blk_put_request(q, req);
 	}
 }
 EXPORT_SYMBOL(blk_finish_request);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 01fb455d3377..eacd3af72099 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -259,6 +259,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return ret;
 }
 
+static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_ctx *ctx;
+	unsigned int i;
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		blk_stat_init(&ctx->stat[BLK_STAT_READ]);
+		blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
+	}
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+					  const char *page, size_t count)
+{
+	blk_mq_stat_clear(hctx);
+	return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+			pre, (long long) stat->nr_samples,
+			(long long) stat->mean, (long long) stat->min,
+			(long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	struct blk_rq_stat stat[2];
+	ssize_t ret;
+
+	blk_stat_init(&stat[BLK_STAT_READ]);
+	blk_stat_init(&stat[BLK_STAT_WRITE]);
+
+	blk_hctx_stat_get(hctx, stat);
+
+	ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
+	ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
+	return ret;
+}
+
 static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_sysfs_dispatched_show,
@@ -317,6 +358,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
 	.show = blk_mq_hw_sysfs_poll_show,
 	.store = blk_mq_hw_sysfs_poll_store,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+	.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+	.show = blk_mq_hw_sysfs_stat_show,
+	.store = blk_mq_hw_sysfs_stat_store,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_queued.attr,
@@ -327,6 +373,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_cpus.attr,
 	&blk_mq_hw_sysfs_active.attr,
 	&blk_mq_hw_sysfs_poll.attr,
+	&blk_mq_hw_sysfs_stat.attr,
 	NULL,
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6f5cb3f3dcac..19795886d46e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -30,6 +30,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-stat.h"
 
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
@@ -403,10 +404,27 @@ static void blk_mq_ipi_complete_request(struct request *rq)
 	put_cpu();
 }
 
+static void blk_mq_stat_add(struct request *rq)
+{
+	if (rq->rq_flags & RQF_STATS) {
+		/*
+		 * We could rq->mq_ctx here, but there's less of a risk
+		 * of races if we have the completion event add the stats
+		 * to the local software queue.
+		 */
+		struct blk_mq_ctx *ctx;
+
+		ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
+		blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
+	}
+}
+
 static void __blk_mq_complete_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
+	blk_mq_stat_add(rq);
+
 	if (!q->softirq_done_fn)
 		blk_mq_end_request(rq, rq->errors);
 	else
@@ -450,6 +468,11 @@ void blk_mq_start_request(struct request *rq)
 	if (unlikely(blk_bidi_rq(rq)))
 		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
 
+	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
+		blk_stat_set_issue_time(&rq->issue_stat);
+		rq->rq_flags |= RQF_STATS;
+	}
+
 	blk_add_timer(rq);
 
 	/*
@@ -1784,6 +1807,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		spin_lock_init(&__ctx->lock);
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
+		blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
+		blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
 
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
 		if (!cpu_online(i))
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ac772dac7ce8..b444370ae05b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -1,6 +1,8 @@
 #ifndef INT_BLK_MQ_H
 #define INT_BLK_MQ_H
 
+#include "blk-stat.h"
+
 struct blk_mq_tag_set;
 
 struct blk_mq_ctx {
@@ -18,6 +20,7 @@ struct blk_mq_ctx {
 
 	/* incremented at completion time */
 	unsigned long		____cacheline_aligned_in_smp rq_completed[2];
+	struct blk_rq_stat	stat[2];
 
 	struct request_queue	*queue;
 	struct kobject		kobj;
diff --git a/block/blk-stat.c b/block/blk-stat.c
new file mode 100644
index 000000000000..688c958367ee
--- /dev/null
+++ b/block/blk-stat.c
@@ -0,0 +1,248 @@
+/*
+ * Block stat tracking code
+ *
+ * Copyright (C) 2016 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/blk-mq.h>
+
+#include "blk-stat.h"
+#include "blk-mq.h"
+
+static void blk_stat_flush_batch(struct blk_rq_stat *stat)
+{
+	const s32 nr_batch = READ_ONCE(stat->nr_batch);
+	const s32 nr_samples = READ_ONCE(stat->nr_batch);
+
+	if (!nr_batch)
+		return;
+	if (!nr_samples)
+		stat->mean = div64_s64(stat->batch, nr_batch);
+	else {
+		stat->mean = div64_s64((stat->mean * nr_samples) +
+					stat->batch,
+					nr_batch + nr_samples);
+	}
+
+	stat->nr_samples += nr_batch;
+	stat->nr_batch = stat->batch = 0;
+}
+
+static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
+{
+	if (!src->nr_samples)
+		return;
+
+	blk_stat_flush_batch(src);
+
+	dst->min = min(dst->min, src->min);
+	dst->max = max(dst->max, src->max);
+
+	if (!dst->nr_samples)
+		dst->mean = src->mean;
+	else {
+		dst->mean = div64_s64((src->mean * src->nr_samples) +
+					(dst->mean * dst->nr_samples),
+					dst->nr_samples + src->nr_samples);
+	}
+	dst->nr_samples += src->nr_samples;
+}
+
+static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	uint64_t latest = 0;
+	int i, j, nr;
+
+	blk_stat_init(&dst[BLK_STAT_READ]);
+	blk_stat_init(&dst[BLK_STAT_WRITE]);
+
+	nr = 0;
+	do {
+		uint64_t newest = 0;
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			hctx_for_each_ctx(hctx, ctx, j) {
+				if (!ctx->stat[BLK_STAT_READ].nr_samples &&
+				    !ctx->stat[BLK_STAT_WRITE].nr_samples)
+					continue;
+				if (ctx->stat[BLK_STAT_READ].time > newest)
+					newest = ctx->stat[BLK_STAT_READ].time;
+				if (ctx->stat[BLK_STAT_WRITE].time > newest)
+					newest = ctx->stat[BLK_STAT_WRITE].time;
+			}
+		}
+
+		/*
+		 * No samples
+		 */
+		if (!newest)
+			break;
+
+		if (newest > latest)
+			latest = newest;
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			hctx_for_each_ctx(hctx, ctx, j) {
+				if (ctx->stat[BLK_STAT_READ].time == newest) {
+					blk_stat_sum(&dst[BLK_STAT_READ],
+						     &ctx->stat[BLK_STAT_READ]);
+					nr++;
+				}
+				if (ctx->stat[BLK_STAT_WRITE].time == newest) {
+					blk_stat_sum(&dst[BLK_STAT_WRITE],
+						     &ctx->stat[BLK_STAT_WRITE]);
+					nr++;
+				}
+			}
+		}
+		/*
+		 * If we race on finding an entry, just loop back again.
+		 * Should be very rare.
+		 */
+	} while (!nr);
+
+	dst[BLK_STAT_READ].time = dst[BLK_STAT_WRITE].time = latest;
+}
+
+void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+{
+	if (q->mq_ops)
+		blk_mq_stat_get(q, dst);
+	else {
+		memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ],
+				sizeof(struct blk_rq_stat));
+		memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE],
+				sizeof(struct blk_rq_stat));
+	}
+}
+
+void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+{
+	struct blk_mq_ctx *ctx;
+	unsigned int i, nr;
+
+	nr = 0;
+	do {
+		uint64_t newest = 0;
+
+		hctx_for_each_ctx(hctx, ctx, i) {
+			if (!ctx->stat[BLK_STAT_READ].nr_samples &&
+			    !ctx->stat[BLK_STAT_WRITE].nr_samples)
+				continue;
+
+			if (ctx->stat[BLK_STAT_READ].time > newest)
+				newest = ctx->stat[BLK_STAT_READ].time;
+			if (ctx->stat[BLK_STAT_WRITE].time > newest)
+				newest = ctx->stat[BLK_STAT_WRITE].time;
+		}
+
+		if (!newest)
+			break;
+
+		hctx_for_each_ctx(hctx, ctx, i) {
+			if (ctx->stat[BLK_STAT_READ].time == newest) {
+				blk_stat_sum(&dst[BLK_STAT_READ],
+						&ctx->stat[BLK_STAT_READ]);
+				nr++;
+			}
+			if (ctx->stat[BLK_STAT_WRITE].time == newest) {
+				blk_stat_sum(&dst[BLK_STAT_WRITE],
+						&ctx->stat[BLK_STAT_WRITE]);
+				nr++;
+			}
+		}
+		/*
+		 * If we race on finding an entry, just loop back again.
+		 * Should be very rare, as the window is only updated
+		 * occasionally
+		 */
+	} while (!nr);
+}
+
+static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
+{
+	stat->min = -1ULL;
+	stat->max = stat->nr_samples = stat->mean = 0;
+	stat->batch = stat->nr_batch = 0;
+	stat->time = time_now & BLK_STAT_NSEC_MASK;
+}
+
+void blk_stat_init(struct blk_rq_stat *stat)
+{
+	__blk_stat_init(stat, ktime_to_ns(ktime_get()));
+}
+
+static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
+{
+	return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
+}
+
+bool blk_stat_is_current(struct blk_rq_stat *stat)
+{
+	return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+}
+
+void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
+{
+	s64 now, value;
+
+	now = __blk_stat_time(ktime_to_ns(ktime_get()));
+	if (now < blk_stat_time(&rq->issue_stat))
+		return;
+
+	if (!__blk_stat_is_current(stat, now))
+		__blk_stat_init(stat, now);
+
+	value = now - blk_stat_time(&rq->issue_stat);
+	if (value > stat->max)
+		stat->max = value;
+	if (value < stat->min)
+		stat->min = value;
+
+	if (stat->batch + value < stat->batch ||
+	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+		blk_stat_flush_batch(stat);
+
+	stat->batch += value;
+	stat->nr_batch++;
+}
+
+void blk_stat_clear(struct request_queue *q)
+{
+	if (q->mq_ops) {
+		struct blk_mq_hw_ctx *hctx;
+		struct blk_mq_ctx *ctx;
+		int i, j;
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			hctx_for_each_ctx(hctx, ctx, j) {
+				blk_stat_init(&ctx->stat[BLK_STAT_READ]);
+				blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
+			}
+		}
+	} else {
+		blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
+		blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
+	}
+}
+
+void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+{
+	stat->time = (stat->time & BLK_STAT_MASK) |
+			(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
+}
+
+/*
+ * Enable stat tracking, return whether it was enabled
+ */
+bool blk_stat_enable(struct request_queue *q)
+{
+	if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
+		set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+		return false;
+	}
+
+	return true;
+}
diff --git a/block/blk-stat.h b/block/blk-stat.h
new file mode 100644
index 000000000000..a2050a0a5314
--- /dev/null
+++ b/block/blk-stat.h
@@ -0,0 +1,42 @@
+#ifndef BLK_STAT_H
+#define BLK_STAT_H
+
+/*
+ * ~0.13s window as a power-of-2 (2^27 nsecs)
+ */
+#define BLK_STAT_NSEC		134217728ULL
+#define BLK_STAT_NSEC_MASK	~(BLK_STAT_NSEC - 1)
+
+/*
+ * Upper 3 bits can be used elsewhere
+ */
+#define BLK_STAT_RES_BITS	3
+#define BLK_STAT_SHIFT		(64 - BLK_STAT_RES_BITS)
+#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SHIFT) - 1)
+#define BLK_STAT_MASK		~BLK_STAT_TIME_MASK
+
+enum {
+	BLK_STAT_READ	= 0,
+	BLK_STAT_WRITE,
+};
+
+void blk_stat_add(struct blk_rq_stat *, struct request *);
+void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
+void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *);
+void blk_stat_clear(struct request_queue *);
+void blk_stat_init(struct blk_rq_stat *);
+bool blk_stat_is_current(struct blk_rq_stat *);
+void blk_stat_set_issue_time(struct blk_issue_stat *);
+bool blk_stat_enable(struct request_queue *);
+
+static inline u64 __blk_stat_time(u64 time)
+{
+	return time & BLK_STAT_TIME_MASK;
+}
+
+static inline u64 blk_stat_time(struct blk_issue_stat *stat)
+{
+	return __blk_stat_time(stat->time);
+}
+
+#endif
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 488c2e28feb8..9cdb7247727a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -401,6 +401,26 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+			pre, (long long) stat->nr_samples,
+			(long long) stat->mean, (long long) stat->min,
+			(long long) stat->max);
+}
+
+static ssize_t queue_stats_show(struct request_queue *q, char *page)
+{
+	struct blk_rq_stat stat[2];
+	ssize_t ret;
+
+	blk_queue_stat_get(q, stat);
+
+	ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
+	ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -553,6 +573,11 @@ static struct queue_sysfs_entry queue_dax_entry = {
 	.show = queue_dax_show,
 };
 
+static struct queue_sysfs_entry queue_stats_entry = {
+	.attr = {.name = "stats", .mode = S_IRUGO },
+	.show = queue_stats_show,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -582,6 +607,7 @@ static struct attribute *default_attrs[] = {
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
 	&queue_dax_entry.attr,
+	&queue_stats_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 562ac46cb790..4d0044d09984 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -250,4 +250,20 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 }
 
+struct blk_issue_stat {
+	u64 time;
+};
+
+#define BLK_RQ_STAT_BATCH	64
+
+struct blk_rq_stat {
+	s64 mean;
+	u64 min;
+	u64 max;
+	s32 nr_samples;
+	s32 nr_batch;
+	u64 batch;
+	s64 time;
+};
+
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d364be6e6959..303723a2e5b8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -117,6 +117,8 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_PM			((__force req_flags_t)(1 << 15))
 /* on IO scheduler merge hash */
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
+/* IO stats tracking on */
+#define RQF_STATS		((__force req_flags_t)(1 << 17))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -197,6 +199,7 @@ struct request {
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
 	unsigned long start_time;
+	struct blk_issue_stat issue_stat;
 #ifdef CONFIG_BLK_CGROUP
 	struct request_list *rl;		/* rl this rq is alloced from */
 	unsigned long long start_time_ns;
@@ -492,6 +495,9 @@ struct request_queue {
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
+
+	struct blk_rq_stat	rq_stats[2];
+
 	/*
 	 * Number of active block driver functions for which blk_drain_queue()
 	 * must wait. Must be incremented around functions that unlock the
@@ -585,6 +591,7 @@ struct request_queue {
 #define QUEUE_FLAG_FUA	       24	/* device supports FUA writes */
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
+#define QUEUE_FLAG_STATS       27	/* track rq completion times */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit v1.2.3


From 87760e5eef359788047d6fd54fc12eec74ce0d27 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 9 Nov 2016 12:38:14 -0700
Subject: block: hook up writeback throttling

Enable throttling of buffered writeback to make it a lot
more smooth, and has way less impact on other system activity.
Background writeback should be, by definition, background
activity. The fact that we flush huge bundles of it at the time
means that it potentially has heavy impacts on foreground workloads,
which isn't ideal. We can't easily limit the sizes of writes that
we do, since that would impact file system layout in the presence
of delayed allocation. So just throttle back buffered writeback,
unless someone is waiting for it.

The algorithm for when to throttle takes its inspiration in the
CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors
the minimum latencies of requests over a window of time. In that
window of time, if the minimum latency of any request exceeds a
given target, then a scale count is incremented and the queue depth
is shrunk. The next monitoring window is shrunk accordingly. Unlike
CoDel, if we hit a window that exhibits good behavior, then we
simply increment the scale count and re-calculate the limits for that
scale value. This prevents us from oscillating between a
close-to-ideal value and max all the time, instead remaining in the
windows where we get good behavior.

Unlike CoDel, blk-wb allows the scale count to to negative. This
happens if we primarily have writes going on. Unlike positive
scale counts, this doesn't change the size of the monitoring window.
When the heavy writers finish, blk-bw quickly snaps back to it's
stable state of a zero scale count.

The patch registers a sysfs entry, 'wb_lat_usec'. This sets the latency
target to me met. It defaults to 2 msec for non-rotational storage, and
75 msec for rotational storage. Setting this value to '0' disables
blk-wb. Generally, a user would not have to touch this setting.

We don't enable WBT on devices that are managed with CFQ, and have
a non-root block cgroup attached. If we have a proportional share setup
on this particular disk, then the wbt throttling will interfere with
that. We don't have a strong need for wbt for that case, since we will
rely on CFQ doing that for us.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/queue-sysfs.txt |  7 +++
 block/Kconfig                       | 26 +++++++++++
 block/blk-core.c                    | 17 ++++++-
 block/blk-mq.c                      | 26 ++++++++++-
 block/blk-settings.c                |  4 ++
 block/blk-sysfs.c                   | 88 +++++++++++++++++++++++++++++++++++++
 block/cfq-iosched.c                 | 14 ++++++
 include/linux/blkdev.h              |  3 ++
 8 files changed, 181 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 2a3904030dea..87abf1ac2939 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -169,5 +169,12 @@ This is the number of bytes the device can write in a single write-same
 command.  A value of '0' means write-same is not supported by this
 device.
 
+wb_lat_usec (RW)
+----------------
+If the device is registered for writeback throttling, then this file shows
+the target minimum read latency. If this latency is exceeded in a given
+window of time (see wb_window_usec), then the writeback throttling will start
+scaling back writes.
+
 
 Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/block/Kconfig b/block/Kconfig
index 3a024440a669..8bf114a3858a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -121,6 +121,32 @@ config BLK_CMDLINE_PARSER
 
 	See Documentation/block/cmdline-partition.txt for more information.
 
+config BLK_WBT
+	bool "Enable support for block device writeback throttling"
+	default n
+	---help---
+	Enabling this option enables the block layer to throttle buffered
+	background writeback from the VM, making it more smooth and having
+	less impact on foreground operations. The throttling is done
+	dynamically on an algorithm loosely based on CoDel, factoring in
+	the realtime performance of the disk.
+
+config BLK_WBT_SQ
+	bool "Single queue writeback throttling"
+	default n
+	depends on BLK_WBT
+	---help---
+	Enable writeback throttling by default on legacy single queue devices
+
+config BLK_WBT_MQ
+	bool "Multiqueue writeback throttling"
+	default y
+	depends on BLK_WBT
+	---help---
+	Enable writeback throttling by default on multiqueue devices.
+	Multiqueue currently doesn't have support for IO scheduling,
+	enabling this option is recommended.
+
 menu "Partition Types"
 
 source "block/partitions/Kconfig"
diff --git a/block/blk-core.c b/block/blk-core.c
index 216372b01624..59f8129a4295 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-wbt.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -882,6 +883,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 
 fail:
 	blk_free_flush_queue(q->fq);
+	wbt_exit(q);
 	return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1344,6 +1346,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
+	wbt_requeue(q->rq_wb, &rq->issue_stat);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1436,6 +1439,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
+	wbt_done(q->rq_wb, &req->issue_stat);
+
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
@@ -1663,6 +1668,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	int el_ret, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
 	unsigned int request_count = 0;
+	unsigned int wb_acct;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -1715,17 +1721,22 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
+	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
 	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
 	if (IS_ERR(req)) {
+		__wbt_done(q->rq_wb, wb_acct);
 		bio->bi_error = PTR_ERR(req);
 		bio_endio(bio);
 		goto out_unlock;
 	}
 
+	wbt_track(&req->issue_stat, wb_acct);
+
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
 	 * may now be mergeable after it had proven unmergeable (above).
@@ -2467,6 +2478,7 @@ void blk_start_request(struct request *req)
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
 		blk_stat_set_issue_time(&req->issue_stat);
 		req->rq_flags |= RQF_STATS;
+		wbt_issue(req->q->rq_wb, &req->issue_stat);
 	}
 
 	/*
@@ -2708,9 +2720,10 @@ void blk_finish_request(struct request *req, int error)
 
 	blk_account_io_done(req);
 
-	if (req->end_io)
+	if (req->end_io) {
+		wbt_done(req->q->rq_wb, &req->issue_stat);
 		req->end_io(req, error);
-	else {
+	} else {
 		if (blk_bidi_rq(req))
 			__blk_put_request(req->next_rq->q, req->next_rq);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 19795886d46e..d180c989a0e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -31,6 +31,7 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 #include "blk-stat.h"
+#include "blk-wbt.h"
 
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
@@ -326,6 +327,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
 	if (rq->rq_flags & RQF_MQ_INFLIGHT)
 		atomic_dec(&hctx->nr_active);
+
+	wbt_done(q->rq_wb, &rq->issue_stat);
 	rq->rq_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
@@ -354,6 +357,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
 	blk_account_io_done(rq);
 
 	if (rq->end_io) {
+		wbt_done(rq->q->rq_wb, &rq->issue_stat);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -471,6 +475,7 @@ void blk_mq_start_request(struct request *rq)
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
 		blk_stat_set_issue_time(&rq->issue_stat);
 		rq->rq_flags |= RQF_STATS;
+		wbt_issue(q->rq_wb, &rq->issue_stat);
 	}
 
 	blk_add_timer(rq);
@@ -508,6 +513,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 	struct request_queue *q = rq->q;
 
 	trace_block_rq_requeue(q, rq);
+	wbt_requeue(q->rq_wb, &rq->issue_stat);
 
 	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
 		if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -1339,6 +1345,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	blk_qc_t cookie;
+	unsigned int wb_acct;
 
 	blk_queue_bounce(q, &bio);
 
@@ -1353,9 +1360,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
 		return BLK_QC_T_NONE;
 
+	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+
 	rq = blk_mq_map_request(q, bio, &data);
-	if (unlikely(!rq))
+	if (unlikely(!rq)) {
+		__wbt_done(q->rq_wb, wb_acct);
 		return BLK_QC_T_NONE;
+	}
+
+	wbt_track(&rq->issue_stat, wb_acct);
 
 	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
 
@@ -1439,6 +1452,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	struct blk_mq_alloc_data data;
 	struct request *rq;
 	blk_qc_t cookie;
+	unsigned int wb_acct;
 
 	blk_queue_bounce(q, &bio);
 
@@ -1455,9 +1469,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	} else
 		request_count = blk_plug_queued_count(q);
 
+	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+
 	rq = blk_mq_map_request(q, bio, &data);
-	if (unlikely(!rq))
+	if (unlikely(!rq)) {
+		__wbt_done(q->rq_wb, wb_acct);
 		return BLK_QC_T_NONE;
+	}
+
+	wbt_track(&rq->issue_stat, wb_acct);
 
 	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
 
@@ -2139,6 +2159,8 @@ void blk_mq_free_queue(struct request_queue *q)
 	list_del_init(&q->all_q_node);
 	mutex_unlock(&all_q_mutex);
 
+	wbt_exit(q);
+
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 9cf053759363..c7ccabc0ec3e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 
 #include "blk.h"
+#include "blk-wbt.h"
 
 unsigned long blk_max_low_pfn;
 EXPORT_SYMBOL(blk_max_low_pfn);
@@ -845,6 +846,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 	q->queue_depth = depth;
+	wbt_set_queue_depth(q->rq_wb, depth);
 }
 EXPORT_SYMBOL(blk_set_queue_depth);
 
@@ -868,6 +870,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 	else
 		queue_flag_clear(QUEUE_FLAG_FUA, q);
 	spin_unlock_irq(q->queue_lock);
+
+	wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9cdb7247727a..9262d2d60a09 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -13,6 +13,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-wbt.h"
 
 struct queue_sysfs_entry {
 	struct attribute attr;
@@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
+static ssize_t queue_var_store64(u64 *var, const char *page)
+{
+	int err;
+	u64 v;
+
+	err = kstrtou64(page, 10, &v);
+	if (err < 0)
+		return err;
+
+	*var = v;
+	return 0;
+}
+
 static ssize_t queue_requests_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(q->nr_requests, (page));
@@ -364,6 +378,32 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+{
+	if (!q->rq_wb)
+		return -EINVAL;
+
+	return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+}
+
+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+				  size_t count)
+{
+	ssize_t ret;
+	u64 val;
+
+	if (!q->rq_wb)
+		return -EINVAL;
+
+	ret = queue_var_store64(&val, page);
+	if (ret < 0)
+		return ret;
+
+	q->rq_wb->min_lat_nsec = val * 1000ULL;
+	wbt_update_limits(q->rq_wb);
+	return count;
+}
+
 static ssize_t queue_wc_show(struct request_queue *q, char *page)
 {
 	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -578,6 +618,12 @@ static struct queue_sysfs_entry queue_stats_entry = {
 	.show = queue_stats_show,
 };
 
+static struct queue_sysfs_entry queue_wb_lat_entry = {
+	.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_wb_lat_show,
+	.store = queue_wb_lat_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -608,6 +654,7 @@ static struct attribute *default_attrs[] = {
 	&queue_wc_entry.attr,
 	&queue_dax_entry.attr,
 	&queue_stats_entry.attr,
+	&queue_wb_lat_entry.attr,
 	NULL,
 };
 
@@ -682,6 +729,7 @@ static void blk_release_queue(struct kobject *kobj)
 	struct request_queue *q =
 		container_of(kobj, struct request_queue, kobj);
 
+	wbt_exit(q);
 	bdi_exit(&q->backing_dev_info);
 	blkcg_exit_queue(q);
 
@@ -722,6 +770,44 @@ struct kobj_type blk_queue_ktype = {
 	.release	= blk_release_queue,
 };
 
+static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat)
+{
+	blk_queue_stat_get(data, stat);
+}
+
+static void blk_wb_stat_clear(void *data)
+{
+	blk_stat_clear(data);
+}
+
+static bool blk_wb_stat_is_current(struct blk_rq_stat *stat)
+{
+	return blk_stat_is_current(stat);
+}
+
+static struct wb_stat_ops wb_stat_ops = {
+	.get		= blk_wb_stat_get,
+	.is_current	= blk_wb_stat_is_current,
+	.clear		= blk_wb_stat_clear,
+};
+
+static void blk_wb_init(struct request_queue *q)
+{
+#ifndef CONFIG_BLK_WBT_MQ
+	if (q->mq_ops)
+		return;
+#endif
+#ifndef CONFIG_BLK_WBT_SQ
+	if (q->request_fn)
+		return;
+#endif
+
+	/*
+	 * If this fails, we don't get throttling
+	 */
+	wbt_init(q, &wb_stat_ops);
+}
+
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
@@ -761,6 +847,8 @@ int blk_register_queue(struct gendisk *disk)
 	if (q->mq_ops)
 		blk_mq_register_dev(dev, q);
 
+	blk_wb_init(q);
+
 	if (!q->request_fn)
 		return 0;
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 61010511c5a0..e280d08ef6d7 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -16,6 +16,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/blk-cgroup.h>
 #include "blk.h"
+#include "blk-wbt.h"
 
 /*
  * tunables
@@ -3762,9 +3763,11 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
 	uint64_t serial_nr;
+	bool nonroot_cg;
 
 	rcu_read_lock();
 	serial_nr = bio_blkcg(bio)->css.serial_nr;
+	nonroot_cg = bio_blkcg(bio) != &blkcg_root;
 	rcu_read_unlock();
 
 	/*
@@ -3774,6 +3777,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
 		return;
 
+	/*
+	 * If we have a non-root cgroup, we can depend on that to
+	 * do proper throttling of writes. Turn off wbt for that
+	 * case.
+	 */
+	if (nonroot_cg) {
+		struct request_queue *q = cfqd->queue;
+
+		wbt_disable(q->rq_wb);
+	}
+
 	/*
 	 * Drop reference to queues.  New queues will be assigned in new
 	 * group upon arrival of fresh requests.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 303723a2e5b8..15da9e430f90 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
+struct rq_wb;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -383,6 +384,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	struct rq_wb		*rq_wb;
+
 	/*
 	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
 	 * is used, root blkg allocates from @q->root_rl and all other
-- 
cgit v1.2.3


From ee7930ee27fe5240398cc302fa8eb4454725f188 Mon Sep 17 00:00:00 2001
From: Markus Mayer <mmayer@broadcom.com>
Date: Mon, 7 Nov 2016 10:02:23 -0800
Subject: cpufreq: stats: New sysfs attribute for clearing statistics

Allow CPUfreq statistics to be cleared by writing anything to
/sys/.../cpufreq/stats/reset.

Signed-off-by: Markus Mayer <mmayer@broadcom.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpufreq-stats.txt |  6 ++++++
 drivers/cpufreq/cpufreq_stats.c          | 22 ++++++++++++++++++++++
 include/linux/cpufreq.h                  |  4 ++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt
index 8d9773f23550..3c355f6ad834 100644
--- a/Documentation/cpu-freq/cpufreq-stats.txt
+++ b/Documentation/cpu-freq/cpufreq-stats.txt
@@ -44,11 +44,17 @@ the stats driver insertion.
 total 0
 drwxr-xr-x  2 root root    0 May 14 16:06 .
 drwxr-xr-x  3 root root    0 May 14 15:58 ..
+--w-------  1 root root 4096 May 14 16:06 reset
 -r--r--r--  1 root root 4096 May 14 16:06 time_in_state
 -r--r--r--  1 root root 4096 May 14 16:06 total_trans
 -r--r--r--  1 root root 4096 May 14 16:06 trans_table
 --------------------------------------------------------------------------------
 
+-  reset
+Write-only attribute that can be used to reset the stat counters. This can be
+useful for evaluating system behaviour under different governors without the
+need for a reboot.
+
 -  time_in_state
 This gives the amount of time spent in each of the frequencies supported by
 this CPU. The cat output will have "<frequency> <time>" pair in each line, which
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 06d3abdffd3a..ac284e66839c 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -41,6 +41,18 @@ static int cpufreq_stats_update(struct cpufreq_stats *stats)
 	return 0;
 }
 
+static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
+{
+	unsigned int count = stats->max_state;
+
+	memset(stats->time_in_state, 0, count * sizeof(u64));
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+	memset(stats->trans_table, 0, count * count * sizeof(int));
+#endif
+	stats->last_time = get_jiffies_64();
+	stats->total_trans = 0;
+}
+
 static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
 {
 	return sprintf(buf, "%d\n", policy->stats->total_trans);
@@ -64,6 +76,14 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 	return len;
 }
 
+static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
+			   size_t count)
+{
+	/* We don't care what is written to the attribute. */
+	cpufreq_stats_clear_table(policy->stats);
+	return count;
+}
+
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
@@ -113,10 +133,12 @@ cpufreq_freq_attr_ro(trans_table);
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_wo(reset);
 
 static struct attribute *default_attrs[] = {
 	&total_trans.attr,
 	&time_in_state.attr,
+	&reset.attr,
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	&trans_table.attr,
 #endif
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 32dc0cbd51ca..40dc2e29f480 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -234,6 +234,10 @@ __ATTR(_name, _perm, show_##_name, NULL)
 static struct freq_attr _name =			\
 __ATTR(_name, 0644, show_##_name, store_##_name)
 
+#define cpufreq_freq_attr_wo(_name)		\
+static struct freq_attr _name =			\
+__ATTR(_name, 0200, NULL, store_##_name)
+
 struct global_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct kobject *kobj,
-- 
cgit v1.2.3


From 53d74d056a4e306a72b8883d325b5d853c0618e6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 19 Oct 2016 19:28:12 +0100
Subject: thread_info: factor out restart_block

Since commit f56141e3e2d9aabf ("all arches, signal: move restart_block
to struct task_struct"), thread_info and restart_block have been
logically distinct, yet struct restart_block is still defined in
<linux/thread_info.h>.

At least one architecture (erroneously) uses restart_block as part of
its thread_info, and thus the definition of restart_block must come
before the include of <asm/thread_info>. Subsequent patches in this
series need to shuffle the order of includes and definitions in
<linux/thread_info.h>, and will make this ordering fragile.

This patch moves the definition of restart_block out to its own header.
This serves as generic cleanup, logically separating thread_info and
restart_block, and also makes it easier to avoid fragility.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/restart_block.h | 51 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/thread_info.h   | 41 +---------------------------------
 2 files changed, 52 insertions(+), 40 deletions(-)
 create mode 100644 include/linux/restart_block.h

(limited to 'include/linux')

diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
new file mode 100644
index 000000000000..0d905d8ec553
--- /dev/null
+++ b/include/linux/restart_block.h
@@ -0,0 +1,51 @@
+/*
+ * Common syscall restarting data
+ */
+#ifndef __LINUX_RESTART_BLOCK_H
+#define __LINUX_RESTART_BLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct timespec;
+struct compat_timespec;
+struct pollfd;
+
+/*
+ * System call restart block.
+ */
+struct restart_block {
+	long (*fn)(struct restart_block *);
+	union {
+		/* For futex_wait and futex_wait_requeue_pi */
+		struct {
+			u32 __user *uaddr;
+			u32 val;
+			u32 flags;
+			u32 bitset;
+			u64 time;
+			u32 __user *uaddr2;
+		} futex;
+		/* For nanosleep */
+		struct {
+			clockid_t clockid;
+			struct timespec __user *rmtp;
+#ifdef CONFIG_COMPAT
+			struct compat_timespec __user *compat_rmtp;
+#endif
+			u64 expires;
+		} nanosleep;
+		/* For poll */
+		struct {
+			struct pollfd __user *ufds;
+			int nfds;
+			int has_timeout;
+			unsigned long tv_sec;
+			unsigned long tv_nsec;
+		} poll;
+	};
+};
+
+extern long do_no_restart_syscall(struct restart_block *parm);
+
+#endif /* __LINUX_RESTART_BLOCK_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2873baf5372a..c75c6ab364ca 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -9,51 +9,12 @@
 
 #include <linux/types.h>
 #include <linux/bug.h>
-
-struct timespec;
-struct compat_timespec;
+#include <linux/restart_block.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 #define current_thread_info() ((struct thread_info *)current)
 #endif
 
-/*
- * System call restart block.
- */
-struct restart_block {
-	long (*fn)(struct restart_block *);
-	union {
-		/* For futex_wait and futex_wait_requeue_pi */
-		struct {
-			u32 __user *uaddr;
-			u32 val;
-			u32 flags;
-			u32 bitset;
-			u64 time;
-			u32 __user *uaddr2;
-		} futex;
-		/* For nanosleep */
-		struct {
-			clockid_t clockid;
-			struct timespec __user *rmtp;
-#ifdef CONFIG_COMPAT
-			struct compat_timespec __user *compat_rmtp;
-#endif
-			u64 expires;
-		} nanosleep;
-		/* For poll */
-		struct {
-			struct pollfd __user *ufds;
-			int nfds;
-			int has_timeout;
-			unsigned long tv_sec;
-			unsigned long tv_nsec;
-		} poll;
-	};
-};
-
-extern long do_no_restart_syscall(struct restart_block *parm);
-
 #include <linux/bitops.h>
 #include <asm/thread_info.h>
 
-- 
cgit v1.2.3


From dc3d2a679cd8631b8a570fc8ca5f4712d7d25698 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 19 Oct 2016 19:28:13 +0100
Subject: thread_info: include <current.h> for THREAD_INFO_IN_TASK

When CONFIG_THREAD_INFO_IN_TASK is selected, the current_thread_info()
macro relies on current having been defined prior to its use. However,
not all users of current_thread_info() include <asm/current.h>, and thus
current is not guaranteed to be defined.

When CONFIG_THREAD_INFO_IN_TASK is not selected, it's possible that
get_current() / current are based upon current_thread_info(), and
<asm/current.h> includes <asm/thread_info.h>. Thus always including
<asm/current.h> would result in circular dependences on some platforms.

To ensure both cases work, this patch includes <asm/current.h>, but only
when CONFIG_THREAD_INFO_IN_TASK is selected.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/thread_info.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c75c6ab364ca..58373875e8ee 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -12,6 +12,12 @@
 #include <linux/restart_block.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
+/*
+ * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
+ * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
+ * including <asm/current.h> can cause a circular dependency on some platforms.
+ */
+#include <asm/current.h>
 #define current_thread_info() ((struct thread_info *)current)
 #endif
 
-- 
cgit v1.2.3


From 109704492ef637956265ec2eb72ae7b3b39eb6f4 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 20 Oct 2016 00:34:00 -0700
Subject: pstore: Make spinlock per zone instead of global

Currently pstore has a global spinlock for all zones. Since the zones
are independent and modify different areas of memory, there's no need
to have a global lock, so we should use a per-zone lock as introduced
here. Also, when ramoops's ftrace use-case has a FTRACE_PER_CPU flag
introduced later, which splits the ftrace memory area into a single zone
per CPU, it will eliminate the need for locking. In preparation for this,
make the locking optional.

Signed-off-by: Joel Fernandes <joelaf@google.com>
[kees: updated commit message]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram_core.c       | 11 +++++------
 include/linux/pstore_ram.h |  1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3975deec02f8..cb92055e6016 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -48,8 +48,6 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
 	return atomic_read(&prz->buffer->start);
 }
 
-static DEFINE_RAW_SPINLOCK(buffer_lock);
-
 /* increase and wrap the start pointer, returning the old value */
 static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
 {
@@ -57,7 +55,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
 	int new;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&buffer_lock, flags);
+	raw_spin_lock_irqsave(&prz->buffer_lock, flags);
 
 	old = atomic_read(&prz->buffer->start);
 	new = old + a;
@@ -65,7 +63,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
 		new -= prz->buffer_size;
 	atomic_set(&prz->buffer->start, new);
 
-	raw_spin_unlock_irqrestore(&buffer_lock, flags);
+	raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
 
 	return old;
 }
@@ -77,7 +75,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
 	size_t new;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&buffer_lock, flags);
+	raw_spin_lock_irqsave(&prz->buffer_lock, flags);
 
 	old = atomic_read(&prz->buffer->size);
 	if (old == prz->buffer_size)
@@ -89,7 +87,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
 	atomic_set(&prz->buffer->size, new);
 
 exit:
-	raw_spin_unlock_irqrestore(&buffer_lock, flags);
+	raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
 }
 
 static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -493,6 +491,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
 
 	prz->buffer->sig = sig;
 	persistent_ram_zap(prz);
+	prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
 
 	return 0;
 }
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index c668c861c96c..244d2423dbaf 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -40,6 +40,7 @@ struct persistent_ram_zone {
 	void *vaddr;
 	struct persistent_ram_buffer *buffer;
 	size_t buffer_size;
+	raw_spinlock_t buffer_lock;
 
 	/* ECC correction */
 	char *par_buffer;
-- 
cgit v1.2.3


From bbd7bb7017d5c2b1e75f3818b4ce88fa58bb0eab Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 4 Nov 2016 09:34:34 -0600
Subject: block: move poll code to blk-mq

The poll code is blk-mq specific, let's move it to blk-mq.c. This
is a prep patch for improving the polling code.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c             | 46 -------------------------------------
 block/blk-mq.c               | 54 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd.c |  2 +-
 fs/direct-io.c               |  2 +-
 include/linux/blkdev.h       |  2 +-
 5 files changed, 57 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 59f8129a4295..eea246567884 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3312,52 +3312,6 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie)
-{
-	struct blk_plug *plug;
-	long state;
-	unsigned int queue_num;
-	struct blk_mq_hw_ctx *hctx;
-
-	if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
-	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		return false;
-
-	queue_num = blk_qc_t_to_queue_num(cookie);
-	hctx = q->queue_hw_ctx[queue_num];
-	hctx->poll_considered++;
-
-	plug = current->plug;
-	if (plug)
-		blk_flush_plug_list(plug, false);
-
-	state = current->state;
-	while (!need_resched()) {
-		int ret;
-
-		hctx->poll_invoked++;
-
-		ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
-		if (ret > 0) {
-			hctx->poll_success++;
-			set_current_state(TASK_RUNNING);
-			return true;
-		}
-
-		if (signal_pending_state(state, current))
-			set_current_state(TASK_RUNNING);
-
-		if (current->state == TASK_RUNNING)
-			return true;
-		if (ret < 0)
-			break;
-		cpu_relax();
-	}
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(blk_poll);
-
 #ifdef CONFIG_PM
 /**
  * blk_pm_runtime_init - Block layer runtime PM initialization routine
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 77110aed24ea..ae8df5ec20d3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2461,6 +2461,60 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	struct request_queue *q = hctx->queue;
+	long state;
+
+	hctx->poll_considered++;
+
+	state = current->state;
+	while (!need_resched()) {
+		int ret;
+
+		hctx->poll_invoked++;
+
+		ret = q->mq_ops->poll(hctx, rq->tag);
+		if (ret > 0) {
+			hctx->poll_success++;
+			set_current_state(TASK_RUNNING);
+			return true;
+		}
+
+		if (signal_pending_state(state, current))
+			set_current_state(TASK_RUNNING);
+
+		if (current->state == TASK_RUNNING)
+			return true;
+		if (ret < 0)
+			break;
+		cpu_relax();
+	}
+
+	return false;
+}
+
+bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_plug *plug;
+	struct request *rq;
+
+	if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
+	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		return false;
+
+	plug = current->plug;
+	if (plug)
+		blk_flush_plug_list(plug, false);
+
+	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+	rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+
+	return __blk_mq_poll(hctx, rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_poll);
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c2784cfc5e29..ef52b1e70144 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -96,7 +96,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 
 	cookie = submit_bio(bio);
 
-	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+	blk_mq_poll(bdev_get_queue(req->ns->bdev), cookie);
 }
 
 static void nvmet_execute_flush(struct nvmet_req *req)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a5138c564019..835e23a4ee4b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -457,7 +457,7 @@ static struct bio *dio_await_one(struct dio *dio)
 		dio->waiter = current;
 		spin_unlock_irqrestore(&dio->bio_lock, flags);
 		if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+		    !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
 			io_schedule();
 		/* wake up sets us TASK_RUNNING */
 		spin_lock_irqsave(&dio->bio_lock, flags);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 15da9e430f90..bab18ee5810d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -952,7 +952,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
-- 
cgit v1.2.3


From c540594f864bb4645573c2c0a304919fabb3d7ea Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 9 Nov 2016 22:02:34 +0100
Subject: bpf, mlx4: fix prog refcount in mlx4_en_try_alloc_resources error
 path

Commit 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings
scheme") added a bug in that the prog's reference count is not dropped
in the error path when mlx4_en_try_alloc_resources() is failing from
mlx4_xdp_set().

We previously took bpf_prog_add(prog, priv->rx_ring_num - 1), that we
need to release again. Earlier in the call path, dev_change_xdp_fd()
itself holds a reference to the prog as well (hence the '- 1' in the
bpf_prog_add()), so a simple atomic_sub() is safe to use here. When
an error is propagated, then bpf_prog_put() is called eventually from
dev_change_xdp_fd()

Fixes: 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  5 ++++-
 include/linux/bpf.h                            |  5 +++++
 kernel/bpf/syscall.c                           | 11 +++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0f6225c042be..9bf7320107b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2747,8 +2747,11 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	}
 
 	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
-	if (err)
+	if (err) {
+		if (prog)
+			bpf_prog_sub(prog, priv->rx_ring_num - 1);
 		goto unlock_out;
+	}
 
 	if (priv->port_up) {
 		port_up = 1;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index edcd96ded8aa..01c1487277b2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -234,6 +234,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 struct bpf_prog *bpf_prog_get(u32 ufd);
 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
+void bpf_prog_sub(struct bpf_prog *prog, int i);
 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 
@@ -303,6 +304,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+}
+
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 228f962447a5..23eb2050f15e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -680,6 +680,17 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_add);
 
+void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+	/* Only to be used for undoing previous bpf_prog_add() in some
+	 * error path. We still know that another entity in our call
+	 * path holds a reference to the program, thus atomic_sub() can
+	 * be safely used in such cases!
+	 */
+	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+}
+EXPORT_SYMBOL_GPL(bpf_prog_sub);
+
 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 {
 	return bpf_prog_add(prog, 1);
-- 
cgit v1.2.3


From 372788f964c95a6fa0f677c43d6153c27896ef42 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Thu, 10 Nov 2016 17:10:46 -0600
Subject: net: phy: expose phy_aneg_done API for use by drivers

Make phy_aneg_done() available to drivers so that the result of the
auto-negotiation initiated by phy_start_aneg() can be determined.

Remove the local implementation of phy_aneg_done() from the Aeroflex
driver and use the phy library version.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aeroflex/greth.c | 9 ---------
 drivers/net/phy/phy.c                 | 3 ++-
 include/linux/phy.h                   | 1 +
 3 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index f8df8248035e..93def92f9997 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1290,15 +1290,6 @@ static int greth_mdio_probe(struct net_device *dev)
 	return 0;
 }
 
-static inline int phy_aneg_done(struct phy_device *phydev)
-{
-	int retval;
-
-	retval = phy_read(phydev, MII_BMSR);
-
-	return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
-}
-
 static int greth_mdio_init(struct greth_private *greth)
 {
 	int ret;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 2f94c60d4939..e6dd222fddb1 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -143,13 +143,14 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts)
  * Returns > 0 on success or < 0 on error. 0 means that auto-negotiation
  * is still pending.
  */
-static inline int phy_aneg_done(struct phy_device *phydev)
+int phy_aneg_done(struct phy_device *phydev)
 {
 	if (phydev->drv->aneg_done)
 		return phydev->drv->aneg_done(phydev);
 
 	return genphy_aneg_done(phydev);
 }
+EXPORT_SYMBOL(phy_aneg_done);
 
 /* A structure for mapping a particular speed and duplex
  * combination to a particular SUPPORTED and ADVERTISED value
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e7e1fd382564..9880d73a2c3d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -786,6 +786,7 @@ void phy_detach(struct phy_device *phydev);
 void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
+int phy_aneg_done(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
 
-- 
cgit v1.2.3


From 636259880a7e7d3446a707dddebc799da94bdd0b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 12 Nov 2016 21:32:31 +0000
Subject: efi: Add support for seeding the RNG from a UEFI config table

Specify a Linux specific UEFI configuration table that carries some
random bits, and use the contents during early boot to seed the kernel's
random number generator. This allows much strong random numbers to be
generated early on.

The entropy is fed to the kernel using add_device_randomness(), which is
documented as being appropriate for being called very early.

Since UEFI configuration tables may also be consumed by kexec'd kernels,
register a reboot notifier that updates the seed in the table.

Note that the config table could be generated by the EFI stub or by any
other UEFI driver or application (e.g., GRUB), but the random seed table
GUID and the associated functionality should be considered an internal
kernel interface (unless it is promoted to ABI later on)

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20161112213237.8804-4-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/efi.h        |  8 ++++++
 2 files changed, 80 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index a4944e22f294..92914801e388 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -23,7 +23,10 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/io.h>
+#include <linux/kexec.h>
 #include <linux/platform_device.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/ucs2_string.h>
@@ -48,6 +51,7 @@ struct efi __read_mostly efi = {
 	.esrt			= EFI_INVALID_TABLE_ADDR,
 	.properties_table	= EFI_INVALID_TABLE_ADDR,
 	.mem_attr_table		= EFI_INVALID_TABLE_ADDR,
+	.rng_seed		= EFI_INVALID_TABLE_ADDR,
 };
 EXPORT_SYMBOL(efi);
 
@@ -440,6 +444,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
 	{EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
 	{EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table},
 	{EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
+	{LINUX_EFI_RANDOM_SEED_TABLE_GUID, "RNG", &efi.rng_seed},
 	{NULL_GUID, NULL, NULL},
 };
 
@@ -501,6 +506,29 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
 	pr_cont("\n");
 	set_bit(EFI_CONFIG_TABLES, &efi.flags);
 
+	if (efi.rng_seed != EFI_INVALID_TABLE_ADDR) {
+		struct linux_efi_random_seed *seed;
+		u32 size = 0;
+
+		seed = early_memremap(efi.rng_seed, sizeof(*seed));
+		if (seed != NULL) {
+			size = seed->size;
+			early_memunmap(seed, sizeof(*seed));
+		} else {
+			pr_err("Could not map UEFI random seed!\n");
+		}
+		if (size > 0) {
+			seed = early_memremap(efi.rng_seed,
+					      sizeof(*seed) + size);
+			if (seed != NULL) {
+				add_device_randomness(seed->bits, seed->size);
+				early_memunmap(seed, sizeof(*seed) + size);
+			} else {
+				pr_err("Could not map UEFI random seed!\n");
+			}
+		}
+	}
+
 	/* Parse the EFI Properties table if it exists */
 	if (efi.properties_table != EFI_INVALID_TABLE_ADDR) {
 		efi_properties_table_t *tbl;
@@ -824,3 +852,47 @@ int efi_status_to_err(efi_status_t status)
 
 	return err;
 }
+
+#ifdef CONFIG_KEXEC
+static int update_efi_random_seed(struct notifier_block *nb,
+				  unsigned long code, void *unused)
+{
+	struct linux_efi_random_seed *seed;
+	u32 size = 0;
+
+	if (!kexec_in_progress)
+		return NOTIFY_DONE;
+
+	seed = memremap(efi.rng_seed, sizeof(*seed), MEMREMAP_WB);
+	if (seed != NULL) {
+		size = min(seed->size, 32U);
+		memunmap(seed);
+	} else {
+		pr_err("Could not map UEFI random seed!\n");
+	}
+	if (size > 0) {
+		seed = memremap(efi.rng_seed, sizeof(*seed) + size,
+				MEMREMAP_WB);
+		if (seed != NULL) {
+			seed->size = size;
+			get_random_bytes(seed->bits, seed->size);
+			memunmap(seed);
+		} else {
+			pr_err("Could not map UEFI random seed!\n");
+		}
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block efi_random_seed_nb = {
+	.notifier_call = update_efi_random_seed,
+};
+
+static int register_update_efi_random_seed(void)
+{
+	if (efi.rng_seed == EFI_INVALID_TABLE_ADDR)
+		return 0;
+	return register_reboot_notifier(&efi_random_seed_nb);
+}
+late_initcall(register_update_efi_random_seed);
+#endif
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2d089487d2da..85e28b138cdd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -599,6 +599,7 @@ void efi_native_runtime_setup(void);
  */
 #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID	EFI_GUID(0xe03fc20a, 0x85dc, 0x406e,  0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
 #define LINUX_EFI_LOADER_ENTRY_GUID		EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf,  0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
+#define LINUX_EFI_RANDOM_SEED_TABLE_GUID	EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2,  0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
 
 typedef struct {
 	efi_guid_t guid;
@@ -872,6 +873,7 @@ extern struct efi {
 	unsigned long esrt;		/* ESRT table */
 	unsigned long properties_table;	/* properties table */
 	unsigned long mem_attr_table;	/* memory attributes table */
+	unsigned long rng_seed;		/* UEFI firmware random seed */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
@@ -1493,4 +1495,10 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table,
 				    struct efi_boot_memmap *map,
 				    void *priv,
 				    efi_exit_boot_map_processing priv_func);
+
+struct linux_efi_random_seed {
+	u32	size;
+	u8	bits[];
+};
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From 568bc4e87033d232c5fd00d5b0cd22a2ccc04944 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 12 Nov 2016 21:32:33 +0000
Subject: efi/arm*/libstub: Invoke EFI_RNG_PROTOCOL to seed the UEFI RNG table

Invoke the EFI_RNG_PROTOCOL protocol in the context of the stub and
install the Linux-specific RNG seed UEFI config table. This will be
picked up by the EFI routines in the core kernel to seed the kernel
entropy pool.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20161112213237.8804-6-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/arm-stub.c |  2 ++
 drivers/firmware/efi/libstub/efistub.h  |  2 ++
 drivers/firmware/efi/libstub/random.c   | 48 +++++++++++++++++++++++++++++++++
 include/linux/efi.h                     |  1 +
 4 files changed, 53 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 993aa56755f6..b4f7d78f9e8b 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -340,6 +340,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	if (status != EFI_SUCCESS)
 		pr_efi_err(sys_table, "Failed initrd from command line!\n");
 
+	efi_random_get_seed(sys_table);
+
 	new_fdt_addr = fdt_addr;
 	status = allocate_new_fdt_and_exit_boot(sys_table, handle,
 				&new_fdt_addr, dram_base + MAX_FDT_OFFSET,
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index fe1f22584c69..b98824e3800a 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -71,4 +71,6 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
 
 efi_status_t check_platform_features(efi_system_table_t *sys_table_arg);
 
+efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg);
+
 #endif
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index f8e2e5ae6872..3a3feacc329f 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -143,3 +143,51 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
 
 	return status;
 }
+
+#define RANDOM_SEED_SIZE	32
+
+efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg)
+{
+	efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
+	efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW;
+	efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID;
+	struct efi_rng_protocol *rng;
+	struct linux_efi_random_seed *seed;
+	efi_status_t status;
+
+	status = efi_call_early(locate_protocol, &rng_proto, NULL,
+				(void **)&rng);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+				sizeof(*seed) + RANDOM_SEED_SIZE,
+				(void **)&seed);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = rng->get_rng(rng, &rng_algo_raw, RANDOM_SEED_SIZE,
+			      seed->bits);
+	if (status == EFI_UNSUPPORTED)
+		/*
+		 * Use whatever algorithm we have available if the raw algorithm
+		 * is not implemented.
+		 */
+		status = rng->get_rng(rng, NULL, RANDOM_SEED_SIZE,
+				      seed->bits);
+
+	if (status != EFI_SUCCESS)
+		goto err_freepool;
+
+	seed->size = RANDOM_SEED_SIZE;
+	status = efi_call_early(install_configuration_table, &rng_table_guid,
+				seed);
+	if (status != EFI_SUCCESS)
+		goto err_freepool;
+
+	return EFI_SUCCESS;
+
+err_freepool:
+	efi_call_early(free_pool, seed);
+	return status;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 85e28b138cdd..f5a821d9b90c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -589,6 +589,7 @@ void efi_native_runtime_setup(void);
 #define DEVICE_TREE_GUID			EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5,  0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0)
 #define EFI_PROPERTIES_TABLE_GUID		EFI_GUID(0x880aaca3, 0x4adc, 0x4a04,  0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5)
 #define EFI_RNG_PROTOCOL_GUID			EFI_GUID(0x3152bca5, 0xeade, 0x433d,  0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
+#define EFI_RNG_ALGORITHM_RAW			EFI_GUID(0xe43176d7, 0xb6e8, 0x4827,  0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61)
 #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
 #define EFI_CONSOLE_OUT_DEVICE_GUID		EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
 
-- 
cgit v1.2.3


From 46cd4b75cd0edee76e0096225c2d31f8d90e92a2 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 12 Nov 2016 21:32:34 +0000
Subject: efi: Add device path parser

We're about to extended the efistub to retrieve device properties from
EFI on Apple Macs. The properties use EFI Device Paths to indicate the
device they belong to. This commit adds a parser which, given an EFI
Device Path, locates the corresponding struct device and returns a
reference to it.

Initially only ACPI and PCI Device Path nodes are supported, these are
the only types needed for Apple device properties (the corresponding
macOS function AppleACPIPlatformExpert::matchEFIDevicePath() does not
support any others). Further node types can be added with little to
moderate effort.

Apple device properties is currently the only use case of this parser,
but Peter Jones intends to use it to match up devices with the
ConInDev/ConOutDev/ErrOutDev variables and add sysfs attributes to these
devices to say the hardware supports using them as console. Thus,
make this parser a separate component which can be selected with config
option EFI_DEV_PATH_PARSER. It can in principle be compiled as a module
if acpi_get_first_physical_node() and acpi_bus_type are exported (and
efi_get_device_by_path() itself is exported).

The dependency on CONFIG_ACPI is needed for acpi_match_device_ids().
It can be removed if an empty inline stub is added for that function.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Andreas Noever <andreas.noever@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20161112213237.8804-7-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/Kconfig           |   5 +
 drivers/firmware/efi/Makefile          |   1 +
 drivers/firmware/efi/dev-path-parser.c | 203 +++++++++++++++++++++++++++++++++
 include/linux/efi.h                    |  20 ++++
 4 files changed, 229 insertions(+)
 create mode 100644 drivers/firmware/efi/dev-path-parser.c

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index c981be17d3c0..893fda48fcdd 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -133,3 +133,8 @@ endmenu
 
 config UEFI_CPER
 	bool
+
+config EFI_DEV_PATH_PARSER
+	bool
+	depends on ACPI
+	default n
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index c8a439f6d715..3e91ae31f9d1 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_EFI_STUB)			+= libstub/
 obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 obj-$(CONFIG_EFI_TEST)			+= test/
+obj-$(CONFIG_EFI_DEV_PATH_PARSER)	+= dev-path-parser.o
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c
new file mode 100644
index 000000000000..85d1834ee9b7
--- /dev/null
+++ b/drivers/firmware/efi/dev-path-parser.c
@@ -0,0 +1,203 @@
+/*
+ * dev-path-parser.c - EFI Device Path parser
+ * Copyright (C) 2016 Lukas Wunner <lukas@wunner.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/pci.h>
+
+struct acpi_hid_uid {
+	struct acpi_device_id hid[2];
+	char uid[11]; /* UINT_MAX + null byte */
+};
+
+static int __init match_acpi_dev(struct device *dev, void *data)
+{
+	struct acpi_hid_uid hid_uid = *(struct acpi_hid_uid *)data;
+	struct acpi_device *adev = to_acpi_device(dev);
+
+	if (acpi_match_device_ids(adev, hid_uid.hid))
+		return 0;
+
+	if (adev->pnp.unique_id)
+		return !strcmp(adev->pnp.unique_id, hid_uid.uid);
+	else
+		return !strcmp("0", hid_uid.uid);
+}
+
+static long __init parse_acpi_path(struct efi_dev_path *node,
+				   struct device *parent, struct device **child)
+{
+	struct acpi_hid_uid hid_uid = {};
+	struct device *phys_dev;
+
+	if (node->length != 12)
+		return -EINVAL;
+
+	sprintf(hid_uid.hid[0].id, "%c%c%c%04X",
+		'A' + ((node->acpi.hid >> 10) & 0x1f) - 1,
+		'A' + ((node->acpi.hid >>  5) & 0x1f) - 1,
+		'A' + ((node->acpi.hid >>  0) & 0x1f) - 1,
+			node->acpi.hid >> 16);
+	sprintf(hid_uid.uid, "%u", node->acpi.uid);
+
+	*child = bus_find_device(&acpi_bus_type, NULL, &hid_uid,
+				 match_acpi_dev);
+	if (!*child)
+		return -ENODEV;
+
+	phys_dev = acpi_get_first_physical_node(to_acpi_device(*child));
+	if (phys_dev) {
+		get_device(phys_dev);
+		put_device(*child);
+		*child = phys_dev;
+	}
+
+	return 0;
+}
+
+static int __init match_pci_dev(struct device *dev, void *data)
+{
+	unsigned int devfn = *(unsigned int *)data;
+
+	return dev_is_pci(dev) && to_pci_dev(dev)->devfn == devfn;
+}
+
+static long __init parse_pci_path(struct efi_dev_path *node,
+				  struct device *parent, struct device **child)
+{
+	unsigned int devfn;
+
+	if (node->length != 6)
+		return -EINVAL;
+	if (!parent)
+		return -EINVAL;
+
+	devfn = PCI_DEVFN(node->pci.dev, node->pci.fn);
+
+	*child = device_find_child(parent, &devfn, match_pci_dev);
+	if (!*child)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * Insert parsers for further node types here.
+ *
+ * Each parser takes a pointer to the @node and to the @parent (will be NULL
+ * for the first device path node). If a device corresponding to @node was
+ * found below @parent, its reference count should be incremented and the
+ * device returned in @child.
+ *
+ * The return value should be 0 on success or a negative int on failure.
+ * The special return values 0x01 (EFI_DEV_END_INSTANCE) and 0xFF
+ * (EFI_DEV_END_ENTIRE) signal the end of the device path, only
+ * parse_end_path() is supposed to return this.
+ *
+ * Be sure to validate the node length and contents before commencing the
+ * search for a device.
+ */
+
+static long __init parse_end_path(struct efi_dev_path *node,
+				  struct device *parent, struct device **child)
+{
+	if (node->length != 4)
+		return -EINVAL;
+	if (node->sub_type != EFI_DEV_END_INSTANCE &&
+	    node->sub_type != EFI_DEV_END_ENTIRE)
+		return -EINVAL;
+	if (!parent)
+		return -ENODEV;
+
+	*child = get_device(parent);
+	return node->sub_type;
+}
+
+/**
+ * efi_get_device_by_path - find device by EFI Device Path
+ * @node: EFI Device Path
+ * @len: maximum length of EFI Device Path in bytes
+ *
+ * Parse a series of EFI Device Path nodes at @node and find the corresponding
+ * device.  If the device was found, its reference count is incremented and a
+ * pointer to it is returned.  The caller needs to drop the reference with
+ * put_device() after use.  The @node pointer is updated to point to the
+ * location immediately after the "End of Hardware Device Path" node.
+ *
+ * If another Device Path instance follows, @len is decremented by the number
+ * of bytes consumed.  Otherwise @len is set to %0.
+ *
+ * If a Device Path node is malformed or its corresponding device is not found,
+ * @node is updated to point to this offending node and an ERR_PTR is returned.
+ *
+ * If @len is initially %0, the function returns %NULL.  Thus, to iterate over
+ * all instances in a path, the following idiom may be used:
+ *
+ *	while (!IS_ERR_OR_NULL(dev = efi_get_device_by_path(&node, &len))) {
+ *		// do something with dev
+ *		put_device(dev);
+ *	}
+ *	if (IS_ERR(dev))
+ *		// report error
+ *
+ * Devices can only be found if they're already instantiated. Most buses
+ * instantiate devices in the "subsys" initcall level, hence the earliest
+ * initcall level in which this function should be called is "fs".
+ *
+ * Returns the device on success or
+ *	%ERR_PTR(-ENODEV) if no device was found,
+ *	%ERR_PTR(-EINVAL) if a node is malformed or exceeds @len,
+ *	%ERR_PTR(-ENOTSUPP) if support for a node type is not yet implemented.
+ */
+struct device * __init efi_get_device_by_path(struct efi_dev_path **node,
+					      size_t *len)
+{
+	struct device *parent = NULL, *child;
+	long ret = 0;
+
+	if (!*len)
+		return NULL;
+
+	while (!ret) {
+		if (*len < 4 || *len < (*node)->length)
+			ret = -EINVAL;
+		else if ((*node)->type     == EFI_DEV_ACPI &&
+			 (*node)->sub_type == EFI_DEV_BASIC_ACPI)
+			ret = parse_acpi_path(*node, parent, &child);
+		else if ((*node)->type     == EFI_DEV_HW &&
+			 (*node)->sub_type == EFI_DEV_PCI)
+			ret = parse_pci_path(*node, parent, &child);
+		else if (((*node)->type    == EFI_DEV_END_PATH ||
+			  (*node)->type    == EFI_DEV_END_PATH2))
+			ret = parse_end_path(*node, parent, &child);
+		else
+			ret = -ENOTSUPP;
+
+		put_device(parent);
+		if (ret < 0)
+			return ERR_PTR(ret);
+
+		parent = child;
+		*node  = (void *)*node + (*node)->length;
+		*len  -= (*node)->length;
+	}
+
+	if (ret == EFI_DEV_END_ENTIRE)
+		*len = 0;
+
+	return child;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index f5a821d9b90c..261767212c47 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1148,6 +1148,26 @@ struct efi_generic_dev_path {
 	u16 length;
 } __attribute ((packed));
 
+struct efi_dev_path {
+	u8 type;	/* can be replaced with unnamed */
+	u8 sub_type;	/* struct efi_generic_dev_path; */
+	u16 length;	/* once we've moved to -std=c11 */
+	union {
+		struct {
+			u32 hid;
+			u32 uid;
+		} acpi;
+		struct {
+			u8 fn;
+			u8 dev;
+		} pci;
+	};
+} __attribute ((packed));
+
+#if IS_ENABLED(CONFIG_EFI_DEV_PATH_PARSER)
+struct device *efi_get_device_by_path(struct efi_dev_path **node, size_t *len);
+#endif
+
 static inline void memrange_efi_to_native(u64 *addr, u64 *npages)
 {
 	*npages = PFN_UP(*addr + (*npages<<EFI_PAGE_SHIFT)) - PFN_DOWN(*addr);
-- 
cgit v1.2.3


From 58c5475aba67706b31d9237808d5d3d54074e5ea Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 12 Nov 2016 21:32:36 +0000
Subject: x86/efi: Retrieve and assign Apple device properties
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apple's EFI drivers supply device properties which are needed to support
Macs optimally. They contain vital information which cannot be obtained
any other way (e.g. Thunderbolt Device ROM). They're also used to convey
the current device state so that OS drivers can pick up where EFI
drivers left (e.g. GPU mode setting).

There's an EFI driver dubbed "AAPL,PathProperties" which implements a
per-device key/value store. Other EFI drivers populate it using a custom
protocol. The macOS bootloader /System/Library/CoreServices/boot.efi
retrieves the properties with the same protocol. The kernel extension
AppleACPIPlatform.kext subsequently merges them into the I/O Kit
registry (see ioreg(8)) where they can be queried by other kernel
extensions and user space.

This commit extends the efistub to retrieve the device properties before
ExitBootServices is called. It assigns them to devices in an fs_initcall
so that they can be queried with the API in <linux/property.h>.

Note that the device properties will only be available if the kernel is
booted with the efistub. Distros should adjust their installers to
always use the efistub on Macs. grub with the "linux" directive will not
work unless the functionality of this commit is duplicated in grub.
(The "linuxefi" directive should work but is not included upstream as of
this writing.)

The custom protocol has GUID 91BD12FE-F6C3-44FB-A5B7-5122AB303AE0 and
looks like this:

typedef struct {
	unsigned long version; /* 0x10000 */
	efi_status_t (*get) (
		IN	struct apple_properties_protocol *this,
		IN	struct efi_dev_path *device,
		IN	efi_char16_t *property_name,
		OUT	void *buffer,
		IN OUT	u32 *buffer_len);
		/* EFI_SUCCESS, EFI_NOT_FOUND, EFI_BUFFER_TOO_SMALL */
	efi_status_t (*set) (
		IN	struct apple_properties_protocol *this,
		IN	struct efi_dev_path *device,
		IN	efi_char16_t *property_name,
		IN	void *property_value,
		IN	u32 property_value_len);
		/* allocates copies of property name and value */
		/* EFI_SUCCESS, EFI_OUT_OF_RESOURCES */
	efi_status_t (*del) (
		IN	struct apple_properties_protocol *this,
		IN	struct efi_dev_path *device,
		IN	efi_char16_t *property_name);
		/* EFI_SUCCESS, EFI_NOT_FOUND */
	efi_status_t (*get_all) (
		IN	struct apple_properties_protocol *this,
		OUT	void *buffer,
		IN OUT	u32 *buffer_len);
		/* EFI_SUCCESS, EFI_BUFFER_TOO_SMALL */
} apple_properties_protocol;

Thanks to Pedro Vilaça for this blog post which was helpful in reverse
engineering Apple's EFI drivers and bootloader:
https://reverse.put.as/2016/06/25/apple-efi-firmware-passwords-and-the-scbo-myth/

If someone at Apple is reading this, please note there's a memory leak
in your implementation of the del() function as the property struct is
freed but the name and value allocations are not.

Neither the macOS bootloader nor Apple's EFI drivers check the protocol
version, but we do to avoid breakage if it's ever changed. It's been the
same since at least OS X 10.6 (2009).

The get_all() function conveniently fills a buffer with all properties
in marshalled form which can be passed to the kernel as a setup_data
payload. The number of device properties is dynamic and can change
between a first invocation of get_all() (to determine the buffer size)
and a second invocation (to retrieve the actual buffer), hence the
peculiar loop which does not finish until the buffer size settles.
The macOS bootloader does the same.

The setup_data payload is later on unmarshalled in an fs_initcall. The
idea is that most buses instantiate devices in "subsys" initcall level
and drivers are usually bound to these devices in "device" initcall
level, so we assign the properties in-between, i.e. in "fs" initcall
level.

This assumes that devices to which properties pertain are instantiated
from a "subsys" initcall or earlier. That should always be the case
since on macOS, AppleACPIPlatformExpert::matchEFIDevicePath() only
supports ACPI and PCI nodes and we've fully scanned those buses during
"subsys" initcall level.

The second assumption is that properties are only needed from a "device"
initcall or later. Seems reasonable to me, but should this ever not work
out, an alternative approach would be to store the property sets e.g. in
a btree early during boot. Then whenever device_add() is called, an EFI
Device Path would have to be constructed for the newly added device,
and looked up in the btree. That way, the property set could be assigned
to the device immediately on instantiation. And this would also work for
devices instantiated in a deferred fashion. It seems like this approach
would be more complicated and require more code. That doesn't seem
justified without a specific use case.

For comparison, the strategy on macOS is to assign properties to objects
in the ACPI namespace (AppleACPIPlatformExpert::mergeEFIProperties()).
That approach is definitely wrong as it fails for devices not present in
the namespace: The NHI EFI driver supplies properties for attached
Thunderbolt devices, yet on Macs with Thunderbolt 1 only one device
level behind the host controller is described in the namespace.
Consequently macOS cannot assign properties for chained devices. With
Thunderbolt 2 they started to describe three device levels behind host
controllers in the namespace but this grossly inflates the SSDT and
still fails if the user daisy-chained more than three devices.

We copy the property names and values from the setup_data payload to
swappable virtual memory and afterwards make the payload available to
the page allocator. This is just for the sake of good housekeeping, it
wouldn't occupy a meaningful amount of physical memory (4444 bytes on my
machine). Only the payload is freed, not the setup_data header since
otherwise we'd break the list linkage and we cannot safely update the
predecessor's ->next link because there's no locking for the list.

The payload is currently not passed on to kexec'ed kernels, same for PCI
ROMs retrieved by setup_efi_pci(). This can be added later if there is
demand by amending setup_efi_state(). The payload can then no longer be
made available to the page allocator of course.

Tested-by: Lukas Wunner <lukas@wunner.de> [MacBookPro9,1]
Tested-by: Pierre Moreau <pierre.morrow@free.fr> [MacBookPro11,3]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Andreas Noever <andreas.noever@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pedro Vilaça <reverser@put.as>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: grub-devel@gnu.org
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20161112213237.8804-9-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kernel-parameters.txt     |   5 +
 arch/x86/boot/compressed/eboot.c        |  65 +++++++++
 arch/x86/include/uapi/asm/bootparam.h   |   1 +
 drivers/firmware/efi/Kconfig            |  13 ++
 drivers/firmware/efi/Makefile           |   1 +
 drivers/firmware/efi/apple-properties.c | 248 ++++++++++++++++++++++++++++++++
 include/linux/efi.h                     |  17 +++
 7 files changed, 350 insertions(+)
 create mode 100644 drivers/firmware/efi/apple-properties.c

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf91f2cb..86a31dfc036e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1062,6 +1062,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	dscc4.setup=	[NET]
 
+	dump_apple_properties	[X86]
+			Dump name and content of EFI device properties on
+			x86 Macs.  Useful for driver authors to determine
+			what data is available or for reverse-engineering.
+
 	dyndbg[="val"]		[KNL,DYNAMIC_DEBUG]
 	module.dyndbg[="val"]
 			Enable debug messages at boot time.  See
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cc69e37548db..ff01c8fc76f7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -537,6 +537,69 @@ free_handle:
 	efi_call_early(free_pool, pci_handle);
 }
 
+static void retrieve_apple_device_properties(struct boot_params *boot_params)
+{
+	efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
+	struct setup_data *data, *new;
+	efi_status_t status;
+	u32 size = 0;
+	void *p;
+
+	status = efi_call_early(locate_protocol, &guid, NULL, &p);
+	if (status != EFI_SUCCESS)
+		return;
+
+	if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
+		efi_printk(sys_table, "Unsupported properties proto version\n");
+		return;
+	}
+
+	efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
+	if (!size)
+		return;
+
+	do {
+		status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+					size + sizeof(struct setup_data), &new);
+		if (status != EFI_SUCCESS) {
+			efi_printk(sys_table,
+					"Failed to alloc mem for properties\n");
+			return;
+		}
+
+		status = efi_call_proto(apple_properties_protocol, get_all, p,
+					new->data, &size);
+
+		if (status == EFI_BUFFER_TOO_SMALL)
+			efi_call_early(free_pool, new);
+	} while (status == EFI_BUFFER_TOO_SMALL);
+
+	new->type = SETUP_APPLE_PROPERTIES;
+	new->len  = size;
+	new->next = 0;
+
+	data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+	if (!data)
+		boot_params->hdr.setup_data = (unsigned long)new;
+	else {
+		while (data->next)
+			data = (struct setup_data *)(unsigned long)data->next;
+		data->next = (unsigned long)new;
+	}
+}
+
+static void setup_quirks(struct boot_params *boot_params)
+{
+	efi_char16_t const apple[] = { 'A', 'p', 'p', 'l', 'e', 0 };
+	efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
+		efi_table_attr(efi_system_table, fw_vendor, sys_table);
+
+	if (!memcmp(fw_vendor, apple, sizeof(apple))) {
+		if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
+			retrieve_apple_device_properties(boot_params);
+	}
+}
+
 static efi_status_t
 setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
 {
@@ -1098,6 +1161,8 @@ struct boot_params *efi_main(struct efi_config *c,
 
 	setup_efi_pci(boot_params);
 
+	setup_quirks(boot_params);
+
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
 				sizeof(*gdt), (void **)&gdt);
 	if (status != EFI_SUCCESS) {
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c18ce67495fa..b10bf319ed20 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
 #define SETUP_DTB			2
 #define SETUP_PCI			3
 #define SETUP_EFI			4
+#define SETUP_APPLE_PROPERTIES		5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 893fda48fcdd..2e78b0b96d74 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -129,6 +129,19 @@ config EFI_TEST
 	  Say Y here to enable the runtime services support via /dev/efi_test.
 	  If unsure, say N.
 
+config APPLE_PROPERTIES
+	bool "Apple Device Properties"
+	depends on EFI_STUB && X86
+	select EFI_DEV_PATH_PARSER
+	select UCS2_STRING
+	help
+	  Retrieve properties from EFI on Apple Macs and assign them to
+	  devices, allowing for improved support of Apple hardware.
+	  Properties that would otherwise be missing include the
+	  Thunderbolt Device ROM and GPU configuration data.
+
+	  If unsure, say Y if you have a Mac.  Otherwise N.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 3e91ae31f9d1..ad67342313ed 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 obj-$(CONFIG_EFI_TEST)			+= test/
 obj-$(CONFIG_EFI_DEV_PATH_PARSER)	+= dev-path-parser.o
+obj-$(CONFIG_APPLE_PROPERTIES)		+= apple-properties.o
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
new file mode 100644
index 000000000000..c473f4c5ca34
--- /dev/null
+++ b/drivers/firmware/efi/apple-properties.c
@@ -0,0 +1,248 @@
+/*
+ * apple-properties.c - EFI device properties on Macs
+ * Copyright (C) 2016 Lukas Wunner <lukas@wunner.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "apple-properties: " fmt
+
+#include <linux/bootmem.h>
+#include <linux/dmi.h>
+#include <linux/efi.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/ucs2_string.h>
+#include <asm/setup.h>
+
+static bool dump_properties __initdata;
+
+static int __init dump_properties_enable(char *arg)
+{
+	dump_properties = true;
+	return 0;
+}
+
+__setup("dump_apple_properties", dump_properties_enable);
+
+struct dev_header {
+	u32 len;
+	u32 prop_count;
+	struct efi_dev_path path[0];
+	/*
+	 * followed by key/value pairs, each key and value preceded by u32 len,
+	 * len includes itself, value may be empty (in which case its len is 4)
+	 */
+};
+
+struct properties_header {
+	u32 len;
+	u32 version;
+	u32 dev_count;
+	struct dev_header dev_header[0];
+};
+
+static u8 one __initdata = 1;
+
+static void __init unmarshal_key_value_pairs(struct dev_header *dev_header,
+					     struct device *dev, void *ptr,
+					     struct property_entry entry[])
+{
+	int i;
+
+	for (i = 0; i < dev_header->prop_count; i++) {
+		int remaining = dev_header->len - (ptr - (void *)dev_header);
+		u32 key_len, val_len;
+		char *key;
+
+		if (sizeof(key_len) > remaining)
+			break;
+
+		key_len = *(typeof(key_len) *)ptr;
+		if (key_len + sizeof(val_len) > remaining ||
+		    key_len < sizeof(key_len) + sizeof(efi_char16_t) ||
+		    *(efi_char16_t *)(ptr + sizeof(key_len)) == 0) {
+			dev_err(dev, "invalid property name len at %#zx\n",
+				ptr - (void *)dev_header);
+			break;
+		}
+
+		val_len = *(typeof(val_len) *)(ptr + key_len);
+		if (key_len + val_len > remaining ||
+		    val_len < sizeof(val_len)) {
+			dev_err(dev, "invalid property val len at %#zx\n",
+				ptr - (void *)dev_header + key_len);
+			break;
+		}
+
+		/* 4 bytes to accommodate UTF-8 code points + null byte */
+		key = kzalloc((key_len - sizeof(key_len)) * 4 + 1, GFP_KERNEL);
+		if (!key) {
+			dev_err(dev, "cannot allocate property name\n");
+			break;
+		}
+		ucs2_as_utf8(key, ptr + sizeof(key_len),
+			     key_len - sizeof(key_len));
+
+		entry[i].name = key;
+		entry[i].is_array = true;
+		entry[i].length = val_len - sizeof(val_len);
+		entry[i].pointer.raw_data = ptr + key_len + sizeof(val_len);
+		if (!entry[i].length) {
+			/* driver core doesn't accept empty properties */
+			entry[i].length = 1;
+			entry[i].pointer.raw_data = &one;
+		}
+
+		if (dump_properties) {
+			dev_info(dev, "property: %s\n", entry[i].name);
+			print_hex_dump(KERN_INFO, pr_fmt(), DUMP_PREFIX_OFFSET,
+				16, 1, entry[i].pointer.raw_data,
+				entry[i].length, true);
+		}
+
+		ptr += key_len + val_len;
+	}
+
+	if (i != dev_header->prop_count) {
+		dev_err(dev, "got %d device properties, expected %u\n", i,
+			dev_header->prop_count);
+		print_hex_dump(KERN_ERR, pr_fmt(), DUMP_PREFIX_OFFSET,
+			16, 1, dev_header, dev_header->len, true);
+		return;
+	}
+
+	dev_info(dev, "assigning %d device properties\n", i);
+}
+
+static int __init unmarshal_devices(struct properties_header *properties)
+{
+	size_t offset = offsetof(struct properties_header, dev_header[0]);
+
+	while (offset + sizeof(struct dev_header) < properties->len) {
+		struct dev_header *dev_header = (void *)properties + offset;
+		struct property_entry *entry = NULL;
+		struct device *dev;
+		size_t len;
+		int ret, i;
+		void *ptr;
+
+		if (offset + dev_header->len > properties->len ||
+		    dev_header->len <= sizeof(*dev_header)) {
+			pr_err("invalid len in dev_header at %#zx\n", offset);
+			return -EINVAL;
+		}
+
+		ptr = dev_header->path;
+		len = dev_header->len - sizeof(*dev_header);
+
+		dev = efi_get_device_by_path((struct efi_dev_path **)&ptr, &len);
+		if (IS_ERR(dev)) {
+			pr_err("device path parse error %ld at %#zx:\n",
+			       PTR_ERR(dev), ptr - (void *)dev_header);
+			print_hex_dump(KERN_ERR, pr_fmt(), DUMP_PREFIX_OFFSET,
+			       16, 1, dev_header, dev_header->len, true);
+			dev = NULL;
+			goto skip_device;
+		}
+
+		entry = kcalloc(dev_header->prop_count + 1, sizeof(*entry),
+				GFP_KERNEL);
+		if (!entry) {
+			dev_err(dev, "cannot allocate properties\n");
+			goto skip_device;
+		}
+
+		unmarshal_key_value_pairs(dev_header, dev, ptr, entry);
+		if (!entry[0].name)
+			goto skip_device;
+
+		ret = device_add_properties(dev, entry); /* makes deep copy */
+		if (ret)
+			dev_err(dev, "error %d assigning properties\n", ret);
+
+		for (i = 0; entry[i].name; i++)
+			kfree(entry[i].name);
+
+skip_device:
+		kfree(entry);
+		put_device(dev);
+		offset += dev_header->len;
+	}
+
+	return 0;
+}
+
+static int __init map_properties(void)
+{
+	struct properties_header *properties;
+	struct setup_data *data;
+	u32 data_len;
+	u64 pa_data;
+	int ret;
+
+	if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.") &&
+	    !dmi_match(DMI_SYS_VENDOR, "Apple Computer, Inc."))
+		return 0;
+
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = ioremap(pa_data, sizeof(*data));
+		if (!data) {
+			pr_err("cannot map setup_data header\n");
+			return -ENOMEM;
+		}
+
+		if (data->type != SETUP_APPLE_PROPERTIES) {
+			pa_data = data->next;
+			iounmap(data);
+			continue;
+		}
+
+		data_len = data->len;
+		iounmap(data);
+
+		data = ioremap(pa_data, sizeof(*data) + data_len);
+		if (!data) {
+			pr_err("cannot map setup_data payload\n");
+			return -ENOMEM;
+		}
+
+		properties = (struct properties_header *)data->data;
+		if (properties->version != 1) {
+			pr_err("unsupported version:\n");
+			print_hex_dump(KERN_ERR, pr_fmt(), DUMP_PREFIX_OFFSET,
+			       16, 1, properties, data_len, true);
+			ret = -ENOTSUPP;
+		} else if (properties->len != data_len) {
+			pr_err("length mismatch, expected %u\n", data_len);
+			print_hex_dump(KERN_ERR, pr_fmt(), DUMP_PREFIX_OFFSET,
+			       16, 1, properties, data_len, true);
+			ret = -EINVAL;
+		} else
+			ret = unmarshal_devices(properties);
+
+		/*
+		 * Can only free the setup_data payload but not its header
+		 * to avoid breaking the chain of ->next pointers.
+		 */
+		data->len = 0;
+		iounmap(data);
+		free_bootmem_late(pa_data + sizeof(*data), data_len);
+
+		return ret;
+	}
+	return 0;
+}
+
+fs_initcall(map_properties);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 261767212c47..a07a476178cd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -443,6 +443,22 @@ typedef struct {
 #define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000
 #define EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000
 
+typedef struct {
+	u32 version;
+	u32 get;
+	u32 set;
+	u32 del;
+	u32 get_all;
+} apple_properties_protocol_32_t;
+
+typedef struct {
+	u64 version;
+	u64 get;
+	u64 set;
+	u64 del;
+	u64 get_all;
+} apple_properties_protocol_64_t;
+
 /*
  * Types and defines for EFI ResetSystem
  */
@@ -592,6 +608,7 @@ void efi_native_runtime_setup(void);
 #define EFI_RNG_ALGORITHM_RAW			EFI_GUID(0xe43176d7, 0xb6e8, 0x4827,  0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61)
 #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
 #define EFI_CONSOLE_OUT_DEVICE_GUID		EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+#define APPLE_PROPERTIES_PROTOCOL_GUID		EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb,  0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
 
 /*
  * This GUID is used to pass to the kernel proper the struct screen_info
-- 
cgit v1.2.3


From 51239600074bc9979b0a0e83b72c726d7dcc3132 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Tue, 8 Nov 2016 12:58:51 +0100
Subject: iio:core: add a callback to allow drivers to provide _available
 attributes

A large number of attributes can only take a limited range of values.
Currently in IIO this is handled by directly registering additional
*_available attributes thus providing this information to userspace.

It is desirable to provide this information via the core for much the same
reason this was done for the actual channel information attributes in the
first place.  If it isn't there, then it can only really be accessed from
userspace.  Other in kernel IIO consumers have no access to what valid
parameters are.

Two forms are currently supported:
* list of values in one particular IIO_VAL_* format.
	e.g. 1.300000 1.500000 1.730000
* range specification with a step size:
	e.g. [1.000000 0.500000 2.500000]
	equivalent to 1.000000 1.5000000 2.000000 2.500000

An addition set of masks are used to allow different sharing rules for the
*_available attributes generated.

This allows for example:

in_accel_x_offset
in_accel_y_offset
in_accel_offset_available.

We could have gone with having a specification for each and every
info_mask element but that would have meant changing the existing userspace
ABI.  This approach does not.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
[forward ported, added some docs and fixed buffer overflows /peda]
Acked-by: Daniel Baluta <daniel.baluta@intel.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 259 +++++++++++++++++++++++++++++++++++-----
 include/linux/iio/iio.h         |  29 +++++
 include/linux/iio/types.h       |   5 +
 3 files changed, 260 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 649725bc15c1..aaca42862389 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -577,66 +577,82 @@ int of_iio_read_mount_matrix(const struct device *dev,
 #endif
 EXPORT_SYMBOL(of_iio_read_mount_matrix);
 
-/**
- * iio_format_value() - Formats a IIO value into its string representation
- * @buf:	The buffer to which the formatted value gets written
- * @type:	One of the IIO_VAL_... constants. This decides how the val
- *		and val2 parameters are formatted.
- * @size:	Number of IIO value entries contained in vals
- * @vals:	Pointer to the values, exact meaning depends on the
- *		type parameter.
- *
- * Return: 0 by default, a negative number on failure or the
- *	   total number of characters written for a type that belongs
- *	   to the IIO_VAL_... constant.
- */
-ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
+static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type,
+				  int size, const int *vals)
 {
 	unsigned long long tmp;
+	int tmp0, tmp1;
 	bool scale_db = false;
 
 	switch (type) {
 	case IIO_VAL_INT:
-		return sprintf(buf, "%d\n", vals[0]);
+		return snprintf(buf, len, "%d", vals[0]);
 	case IIO_VAL_INT_PLUS_MICRO_DB:
 		scale_db = true;
 	case IIO_VAL_INT_PLUS_MICRO:
 		if (vals[1] < 0)
-			return sprintf(buf, "-%d.%06u%s\n", abs(vals[0]),
-				       -vals[1], scale_db ? " dB" : "");
+			return snprintf(buf, len, "-%d.%06u%s", abs(vals[0]),
+					-vals[1], scale_db ? " dB" : "");
 		else
-			return sprintf(buf, "%d.%06u%s\n", vals[0], vals[1],
-				scale_db ? " dB" : "");
+			return snprintf(buf, len, "%d.%06u%s", vals[0], vals[1],
+					scale_db ? " dB" : "");
 	case IIO_VAL_INT_PLUS_NANO:
 		if (vals[1] < 0)
-			return sprintf(buf, "-%d.%09u\n", abs(vals[0]),
-				       -vals[1]);
+			return snprintf(buf, len, "-%d.%09u", abs(vals[0]),
+					-vals[1]);
 		else
-			return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+			return snprintf(buf, len, "%d.%09u", vals[0], vals[1]);
 	case IIO_VAL_FRACTIONAL:
 		tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]);
-		vals[0] = (int)div_s64_rem(tmp, 1000000000, &vals[1]);
-		return sprintf(buf, "%d.%09u\n", vals[0], abs(vals[1]));
+		tmp1 = vals[1];
+		tmp0 = (int)div_s64_rem(tmp, 1000000000, &tmp1);
+		return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1));
 	case IIO_VAL_FRACTIONAL_LOG2:
 		tmp = (s64)vals[0] * 1000000000LL >> vals[1];
-		vals[1] = do_div(tmp, 1000000000LL);
-		vals[0] = tmp;
-		return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+		tmp1 = do_div(tmp, 1000000000LL);
+		tmp0 = tmp;
+		return snprintf(buf, len, "%d.%09u", tmp0, tmp1);
 	case IIO_VAL_INT_MULTIPLE:
 	{
 		int i;
-		int len = 0;
+		int l = 0;
 
-		for (i = 0; i < size; ++i)
-			len += snprintf(&buf[len], PAGE_SIZE - len, "%d ",
-								vals[i]);
-		len += snprintf(&buf[len], PAGE_SIZE - len, "\n");
-		return len;
+		for (i = 0; i < size; ++i) {
+			l += snprintf(&buf[l], len - l, "%d ", vals[i]);
+			if (l >= len)
+				break;
+		}
+		return l;
 	}
 	default:
 		return 0;
 	}
 }
+
+/**
+ * iio_format_value() - Formats a IIO value into its string representation
+ * @buf:	The buffer to which the formatted value gets written
+ *		which is assumed to be big enough (i.e. PAGE_SIZE).
+ * @type:	One of the IIO_VAL_... constants. This decides how the val
+ *		and val2 parameters are formatted.
+ * @size:	Number of IIO value entries contained in vals
+ * @vals:	Pointer to the values, exact meaning depends on the
+ *		type parameter.
+ *
+ * Return: 0 by default, a negative number on failure or the
+ *	   total number of characters written for a type that belongs
+ *	   to the IIO_VAL_... constant.
+ */
+ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
+{
+	ssize_t len;
+
+	len = __iio_format_value(buf, PAGE_SIZE, type, size, vals);
+	if (len >= PAGE_SIZE - 1)
+		return -EFBIG;
+
+	return len + sprintf(buf + len, "\n");
+}
 EXPORT_SYMBOL_GPL(iio_format_value);
 
 static ssize_t iio_read_channel_info(struct device *dev,
@@ -664,6 +680,119 @@ static ssize_t iio_read_channel_info(struct device *dev,
 	return iio_format_value(buf, ret, val_len, vals);
 }
 
+static ssize_t iio_format_avail_list(char *buf, const int *vals,
+				     int type, int length)
+{
+	int i;
+	ssize_t len = 0;
+
+	switch (type) {
+	case IIO_VAL_INT:
+		for (i = 0; i < length; i++) {
+			len += __iio_format_value(buf + len, PAGE_SIZE - len,
+						  type, 1, &vals[i]);
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+			if (i < length - 1)
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						" ");
+			else
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"\n");
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+		}
+		break;
+	default:
+		for (i = 0; i < length / 2; i++) {
+			len += __iio_format_value(buf + len, PAGE_SIZE - len,
+						  type, 2, &vals[i * 2]);
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+			if (i < length / 2 - 1)
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						" ");
+			else
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"\n");
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+		}
+	}
+
+	return len;
+}
+
+static ssize_t iio_format_avail_range(char *buf, const int *vals, int type)
+{
+	int i;
+	ssize_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "[");
+	switch (type) {
+	case IIO_VAL_INT:
+		for (i = 0; i < 3; i++) {
+			len += __iio_format_value(buf + len, PAGE_SIZE - len,
+						  type, 1, &vals[i]);
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+			if (i < 2)
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						" ");
+			else
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"]\n");
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+		}
+		break;
+	default:
+		for (i = 0; i < 3; i++) {
+			len += __iio_format_value(buf + len, PAGE_SIZE - len,
+						  type, 2, &vals[i * 2]);
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+			if (i < 2)
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						" ");
+			else
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"]\n");
+			if (len >= PAGE_SIZE)
+				return -EFBIG;
+		}
+	}
+
+	return len;
+}
+
+static ssize_t iio_read_channel_info_avail(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
+	const int *vals;
+	int ret;
+	int length;
+	int type;
+
+	ret = indio_dev->info->read_avail(indio_dev, this_attr->c,
+					  &vals, &type, &length,
+					  this_attr->address);
+
+	if (ret < 0)
+		return ret;
+	switch (ret) {
+	case IIO_AVAIL_LIST:
+		return iio_format_avail_list(buf, vals, type, length);
+	case IIO_AVAIL_RANGE:
+		return iio_format_avail_range(buf, vals, type);
+	default:
+		return -EINVAL;
+	}
+}
+
 /**
  * iio_str_to_fixpoint() - Parse a fixed-point number from a string
  * @str: The string to parse
@@ -980,6 +1109,40 @@ static int iio_device_add_info_mask_type(struct iio_dev *indio_dev,
 	return attrcount;
 }
 
+static int iio_device_add_info_mask_type_avail(struct iio_dev *indio_dev,
+					       struct iio_chan_spec const *chan,
+					       enum iio_shared_by shared_by,
+					       const long *infomask)
+{
+	int i, ret, attrcount = 0;
+	char *avail_postfix;
+
+	for_each_set_bit(i, infomask, sizeof(infomask) * 8) {
+		avail_postfix = kasprintf(GFP_KERNEL,
+					  "%s_available",
+					  iio_chan_info_postfix[i]);
+		if (!avail_postfix)
+			return -ENOMEM;
+
+		ret = __iio_add_chan_devattr(avail_postfix,
+					     chan,
+					     &iio_read_channel_info_avail,
+					     NULL,
+					     i,
+					     shared_by,
+					     &indio_dev->dev,
+					     &indio_dev->channel_attr_list);
+		kfree(avail_postfix);
+		if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE))
+			continue;
+		else if (ret < 0)
+			return ret;
+		attrcount++;
+	}
+
+	return attrcount;
+}
+
 static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 					struct iio_chan_spec const *chan)
 {
@@ -995,6 +1158,14 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 		return ret;
 	attrcount += ret;
 
+	ret = iio_device_add_info_mask_type_avail(indio_dev, chan,
+						  IIO_SEPARATE,
+						  &chan->
+						  info_mask_separate_available);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	ret = iio_device_add_info_mask_type(indio_dev, chan,
 					    IIO_SHARED_BY_TYPE,
 					    &chan->info_mask_shared_by_type);
@@ -1002,6 +1173,14 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 		return ret;
 	attrcount += ret;
 
+	ret = iio_device_add_info_mask_type_avail(indio_dev, chan,
+						  IIO_SHARED_BY_TYPE,
+						  &chan->
+						  info_mask_shared_by_type_available);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	ret = iio_device_add_info_mask_type(indio_dev, chan,
 					    IIO_SHARED_BY_DIR,
 					    &chan->info_mask_shared_by_dir);
@@ -1009,6 +1188,13 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 		return ret;
 	attrcount += ret;
 
+	ret = iio_device_add_info_mask_type_avail(indio_dev, chan,
+						  IIO_SHARED_BY_DIR,
+						  &chan->info_mask_shared_by_dir_available);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	ret = iio_device_add_info_mask_type(indio_dev, chan,
 					    IIO_SHARED_BY_ALL,
 					    &chan->info_mask_shared_by_all);
@@ -1016,6 +1202,13 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 		return ret;
 	attrcount += ret;
 
+	ret = iio_device_add_info_mask_type_avail(indio_dev, chan,
+						  IIO_SHARED_BY_ALL,
+						  &chan->info_mask_shared_by_all_available);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	if (chan->ext_info) {
 		unsigned int i = 0;
 		for (ext_info = chan->ext_info; ext_info->name; ext_info++) {
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 4591d8ea41bd..849d524645e8 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -225,12 +225,22 @@ struct iio_event_spec {
  *			endianness:	little or big endian
  * @info_mask_separate: What information is to be exported that is specific to
  *			this channel.
+ * @info_mask_separate_available: What availability information is to be
+ *			exported that is specific to this channel.
  * @info_mask_shared_by_type: What information is to be exported that is shared
  *			by all channels of the same type.
+ * @info_mask_shared_by_type_available: What availability information is to be
+ *			exported that is shared by all channels of the same
+ *			type.
  * @info_mask_shared_by_dir: What information is to be exported that is shared
  *			by all channels of the same direction.
+ * @info_mask_shared_by_dir_available: What availability information is to be
+ *			exported that is shared by all channels of the same
+ *			direction.
  * @info_mask_shared_by_all: What information is to be exported that is shared
  *			by all channels.
+ * @info_mask_shared_by_all_available: What availability information is to be
+ *			exported that is shared by all channels.
  * @event_spec:		Array of events which should be registered for this
  *			channel.
  * @num_event_specs:	Size of the event_spec array.
@@ -269,9 +279,13 @@ struct iio_chan_spec {
 		enum iio_endian endianness;
 	} scan_type;
 	long			info_mask_separate;
+	long			info_mask_separate_available;
 	long			info_mask_shared_by_type;
+	long			info_mask_shared_by_type_available;
 	long			info_mask_shared_by_dir;
+	long			info_mask_shared_by_dir_available;
 	long			info_mask_shared_by_all;
+	long			info_mask_shared_by_all_available;
 	const struct iio_event_spec *event_spec;
 	unsigned int		num_event_specs;
 	const struct iio_chan_spec_ext_info *ext_info;
@@ -349,6 +363,14 @@ struct iio_dev;
  *			max_len specifies maximum number of elements
  *			vals pointer can contain. val_len is used to return
  *			length of valid elements in vals.
+ * @read_avail:		function to return the available values from the device.
+ *			mask specifies which value. Note 0 means the available
+ *			values for the channel in question.  Return value
+ *			specifies if a IIO_AVAIL_LIST or a IIO_AVAIL_RANGE is
+ *			returned in vals. The type of the vals are returned in
+ *			type and the number of vals is returned in length. For
+ *			ranges, there are always three vals returned; min, step
+ *			and max. For lists, all possible values are enumerated.
  * @write_raw:		function to write a value to the device.
  *			Parameters are the same as for read_raw.
  * @write_raw_get_fmt:	callback function to query the expected
@@ -397,6 +419,13 @@ struct iio_info {
 			int *val_len,
 			long mask);
 
+	int (*read_avail)(struct iio_dev *indio_dev,
+			  struct iio_chan_spec const *chan,
+			  const int **vals,
+			  int *type,
+			  int *length,
+			  long mask);
+
 	int (*write_raw)(struct iio_dev *indio_dev,
 			 struct iio_chan_spec const *chan,
 			 int val,
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 32b579525004..2aa7b6384d64 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -29,4 +29,9 @@ enum iio_event_info {
 #define IIO_VAL_FRACTIONAL 10
 #define IIO_VAL_FRACTIONAL_LOG2 11
 
+enum iio_available_type {
+	IIO_AVAIL_LIST,
+	IIO_AVAIL_RANGE,
+};
+
 #endif /* _IIO_TYPES_H_ */
-- 
cgit v1.2.3


From 00c5f80c2fad5368cd5bfa6c9d90e75a9041ac16 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Tue, 8 Nov 2016 12:58:52 +0100
Subject: iio: inkern: add helpers to query available values from channels

Specifically a helper for reading the available maximum raw value of a
channel and a helper for forwarding read_avail requests for raw values
from one iio driver to an iio channel that is consumed.

These rather specific helpers are in turn built with generic helpers
making it easy to build more helpers for available values as needed.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/inkern.c         | 104 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iio/consumer.h |  28 ++++++++++++
 include/linux/iio/iio.h      |  17 +++++++
 3 files changed, 149 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 29df11572858..b0f4630a163f 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -716,6 +716,110 @@ int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2)
 }
 EXPORT_SYMBOL_GPL(iio_read_channel_scale);
 
+static int iio_channel_read_avail(struct iio_channel *chan,
+				  const int **vals, int *type, int *length,
+				  enum iio_chan_info_enum info)
+{
+	if (!iio_channel_has_available(chan->channel, info))
+		return -EINVAL;
+
+	return chan->indio_dev->info->read_avail(chan->indio_dev, chan->channel,
+						 vals, type, length, info);
+}
+
+int iio_read_avail_channel_raw(struct iio_channel *chan,
+			       const int **vals, int *length)
+{
+	int ret;
+	int type;
+
+	mutex_lock(&chan->indio_dev->info_exist_lock);
+	if (!chan->indio_dev->info) {
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	ret = iio_channel_read_avail(chan,
+				     vals, &type, length, IIO_CHAN_INFO_RAW);
+err_unlock:
+	mutex_unlock(&chan->indio_dev->info_exist_lock);
+
+	if (ret >= 0 && type != IIO_VAL_INT) {
+		/* raw values are assumed to be IIO_VAL_INT */
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iio_read_avail_channel_raw);
+
+static int iio_channel_read_max(struct iio_channel *chan,
+				int *val, int *val2, int *type,
+				enum iio_chan_info_enum info)
+{
+	int unused;
+	const int *vals;
+	int length;
+	int ret;
+
+	if (!val2)
+		val2 = &unused;
+
+	ret = iio_channel_read_avail(chan, &vals, type, &length, info);
+	switch (ret) {
+	case IIO_AVAIL_RANGE:
+		switch (*type) {
+		case IIO_VAL_INT:
+			*val = vals[2];
+			break;
+		default:
+			*val = vals[4];
+			*val2 = vals[5];
+		}
+		return 0;
+
+	case IIO_AVAIL_LIST:
+		if (length <= 0)
+			return -EINVAL;
+		switch (*type) {
+		case IIO_VAL_INT:
+			*val = vals[--length];
+			while (length) {
+				if (vals[--length] > *val)
+					*val = vals[length];
+			}
+			break;
+		default:
+			/* FIXME: learn about max for other iio values */
+			return -EINVAL;
+		}
+		return 0;
+
+	default:
+		return ret;
+	}
+}
+
+int iio_read_max_channel_raw(struct iio_channel *chan, int *val)
+{
+	int ret;
+	int type;
+
+	mutex_lock(&chan->indio_dev->info_exist_lock);
+	if (!chan->indio_dev->info) {
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	ret = iio_channel_read_max(chan, val, NULL, &type, IIO_CHAN_INFO_RAW);
+err_unlock:
+	mutex_unlock(&chan->indio_dev->info_exist_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iio_read_max_channel_raw);
+
 int iio_get_channel_type(struct iio_channel *chan, enum iio_chan_type *type)
 {
 	int ret = 0;
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 638157234357..47eeec3218b5 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -225,6 +225,34 @@ int iio_read_channel_processed(struct iio_channel *chan, int *val);
  */
 int iio_write_channel_raw(struct iio_channel *chan, int val);
 
+/**
+ * iio_read_max_channel_raw() - read maximum available raw value from a given
+ *				channel, i.e. the maximum possible value.
+ * @chan:		The channel being queried.
+ * @val:		Value read back.
+ *
+ * Note raw reads from iio channels are in adc counts and hence
+ * scale will need to be applied if standard units are required.
+ */
+int iio_read_max_channel_raw(struct iio_channel *chan, int *val);
+
+/**
+ * iio_read_avail_channel_raw() - read available raw values from a given channel
+ * @chan:		The channel being queried.
+ * @vals:		Available values read back.
+ * @length:		Number of entries in vals.
+ *
+ * Returns an error code, IIO_AVAIL_RANGE or IIO_AVAIL_LIST.
+ *
+ * For ranges, three vals are always returned; min, step and max.
+ * For lists, all the possible values are enumerated.
+ *
+ * Note raw available values from iio channels are in adc counts and
+ * hence scale will need to be applied if standard units are required.
+ */
+int iio_read_avail_channel_raw(struct iio_channel *chan,
+			       const int **vals, int *length);
+
 /**
  * iio_get_channel_type() - get the type of a channel
  * @channel:		The channel being queried.
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 849d524645e8..3f5ea2e9a39e 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -315,6 +315,23 @@ static inline bool iio_channel_has_info(const struct iio_chan_spec *chan,
 		(chan->info_mask_shared_by_all & BIT(type));
 }
 
+/**
+ * iio_channel_has_available() - Checks if a channel has an available attribute
+ * @chan: The channel to be queried
+ * @type: Type of the available attribute to be checked
+ *
+ * Returns true if the channel supports reporting available values for the
+ * given attribute type, false otherwise.
+ */
+static inline bool iio_channel_has_available(const struct iio_chan_spec *chan,
+					     enum iio_chan_info_enum type)
+{
+	return (chan->info_mask_separate_available & BIT(type)) |
+		(chan->info_mask_shared_by_type_available & BIT(type)) |
+		(chan->info_mask_shared_by_dir_available & BIT(type)) |
+		(chan->info_mask_shared_by_all_available & BIT(type));
+}
+
 #define IIO_CHAN_SOFT_TIMESTAMP(_si) {					\
 	.type = IIO_TIMESTAMP,						\
 	.channel = -1,							\
-- 
cgit v1.2.3


From 1c7dcf69eea3224474fe884a03b3e19b82d1101e Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Sun, 13 Nov 2016 22:20:44 +0100
Subject: fscrypt: Add in-place encryption mode

ext4 and f2fs require a bounce page when encrypting pages. However, not
all filesystems will need that (eg. UBIFS). This is handled via a
flag on fscrypt_operations where a fs implementation can select in-place
encryption over using a bounce page (which is the default).

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 25 +++++++++++++++----------
 include/linux/fscrypto.h |  6 ++++++
 2 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 98f87fe8f186..f38dc8aac2fe 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -217,8 +217,9 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
  * @plaintext_page: The page to encrypt. Must be locked.
  * @gfp_flags:      The gfp flag for memory allocation
  *
- * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
- * encryption context.
+ * Encrypts plaintext_page using the ctx encryption context. If
+ * the filesystem supports it, encryption is performed in-place, otherwise a
+ * new ciphertext_page is allocated and returned.
  *
  * Called on the page write path.  The caller must call
  * fscrypt_restore_control_page() on the returned ciphertext page to
@@ -231,7 +232,7 @@ struct page *fscrypt_encrypt_page(struct inode *inode,
 				struct page *plaintext_page, gfp_t gfp_flags)
 {
 	struct fscrypt_ctx *ctx;
-	struct page *ciphertext_page = NULL;
+	struct page *ciphertext_page = plaintext_page;
 	int err;
 
 	BUG_ON(!PageLocked(plaintext_page));
@@ -240,10 +241,12 @@ struct page *fscrypt_encrypt_page(struct inode *inode,
 	if (IS_ERR(ctx))
 		return (struct page *)ctx;
 
-	/* The encryption operation will require a bounce page. */
-	ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
-	if (IS_ERR(ciphertext_page))
-		goto errout;
+	if (!(inode->i_sb->s_cop->flags & FS_CFLG_INPLACE_ENCRYPTION)) {
+		/* The encryption operation will require a bounce page. */
+		ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
+		if (IS_ERR(ciphertext_page))
+			goto errout;
+	}
 
 	ctx->w.control_page = plaintext_page;
 	err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
@@ -253,9 +256,11 @@ struct page *fscrypt_encrypt_page(struct inode *inode,
 		ciphertext_page = ERR_PTR(err);
 		goto errout;
 	}
-	SetPagePrivate(ciphertext_page);
-	set_page_private(ciphertext_page, (unsigned long)ctx);
-	lock_page(ciphertext_page);
+	if (!(inode->i_sb->s_cop->flags & FS_CFLG_INPLACE_ENCRYPTION)) {
+		SetPagePrivate(ciphertext_page);
+		set_page_private(ciphertext_page, (unsigned long)ctx);
+		lock_page(ciphertext_page);
+	}
 	return ciphertext_page;
 
 errout:
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index ff8b11b26f31..5a65b0e3773f 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -153,10 +153,16 @@ struct fscrypt_name {
 #define fname_name(p)		((p)->disk_name.name)
 #define fname_len(p)		((p)->disk_name.len)
 
+/*
+ * fscrypt superblock flags
+ */
+#define FS_CFLG_INPLACE_ENCRYPTION (1U << 1)
+
 /*
  * crypto opertions for filesystems
  */
 struct fscrypt_operations {
+	unsigned int flags;
 	int (*get_context)(struct inode *, void *, size_t);
 	int (*key_prefix)(struct inode *, u8 **);
 	int (*prepare_context)(struct inode *);
-- 
cgit v1.2.3


From b50f7b268bad8088dfe7579a65cd910d8cc5c40f Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Sun, 13 Nov 2016 22:20:45 +0100
Subject: fscrypt: Allow fscrypt_decrypt_page() to function with non-writeback
 pages

Some filesystem might pass pages which do not have page->mapping->host
set to the encrypted inode. We want the caller to explicitly pass the
corresponding inode.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 11 ++++++-----
 fs/ext4/inode.c          |  5 +++--
 include/linux/fscrypto.h |  5 +++--
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index f38dc8aac2fe..222a70520565 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -271,7 +271,8 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
 
 /**
  * f2crypt_decrypt_page() - Decrypts a page in-place
- * @page: The page to decrypt. Must be locked.
+ * @inode: The encrypted inode to decrypt.
+ * @page:  The page to decrypt. Must be locked.
  *
  * Decrypts page in-place using the ctx encryption context.
  *
@@ -279,12 +280,12 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  *
  * Return: Zero on success, non-zero otherwise.
  */
-int fscrypt_decrypt_page(struct page *page)
+int fscrypt_decrypt_page(struct inode *inode, struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 
-	return do_page_crypto(page->mapping->host,
-			FS_DECRYPT, page->index, page, page, GFP_NOFS);
+	return do_page_crypto(inode, FS_DECRYPT, page->index, page, page,
+			GFP_NOFS);
 }
 EXPORT_SYMBOL(fscrypt_decrypt_page);
 
@@ -419,7 +420,7 @@ static void completion_pages(struct work_struct *work)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		int ret = fscrypt_decrypt_page(page);
+		int ret = fscrypt_decrypt_page(page->mapping->host, page);
 
 		if (ret) {
 			WARN_ON_ONCE(1);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9c064727ed62..4b7b842ec024 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1166,7 +1166,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 	if (unlikely(err))
 		page_zero_new_buffers(page, from, to);
 	else if (decrypt)
-		err = fscrypt_decrypt_page(page);
+		err = fscrypt_decrypt_page(page->mapping->host, page);
 	return err;
 }
 #endif
@@ -3743,7 +3743,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 			/* We expect the key to be set. */
 			BUG_ON(!fscrypt_has_encryption_key(inode));
 			BUG_ON(blocksize != PAGE_SIZE);
-			WARN_ON_ONCE(fscrypt_decrypt_page(page));
+			WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
+						page));
 		}
 	}
 	if (ext4_should_journal_data(inode)) {
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 5a65b0e3773f..8be8e9657c63 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -249,7 +249,7 @@ int fscrypt_initialize(void);
 extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
 extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
-extern int fscrypt_decrypt_page(struct page *);
+extern int fscrypt_decrypt_page(struct inode *, struct page *);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
@@ -298,7 +298,8 @@ static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline int fscrypt_notsupp_decrypt_page(struct page *p)
+static inline int fscrypt_notsupp_decrypt_page(struct inode *i,
+						struct page *p)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 7821d4dd4589ce5af54f3e46d04a29439ba3c2e5 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Sun, 13 Nov 2016 22:20:46 +0100
Subject: fscrypt: Enable partial page encryption

Not all filesystems work on full pages, thus we should allow them to
hand partial pages to fscrypt for en/decryption.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 42 ++++++++++++++++++++++++++----------------
 fs/ext4/inode.c          |  6 ++++--
 fs/ext4/page-io.c        |  2 +-
 fs/f2fs/data.c           |  2 ++
 include/linux/fscrypto.h | 16 +++++++++++-----
 5 files changed, 44 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 222a70520565..e170aa05011d 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -149,6 +149,7 @@ typedef enum {
 static int do_page_crypto(struct inode *inode,
 			fscrypt_direction_t rw, pgoff_t index,
 			struct page *src_page, struct page *dest_page,
+			unsigned int src_len, unsigned int src_offset,
 			gfp_t gfp_flags)
 {
 	struct {
@@ -179,10 +180,10 @@ static int do_page_crypto(struct inode *inode,
 	memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
 
 	sg_init_table(&dst, 1);
-	sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
+	sg_set_page(&dst, dest_page, src_len, src_offset);
 	sg_init_table(&src, 1);
-	sg_set_page(&src, src_page, PAGE_SIZE, 0);
-	skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
+	sg_set_page(&src, src_page, src_len, src_offset);
+	skcipher_request_set_crypt(req, &src, &dst, src_len, &xts_tweak);
 	if (rw == FS_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
 	else
@@ -213,9 +214,11 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
 
 /**
  * fscypt_encrypt_page() - Encrypts a page
- * @inode:          The inode for which the encryption should take place
- * @plaintext_page: The page to encrypt. Must be locked.
- * @gfp_flags:      The gfp flag for memory allocation
+ * @inode:            The inode for which the encryption should take place
+ * @plaintext_page:   The page to encrypt. Must be locked.
+ * @plaintext_len:    Length of plaintext within page
+ * @plaintext_offset: Offset of plaintext within page
+ * @gfp_flags:        The gfp flag for memory allocation
  *
  * Encrypts plaintext_page using the ctx encryption context. If
  * the filesystem supports it, encryption is performed in-place, otherwise a
@@ -229,13 +232,17 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
  * error value or NULL.
  */
 struct page *fscrypt_encrypt_page(struct inode *inode,
-				struct page *plaintext_page, gfp_t gfp_flags)
+				struct page *plaintext_page,
+				unsigned int plaintext_len,
+				unsigned int plaintext_offset,
+				gfp_t gfp_flags)
+
 {
 	struct fscrypt_ctx *ctx;
 	struct page *ciphertext_page = plaintext_page;
 	int err;
 
-	BUG_ON(!PageLocked(plaintext_page));
+	BUG_ON(plaintext_len % FS_CRYPTO_BLOCK_SIZE != 0);
 
 	ctx = fscrypt_get_ctx(inode, gfp_flags);
 	if (IS_ERR(ctx))
@@ -251,6 +258,7 @@ struct page *fscrypt_encrypt_page(struct inode *inode,
 	ctx->w.control_page = plaintext_page;
 	err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
 					plaintext_page, ciphertext_page,
+					plaintext_len, plaintext_offset,
 					gfp_flags);
 	if (err) {
 		ciphertext_page = ERR_PTR(err);
@@ -270,9 +278,11 @@ errout:
 EXPORT_SYMBOL(fscrypt_encrypt_page);
 
 /**
- * f2crypt_decrypt_page() - Decrypts a page in-place
- * @inode: The encrypted inode to decrypt.
+ * fscrypt_decrypt_page() - Decrypts a page in-place
+ * @inode: Encrypted inode to decrypt.
  * @page:  The page to decrypt. Must be locked.
+ * @len:   Number of bytes in @page to be decrypted.
+ * @offs:  Start of data in @page.
  *
  * Decrypts page in-place using the ctx encryption context.
  *
@@ -280,11 +290,10 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  *
  * Return: Zero on success, non-zero otherwise.
  */
-int fscrypt_decrypt_page(struct inode *inode, struct page *page)
+int fscrypt_decrypt_page(struct inode *inode, struct page *page,
+			unsigned int len, unsigned int offs)
 {
-	BUG_ON(!PageLocked(page));
-
-	return do_page_crypto(inode, FS_DECRYPT, page->index, page, page,
+	return do_page_crypto(inode, FS_DECRYPT, page->index, page, page, len, offs,
 			GFP_NOFS);
 }
 EXPORT_SYMBOL(fscrypt_decrypt_page);
@@ -312,7 +321,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
 	while (len--) {
 		err = do_page_crypto(inode, FS_ENCRYPT, lblk,
 					ZERO_PAGE(0), ciphertext_page,
-					GFP_NOFS);
+					PAGE_SIZE, 0, GFP_NOFS);
 		if (err)
 			goto errout;
 
@@ -420,7 +429,8 @@ static void completion_pages(struct work_struct *work)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		int ret = fscrypt_decrypt_page(page->mapping->host, page);
+		int ret = fscrypt_decrypt_page(page->mapping->host, page,
+				PAGE_SIZE, 0);
 
 		if (ret) {
 			WARN_ON_ONCE(1);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b7b842ec024..1d498c5e2990 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1166,7 +1166,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 	if (unlikely(err))
 		page_zero_new_buffers(page, from, to);
 	else if (decrypt)
-		err = fscrypt_decrypt_page(page->mapping->host, page);
+		err = fscrypt_decrypt_page(page->mapping->host, page,
+				PAGE_SIZE, 0);
 	return err;
 }
 #endif
@@ -3743,8 +3744,9 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 			/* We expect the key to be set. */
 			BUG_ON(!fscrypt_has_encryption_key(inode));
 			BUG_ON(blocksize != PAGE_SIZE);
+			BUG_ON(!PageLocked(page));
 			WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
-						page));
+						page, PAGE_SIZE, 0));
 		}
 	}
 	if (ext4_should_journal_data(inode)) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 0094923e5ebf..3d1d3d0f4303 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -470,7 +470,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 		gfp_t gfp_flags = GFP_NOFS;
 
 	retry_encrypt:
-		data_page = fscrypt_encrypt_page(inode, page, gfp_flags);
+		data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, gfp_flags);
 		if (IS_ERR(data_page)) {
 			ret = PTR_ERR(data_page);
 			if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ae194fd2fdb..fac207254e8d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1194,7 +1194,9 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
 							fio->old_blkaddr);
 retry_encrypt:
+		BUG_ON(!PageLocked(fio->page));
 		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
+								PAGE_SIZE, 0,
 								gfp_flags);
 		if (IS_ERR(fio->encrypted_page)) {
 			err = PTR_ERR(fio->encrypted_page);
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 8be8e9657c63..e05931272cea 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -248,8 +248,11 @@ int fscrypt_initialize(void);
 
 extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
-extern int fscrypt_decrypt_page(struct inode *, struct page *);
+extern struct page *fscrypt_encrypt_page(struct inode *, struct page *,
+						unsigned int, unsigned int,
+						gfp_t);
+extern int fscrypt_decrypt_page(struct inode *, struct page *, unsigned int,
+				unsigned int);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
@@ -293,13 +296,16 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
 }
 
 static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
-						struct page *p, gfp_t f)
+						struct page *p,
+						unsigned int len,
+						unsigned int offs,
+						gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline int fscrypt_notsupp_decrypt_page(struct inode *i,
-						struct page *p)
+static inline int fscrypt_notsupp_decrypt_page(struct inode *i, struct page *p,
+						unsigned int len, unsigned int offs)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 0b93e1b94b86f826d18a2aaf219a53e271274d49 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Sun, 13 Nov 2016 22:20:47 +0100
Subject: fscrypt: Constify struct inode pointer

Some filesystems, such as UBIFS, maintain a const pointer for struct
inode.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 10 +++++-----
 fs/crypto/fname.c        |  4 ++--
 include/linux/fscrypto.h | 22 +++++++++++-----------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index e170aa05011d..f5c5e84ea9db 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -88,7 +88,7 @@ EXPORT_SYMBOL(fscrypt_release_ctx);
  * Return: An allocated and initialized encryption context on success; error
  * value or NULL otherwise.
  */
-struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
+struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
 {
 	struct fscrypt_ctx *ctx = NULL;
 	struct fscrypt_info *ci = inode->i_crypt_info;
@@ -146,7 +146,7 @@ typedef enum {
 	FS_ENCRYPT,
 } fscrypt_direction_t;
 
-static int do_page_crypto(struct inode *inode,
+static int do_page_crypto(const struct inode *inode,
 			fscrypt_direction_t rw, pgoff_t index,
 			struct page *src_page, struct page *dest_page,
 			unsigned int src_len, unsigned int src_offset,
@@ -231,7 +231,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
  * Return: An allocated page with the encrypted content on success. Else, an
  * error value or NULL.
  */
-struct page *fscrypt_encrypt_page(struct inode *inode,
+struct page *fscrypt_encrypt_page(const struct inode *inode,
 				struct page *plaintext_page,
 				unsigned int plaintext_len,
 				unsigned int plaintext_offset,
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  *
  * Return: Zero on success, non-zero otherwise.
  */
-int fscrypt_decrypt_page(struct inode *inode, struct page *page,
+int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
 			unsigned int len, unsigned int offs)
 {
 	return do_page_crypto(inode, FS_DECRYPT, page->index, page, page, len, offs,
@@ -298,7 +298,7 @@ int fscrypt_decrypt_page(struct inode *inode, struct page *page,
 }
 EXPORT_SYMBOL(fscrypt_decrypt_page);
 
-int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
+int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 				sector_t pblk, unsigned int len)
 {
 	struct fscrypt_ctx *ctx;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 9a28133ac3b8..faeaf0be9400 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -220,7 +220,7 @@ static int digest_decode(const char *src, int len, char *dst)
 	return cp - dst;
 }
 
-u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
+u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen)
 {
 	int padding = 32;
 	struct fscrypt_info *ci = inode->i_crypt_info;
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
  * Allocates an output buffer that is sufficient for the crypto operation
  * specified by the context and the direction.
  */
-int fscrypt_fname_alloc_buffer(struct inode *inode,
+int fscrypt_fname_alloc_buffer(const struct inode *inode,
 				u32 ilen, struct fscrypt_str *crypto_str)
 {
 	unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen);
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index e05931272cea..e9be944a324c 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -212,7 +212,7 @@ static inline struct page *fscrypt_control_page(struct page *page)
 #endif
 }
 
-static inline int fscrypt_has_encryption_key(struct inode *inode)
+static inline int fscrypt_has_encryption_key(const struct inode *inode)
 {
 #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
 	return (inode->i_crypt_info != NULL);
@@ -246,17 +246,17 @@ static inline void fscrypt_set_d_op(struct dentry *dentry)
 extern struct kmem_cache *fscrypt_info_cachep;
 int fscrypt_initialize(void);
 
-extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
+extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(struct inode *, struct page *,
+extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
 						unsigned int, unsigned int,
 						gfp_t);
-extern int fscrypt_decrypt_page(struct inode *, struct page *, unsigned int,
+extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
 				unsigned int);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
-extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
+extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
 						unsigned int);
 /* policy.c */
 extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
@@ -273,8 +273,8 @@ extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
 extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
 				int lookup, struct fscrypt_name *);
 extern void fscrypt_free_filename(struct fscrypt_name *);
-extern u32 fscrypt_fname_encrypted_size(struct inode *, u32);
-extern int fscrypt_fname_alloc_buffer(struct inode *, u32,
+extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
+extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
 				struct fscrypt_str *);
 extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
 extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
@@ -284,7 +284,7 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
 #endif
 
 /* crypto.c */
-static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i,
+static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(const struct inode *i,
 							gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
@@ -295,7 +295,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
 	return;
 }
 
-static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
+static inline struct page *fscrypt_notsupp_encrypt_page(const struct inode *i,
 						struct page *p,
 						unsigned int len,
 						unsigned int offs,
@@ -304,7 +304,7 @@ static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline int fscrypt_notsupp_decrypt_page(struct inode *i, struct page *p,
+static inline int fscrypt_notsupp_decrypt_page(const struct inode *i, struct page *p,
 						unsigned int len, unsigned int offs)
 {
 	return -EOPNOTSUPP;
@@ -326,7 +326,7 @@ static inline void fscrypt_notsupp_restore_control_page(struct page *p)
 	return;
 }
 
-static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
+static inline int fscrypt_notsupp_zeroout_range(const struct inode *i, pgoff_t p,
 					sector_t s, unsigned int f)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 9c4bb8a3a9b4de21753053d667310c2b7cb39916 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Sun, 13 Nov 2016 22:20:48 +0100
Subject: fscrypt: Let fs select encryption index/tweak

Avoid re-use of page index as tweak for AES-XTS when multiple parts of
same page are encrypted. This will happen on multiple (partial) calls of
fscrypt_encrypt_page on same page.
page->index is only valid for writeback pages.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 11 +++++++----
 fs/ext4/inode.c          |  4 ++--
 fs/ext4/page-io.c        |  3 ++-
 fs/f2fs/data.c           |  5 +++--
 include/linux/fscrypto.h |  9 +++++----
 5 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index f5c5e84ea9db..b6029785714c 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -218,6 +218,8 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
  * @plaintext_page:   The page to encrypt. Must be locked.
  * @plaintext_len:    Length of plaintext within page
  * @plaintext_offset: Offset of plaintext within page
+ * @index:            Index for encryption. This is mainly the page index, but
+ *                    but might be different for multiple calls on same page.
  * @gfp_flags:        The gfp flag for memory allocation
  *
  * Encrypts plaintext_page using the ctx encryption context. If
@@ -235,7 +237,7 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
 				struct page *plaintext_page,
 				unsigned int plaintext_len,
 				unsigned int plaintext_offset,
-				gfp_t gfp_flags)
+				pgoff_t index, gfp_t gfp_flags)
 
 {
 	struct fscrypt_ctx *ctx;
@@ -256,7 +258,7 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
 	}
 
 	ctx->w.control_page = plaintext_page;
-	err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
+	err = do_page_crypto(inode, FS_ENCRYPT, index,
 					plaintext_page, ciphertext_page,
 					plaintext_len, plaintext_offset,
 					gfp_flags);
@@ -283,6 +285,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  * @page:  The page to decrypt. Must be locked.
  * @len:   Number of bytes in @page to be decrypted.
  * @offs:  Start of data in @page.
+ * @index: Index for encryption.
  *
  * Decrypts page in-place using the ctx encryption context.
  *
@@ -291,7 +294,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  * Return: Zero on success, non-zero otherwise.
  */
 int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
-			unsigned int len, unsigned int offs)
+			unsigned int len, unsigned int offs, pgoff_t index)
 {
 	return do_page_crypto(inode, FS_DECRYPT, page->index, page, page, len, offs,
 			GFP_NOFS);
@@ -430,7 +433,7 @@ static void completion_pages(struct work_struct *work)
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 		int ret = fscrypt_decrypt_page(page->mapping->host, page,
-				PAGE_SIZE, 0);
+				PAGE_SIZE, 0, page->index);
 
 		if (ret) {
 			WARN_ON_ONCE(1);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1d498c5e2990..1485ac273bfb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1167,7 +1167,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 		page_zero_new_buffers(page, from, to);
 	else if (decrypt)
 		err = fscrypt_decrypt_page(page->mapping->host, page,
-				PAGE_SIZE, 0);
+				PAGE_SIZE, 0, page->index);
 	return err;
 }
 #endif
@@ -3746,7 +3746,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 			BUG_ON(blocksize != PAGE_SIZE);
 			BUG_ON(!PageLocked(page));
 			WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
-						page, PAGE_SIZE, 0));
+						page, PAGE_SIZE, 0, page->index));
 		}
 	}
 	if (ext4_should_journal_data(inode)) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 3d1d3d0f4303..902a3e3059b3 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -470,7 +470,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 		gfp_t gfp_flags = GFP_NOFS;
 
 	retry_encrypt:
-		data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, gfp_flags);
+		data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
+						page->index, gfp_flags);
 		if (IS_ERR(data_page)) {
 			ret = PTR_ERR(data_page);
 			if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index fac207254e8d..435590c4b341 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1196,8 +1196,9 @@ int do_write_data_page(struct f2fs_io_info *fio)
 retry_encrypt:
 		BUG_ON(!PageLocked(fio->page));
 		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
-								PAGE_SIZE, 0,
-								gfp_flags);
+							PAGE_SIZE, 0,
+							fio->page->index,
+							gfp_flags);
 		if (IS_ERR(fio->encrypted_page)) {
 			err = PTR_ERR(fio->encrypted_page);
 			if (err == -ENOMEM) {
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index e9be944a324c..98c71e973a96 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -250,9 +250,9 @@ extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
 extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
 						unsigned int, unsigned int,
-						gfp_t);
+						pgoff_t, gfp_t);
 extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
-				unsigned int);
+				unsigned int, pgoff_t);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
@@ -299,13 +299,14 @@ static inline struct page *fscrypt_notsupp_encrypt_page(const struct inode *i,
 						struct page *p,
 						unsigned int len,
 						unsigned int offs,
-						gfp_t f)
+						pgoff_t index, gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline int fscrypt_notsupp_decrypt_page(const struct inode *i, struct page *p,
-						unsigned int len, unsigned int offs)
+						unsigned int len, unsigned int offs,
+						pgoff_t index)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From e29bd6f26703088930a5b3595623cf155ddea600 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Wed, 2 Nov 2016 11:55:33 +0000
Subject: KVM: arm64: vgic-its: Fix compatibility with 32-bit

Evaluate GITS_BASER_ENTRY_SIZE once as an int data (GITS_BASER<n>'s
Entry Size is 5-bit wide only), so when used as divider no reference
to __aeabi_uldivmod is generated when build for AArch32.

Use unsigned long long for GITS_BASER_PAGE_SIZE_* since they are
used in conjunction with 64-bit data.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h |  8 ++++----
 virt/kvm/arm/vgic/vgic-its.c       | 11 ++++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index b7e34313cdfe..0deea34494e7 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -295,10 +295,10 @@
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
 #define GITS_BASER_PAGE_SIZE_SHIFT	(8)
-#define GITS_BASER_PAGE_SIZE_4K		(0UL << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_16K	(1UL << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_64K	(2UL << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_MASK	(3UL << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K		(0ULL << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_16K	(1ULL << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_64K	(2ULL << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_MASK	(3ULL << GITS_BASER_PAGE_SIZE_SHIFT)
 #define GITS_BASER_PAGES_MAX		256
 #define GITS_BASER_PAGES_SHIFT		(0)
 #define GITS_BASER_NR_PAGES(r)		(((r) & 0xff) + 1)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d04eea..8c2b3cdcb2c5 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -632,21 +632,22 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
 	int index;
 	u64 indirect_ptr;
 	gfn_t gfn;
+	int esz = GITS_BASER_ENTRY_SIZE(baser);
 
 	if (!(baser & GITS_BASER_INDIRECT)) {
 		phys_addr_t addr;
 
-		if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser)))
+		if (id >= (l1_tbl_size / esz))
 			return false;
 
-		addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser);
+		addr = BASER_ADDRESS(baser) + id * esz;
 		gfn = addr >> PAGE_SHIFT;
 
 		return kvm_is_visible_gfn(its->dev->kvm, gfn);
 	}
 
 	/* calculate and check the index into the 1st level */
-	index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
+	index = id / (SZ_64K / esz);
 	if (index >= (l1_tbl_size / sizeof(u64)))
 		return false;
 
@@ -670,8 +671,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
 	indirect_ptr &= GENMASK_ULL(51, 16);
 
 	/* Find the address of the actual entry */
-	index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
-	indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser);
+	index = id % (SZ_64K / esz);
+	indirect_ptr += index * esz;
 	gfn = indirect_ptr >> PAGE_SHIFT;
 
 	return kvm_is_visible_gfn(its->dev->kvm, gfn);
-- 
cgit v1.2.3


From 51f8cc9e818a9e6df376db7aeb5822d43e58cfb3 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 14 Nov 2016 12:16:26 +0000
Subject: iommu/dma: Implement dma_{map,unmap}_resource()

With the new dma_{map,unmap}_resource() functions added to the DMA API
for the benefit of cases like slave DMA, add suitable implementations to
the arsenal of our generic layer. Since cache maintenance should not be
a concern, these can both be standalone callback implementations without
the need for arch code wrappers.

CC: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 24 +++++++++++++++++++++---
 include/linux/dma-iommu.h |  4 ++++
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index c5ab8667e6f2..2db0d641cf45 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -432,13 +432,12 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
 	return ret;
 }
 
-dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, int prot)
+static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
+		size_t size, int prot)
 {
 	dma_addr_t dma_addr;
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct iova_domain *iovad = cookie_iovad(domain);
-	phys_addr_t phys = page_to_phys(page) + offset;
 	size_t iova_off = iova_offset(iovad, phys);
 	size_t len = iova_align(iovad, size + iova_off);
 	struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev));
@@ -454,6 +453,12 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 	return dma_addr + iova_off;
 }
 
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, int prot)
+{
+	return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
+}
+
 void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
@@ -624,6 +629,19 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
 }
 
+dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	return __iommu_dma_map(dev, phys, size,
+			dma_direction_to_prot(dir, false) | IOMMU_MMIO);
+}
+
+void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+}
+
 int iommu_dma_supported(struct device *dev, u64 mask)
 {
 	/*
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 32c589062bd9..7f7e9a7e3839 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -61,6 +61,10 @@ void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
 void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
+		size_t size, enum dma_data_direction dir, unsigned long attrs);
+void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs);
 int iommu_dma_supported(struct device *dev, u64 mask);
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
-- 
cgit v1.2.3


From 9a1dc3891255afd836f355b117fd6b975d0b1eb2 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Mon, 14 Nov 2016 15:21:14 -0600
Subject: of/pci: Add of_pci_get_max_link_speed() to parse max-link-speed from
 DT

This new helper function could be used by host drivers to get the limitaion
of max link speed provided by DT.  If the property isn't assigned or is
invalid, it will return -EINVAL to the caller.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rob Herring <robh@kernel.org>
---
 drivers/of/of_pci.c    | 21 +++++++++++++++++++++
 include/linux/of_pci.h |  7 +++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index b58be12ab277..0ee42c3e66a1 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -119,6 +119,27 @@ int of_get_pci_domain_nr(struct device_node *node)
 }
 EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
 
+/**
+ * This function will try to find the limitation of link speed by finding
+ * a property called "max-link-speed" of the given device node.
+ *
+ * @node: device tree node with the max link speed information
+ *
+ * Returns the associated max link speed from DT, or a negative value if the
+ * required property is not found or is invalid.
+ */
+int of_pci_get_max_link_speed(struct device_node *node)
+{
+	u32 max_link_speed;
+
+	if (of_property_read_u32(node, "max-link-speed", &max_link_speed) ||
+	    max_link_speed > 4)
+		return -EINVAL;
+
+	return max_link_speed;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_max_link_speed);
+
 /**
  * of_pci_check_probe_only - Setup probe only mode if linux,pci-probe-only
  *                           is present and valid
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 7fd5cfce9140..0e0974eceb80 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -16,6 +16,7 @@ int of_pci_get_devfn(struct device_node *np);
 int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
+int of_pci_get_max_link_speed(struct device_node *node);
 void of_pci_check_probe_only(void);
 int of_pci_map_rid(struct device_node *np, u32 rid,
 		   const char *map_name, const char *map_mask_name,
@@ -62,6 +63,12 @@ static inline int of_pci_map_rid(struct device_node *np, u32 rid,
 	return -EINVAL;
 }
 
+static inline int
+of_pci_get_max_link_speed(struct device_node *node)
+{
+	return -EINVAL;
+}
+
 static inline void of_pci_check_probe_only(void) { }
 #endif
 
-- 
cgit v1.2.3


From 8fc947230fbc5da1d6d198c758b894f847bf2a28 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 19 Oct 2016 19:40:03 -0700
Subject: rpmsg: smd: Expose edge registration functions

The edge registration functions is to be used from a remoteproc driver
to register and unregister an edge as the remote processor comes and
goes.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/rpmsg/qcom_smd.c       |  1 +
 include/linux/rpmsg/qcom_smd.h | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 include/linux/rpmsg/qcom_smd.h

(limited to 'include/linux')

diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 06fef2b4c814..394a1b52e471 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -25,6 +25,7 @@
 #include <linux/soc/qcom/smem.h>
 #include <linux/wait.h>
 #include <linux/rpmsg.h>
+#include <linux/rpmsg/qcom_smd.h>
 
 #include "rpmsg_internal.h"
 
diff --git a/include/linux/rpmsg/qcom_smd.h b/include/linux/rpmsg/qcom_smd.h
new file mode 100644
index 000000000000..e674b2e3074b
--- /dev/null
+++ b/include/linux/rpmsg/qcom_smd.h
@@ -0,0 +1,33 @@
+
+#ifndef _LINUX_RPMSG_QCOM_SMD_H
+#define _LINUX_RPMSG_QCOM_SMD_H
+
+#include <linux/device.h>
+
+struct qcom_smd_edge;
+
+#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) || IS_ENABLED(CONFIG_QCOM_SMD)
+
+struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
+					     struct device_node *node);
+int qcom_smd_unregister_edge(struct qcom_smd_edge *edge);
+
+#else
+
+static inline struct qcom_smd_edge *
+qcom_smd_register_edge(struct device *parent,
+		       struct device_node *node)
+{
+	return ERR_PTR(-ENXIO);
+}
+
+static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+	return -ENXIO;
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From aab8d8022304b646fbf6eed5f6ac9bc21d54d2fd Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 19 Oct 2016 19:40:06 -0700
Subject: remoteproc: Assign kref to rproc_vdev

No functional change

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c     | 10 ++++++++++
 drivers/remoteproc/remoteproc_internal.h |  1 +
 drivers/remoteproc/remoteproc_virtio.c   | 10 ++++++----
 include/linux/remoteproc.h               |  3 +++
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index b5e314fe1f4c..67f581d0c488 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -356,6 +356,8 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 	if (!rvdev)
 		return -ENOMEM;
 
+	kref_init(&rvdev->refcount);
+
 	rvdev->rproc = rproc;
 
 	/* parse the vrings */
@@ -384,6 +386,14 @@ free_rvdev:
 	return ret;
 }
 
+void rproc_vdev_release(struct kref *ref)
+{
+	struct rproc_vdev *rvdev = container_of(ref, struct rproc_vdev, refcount);
+
+	list_del(&rvdev->node);
+	kfree(rvdev);
+}
+
 /**
  * rproc_handle_trace() - handle a shared trace buffer resource
  * @rproc: the remote processor
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index c2c3e4762b90..1e9e5b3f021c 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -49,6 +49,7 @@ struct rproc_fw_ops {
 void rproc_release(struct kref *kref);
 irqreturn_t rproc_vq_interrupt(struct rproc *rproc, int vq_id);
 int rproc_boot_nowait(struct rproc *rproc);
+void rproc_vdev_release(struct kref *ref);
 
 /* from remoteproc_virtio.c */
 int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id);
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 01870a16d6d2..0d1ad3ed149d 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -282,14 +282,13 @@ static const struct virtio_config_ops rproc_virtio_config_ops = {
  * Never call this function directly; it will be called by the driver
  * core when needed.
  */
-static void rproc_vdev_release(struct device *dev)
+static void rproc_virtio_dev_release(struct device *dev)
 {
 	struct virtio_device *vdev = dev_to_virtio(dev);
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
 
-	list_del(&rvdev->node);
-	kfree(rvdev);
+	kref_put(&rvdev->refcount, rproc_vdev_release);
 
 	put_device(&rproc->dev);
 }
@@ -313,7 +312,7 @@ int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 	vdev->id.device	= id,
 	vdev->config = &rproc_virtio_config_ops,
 	vdev->dev.parent = dev;
-	vdev->dev.release = rproc_vdev_release;
+	vdev->dev.release = rproc_virtio_dev_release;
 
 	/*
 	 * We're indirectly making a non-temporary copy of the rproc pointer
@@ -325,6 +324,9 @@ int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 	 */
 	get_device(&rproc->dev);
 
+	/* Reference the vdev and vring allocations */
+	kref_get(&rvdev->refcount);
+
 	ret = register_virtio_device(vdev);
 	if (ret) {
 		put_device(&rproc->dev);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index f6d5e66854e4..5def5c84b9c0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -487,6 +487,7 @@ struct rproc_vring {
 
 /**
  * struct rproc_vdev - remoteproc state for a supported virtio device
+ * @refcount: reference counter for the vdev and vring allocations
  * @node: list node
  * @rproc: the rproc handle
  * @vdev: the virio device
@@ -494,6 +495,8 @@ struct rproc_vring {
  * @rsc_offset: offset of the vdev's resource entry
  */
 struct rproc_vdev {
+	struct kref refcount;
+
 	struct list_head node;
 	struct rproc *rproc;
 	struct virtio_device vdev;
-- 
cgit v1.2.3


From f5bcb35387efc994cfd88f87039d7cdb6c1a06a2 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 19 Oct 2016 19:40:09 -0700
Subject: remoteproc: Decouple vdev resources and devices

Represent the virtio device part of the vdev resources as remoteproc
subdevices to finalize the decoupling of the virtio resource and device
handling.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 35 ++++++++++++++++++++---------------
 include/linux/remoteproc.h           |  5 +++++
 2 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 62de765a9498..57c66397f31f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -296,6 +296,20 @@ void rproc_free_vring(struct rproc_vring *rvring)
 	rsc->vring[idx].notifyid = -1;
 }
 
+static int rproc_vdev_do_probe(struct rproc_subdev *subdev)
+{
+	struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
+
+	return rproc_add_virtio_dev(rvdev, rvdev->id);
+}
+
+static void rproc_vdev_do_remove(struct rproc_subdev *subdev)
+{
+	struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
+
+	rproc_remove_virtio_dev(rvdev);
+}
+
 /**
  * rproc_handle_vdev() - handle a vdev fw resource
  * @rproc: the remote processor
@@ -358,6 +372,7 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 
 	kref_init(&rvdev->refcount);
 
+	rvdev->id = rsc->id;
 	rvdev->rproc = rproc;
 
 	/* parse the vrings */
@@ -382,18 +397,14 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 
 	list_add_tail(&rvdev->node, &rproc->rvdevs);
 
-	/* it is now safe to add the virtio device */
-	ret = rproc_add_virtio_dev(rvdev, rsc->id);
-	if (ret)
-		goto remove_rvdev;
+	rproc_add_subdev(rproc, &rvdev->subdev,
+			 rproc_vdev_do_probe, rproc_vdev_do_remove);
 
 	return 0;
 
 unwind_vring_allocations:
 	for (i--; i >= 0; i--)
 		rproc_free_vring(&rvdev->vring[i]);
-remove_rvdev:
-	list_del(&rvdev->node);
 free_rvdev:
 	kfree(rvdev);
 	return ret;
@@ -403,6 +414,7 @@ void rproc_vdev_release(struct kref *ref)
 {
 	struct rproc_vdev *rvdev = container_of(ref, struct rproc_vdev, refcount);
 	struct rproc_vring *rvring;
+	struct rproc *rproc = rvdev->rproc;
 	int id;
 
 	for (id = 0; id < ARRAY_SIZE(rvdev->vring); id++) {
@@ -413,6 +425,7 @@ void rproc_vdev_release(struct kref *ref)
 		rproc_free_vring(rvring);
 	}
 
+	rproc_remove_subdev(rproc, &rvdev->subdev);
 	list_del(&rvdev->node);
 	kfree(rvdev);
 }
@@ -842,10 +855,8 @@ static void rproc_resource_cleanup(struct rproc *rproc)
 	}
 
 	/* clean up remote vdev entries */
-	list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node) {
-		rproc_remove_virtio_dev(rvdev);
+	list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node)
 		kref_put(&rvdev->refcount, rproc_vdev_release);
-	}
 }
 
 /*
@@ -1507,8 +1518,6 @@ EXPORT_SYMBOL(rproc_put);
  */
 int rproc_del(struct rproc *rproc)
 {
-	struct rproc_vdev *rvdev, *tmp;
-
 	if (!rproc)
 		return -EINVAL;
 
@@ -1520,10 +1529,6 @@ int rproc_del(struct rproc *rproc)
 	if (rproc->auto_boot)
 		rproc_shutdown(rproc);
 
-	/* clean up remote vdev entries */
-	list_for_each_entry_safe(rvdev, tmp, &rproc->rvdevs, node)
-		rproc_remove_virtio_dev(rvdev);
-
 	/* the rproc is downref'ed as soon as it's removed from the klist */
 	mutex_lock(&rproc_list_mutex);
 	list_del(&rproc->node);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 5def5c84b9c0..8265d351c9f0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -488,6 +488,8 @@ struct rproc_vring {
 /**
  * struct rproc_vdev - remoteproc state for a supported virtio device
  * @refcount: reference counter for the vdev and vring allocations
+ * @subdev: handle for registering the vdev as a rproc subdevice
+ * @id: virtio device id (as in virtio_ids.h)
  * @node: list node
  * @rproc: the rproc handle
  * @vdev: the virio device
@@ -497,6 +499,9 @@ struct rproc_vring {
 struct rproc_vdev {
 	struct kref refcount;
 
+	struct rproc_subdev subdev;
+
+	unsigned int id;
 	struct list_head node;
 	struct rproc *rproc;
 	struct virtio_device vdev;
-- 
cgit v1.2.3


From cda8529346935fc86f476999ac4fbfe4e17abf11 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 19 Oct 2016 19:40:12 -0700
Subject: remoteproc: Merge table_ptr and cached_table pointers

As all vdev resources are allocated before we boot the remote processor
we no longer need to support modifying the resource table while the
remote is running.

This saves us from the table_ptr dance, but more importantly allow the
remote processor to enable security lock down of the loaded table memory
region.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 26 ++++++++++----------------
 include/linux/remoteproc.h           |  4 +---
 2 files changed, 11 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 1abd78bfce6a..f0f6ec1ab12b 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -889,15 +889,13 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	/*
 	 * Create a copy of the resource table. When a virtio device starts
 	 * and calls vring_new_virtqueue() the address of the allocated vring
-	 * will be stored in the cached_table. Before the device is started,
-	 * cached_table will be copied into device memory.
+	 * will be stored in the table_ptr. Before the device is started,
+	 * table_ptr will be copied into device memory.
 	 */
-	rproc->cached_table = kmemdup(table, tablesz, GFP_KERNEL);
-	if (!rproc->cached_table)
+	rproc->table_ptr = kmemdup(table, tablesz, GFP_KERNEL);
+	if (!rproc->table_ptr)
 		goto clean_up;
 
-	rproc->table_ptr = rproc->cached_table;
-
 	/* reset max_notifyid */
 	rproc->max_notifyid = -1;
 
@@ -916,18 +914,16 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	}
 
 	/*
-	 * The starting device has been given the rproc->cached_table as the
+	 * The starting device has been given the rproc->table_ptr as the
 	 * resource table. The address of the vring along with the other
-	 * allocated resources (carveouts etc) is stored in cached_table.
+	 * allocated resources (carveouts etc) is stored in table_ptr.
 	 * In order to pass this information to the remote device we must copy
 	 * this information to device memory. We also update the table_ptr so
 	 * that any subsequent changes will be applied to the loaded version.
 	 */
 	loaded_table = rproc_find_loaded_rsc_table(rproc, fw);
-	if (loaded_table) {
-		memcpy(loaded_table, rproc->cached_table, tablesz);
-		rproc->table_ptr = loaded_table;
-	}
+	if (loaded_table)
+		memcpy(loaded_table, rproc->table_ptr, tablesz);
 
 	/* power up the remote processor */
 	ret = rproc->ops->start(rproc);
@@ -955,8 +951,7 @@ stop_rproc:
 clean_up_resources:
 	rproc_resource_cleanup(rproc);
 clean_up:
-	kfree(rproc->cached_table);
-	rproc->cached_table = NULL;
+	kfree(rproc->table_ptr);
 	rproc->table_ptr = NULL;
 
 	rproc_disable_iommu(rproc);
@@ -1206,8 +1201,7 @@ void rproc_shutdown(struct rproc *rproc)
 	rproc_disable_iommu(rproc);
 
 	/* Free the copy of the resource table */
-	kfree(rproc->cached_table);
-	rproc->cached_table = NULL;
+	kfree(rproc->table_ptr);
 	rproc->table_ptr = NULL;
 
 	/* if in crash state, unlock crash handler */
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8265d351c9f0..e2f3a3281d8f 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -408,8 +408,7 @@ enum rproc_crash_type {
  * @crash_comp: completion used to sync crash handler and the rproc reload
  * @recovery_disabled: flag that state if recovery was disabled
  * @max_notifyid: largest allocated notify id.
- * @table_ptr: pointer to the resource table in effect
- * @cached_table: copy of the resource table
+ * @table_ptr: our copy of the resource table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
  */
 struct rproc {
@@ -441,7 +440,6 @@ struct rproc {
 	bool recovery_disabled;
 	int max_notifyid;
 	struct resource_table *table_ptr;
-	struct resource_table *cached_table;
 	bool has_iommu;
 	bool auto_boot;
 };
-- 
cgit v1.2.3


From 6bfec6d94556683c91c937a20576118183af9e1d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 14 Nov 2016 11:55:40 +0000
Subject: dma-buf: Use fence_get_rcu_safe() for retrieving the exclusive fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current code is subject to a race where we may try to acquire a
reference on a stale fence:

[13703.335118] WARNING: CPU: 1 PID: 14975 at ./include/linux/kref.h:46 i915_gem_object_wait+0x1a3/0x1c0
[13703.335184] Modules linked in:
[13703.335202] CPU: 1 PID: 14975 Comm: gem_concurrent_ Not tainted 4.9.0-rc4+ #26
[13703.335216] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
[13703.335233]  ffffc90002f5bcc8 ffffffff812807de 0000000000000000 0000000000000000
[13703.335257]  ffffc90002f5bd08 ffffffff81073811 0000002e80000000 ffff88026bf7c780
[13703.335279]  7fffffffffffffff 0000000000000001 ffff88027045a550 ffff88026bf7c780
[13703.335301] Call Trace:
[13703.335316]  [<ffffffff812807de>] dump_stack+0x4d/0x6f
[13703.335331]  [<ffffffff81073811>] __warn+0xc1/0xe0
[13703.335343]  [<ffffffff810738e8>] warn_slowpath_null+0x18/0x20
[13703.335355]  [<ffffffff813ac443>] i915_gem_object_wait+0x1a3/0x1c0
[13703.335367]  [<ffffffff813ae8ec>] i915_gem_set_domain_ioctl+0xcc/0x330
[13703.335386]  [<ffffffff813534ab>] drm_ioctl+0x1cb/0x410
[13703.335400]  [<ffffffff813ae820>] ? i915_gem_obj_prepare_shmem_write+0x1d0/0x1d0
[13703.335416]  [<ffffffff8135359b>] ? drm_ioctl+0x2bb/0x410
[13703.335429]  [<ffffffff8117d32f>] do_vfs_ioctl+0x8f/0x5c0
[13703.335442]  [<ffffffff8117d89c>] SyS_ioctl+0x3c/0x70
[13703.335456]  [<ffffffff815a07a4>] entry_SYSCALL_64_fastpath+0x17/0x98
[13703.335558] ---[ end trace fd24176416ba6981 ]---
[13703.382778] general protection fault: 0000 [#1] SMP
[13703.382802] Modules linked in:
[13703.382816] CPU: 1 PID: 14967 Comm: gem_concurrent_ Tainted: G        W       4.9.0-rc4+ #26
[13703.382828] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
[13703.382841] task: ffff880275458000 task.stack: ffffc90002f18000
[13703.382849] RIP: 0010:[<ffffffff813b3534>]  [<ffffffff813b3534>] i915_gem_request_retire+0x2b4/0x320
[13703.382870] RSP: 0018:ffffc90002f1bbc8  EFLAGS: 00010293
[13703.382878] RAX: dead000000000200 RBX: ffff88026bf7dce8 RCX: dead000000000100
[13703.382887] RDX: dead000000000100 RSI: ffff88026bf7c930 RDI: ffff88026bf7dd00
[13703.382897] RBP: ffffc90002f1bbf8 R08: 00000000ffffffff R09: ffff88026b89a000
[13703.382905] R10: 0000000000000001 R11: ffff88026bbe8fe0 R12: ffff88026bf7c000
[13703.382913] R13: ffff880275af8000 R14: ffff88026bf7c180 R15: dead000000000200
[13703.382922] FS:  00007f89e787d740(0000) GS:ffff88027fd00000(0000) knlGS:0000000000000000
[13703.382934] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[13703.382942] CR2: 00007f9053d2e000 CR3: 000000026d414000 CR4: 00000000001006e0
[13703.382951] Stack:
[13703.382958]  ffff880275413000 ffffc90002f1bde8 ffff880275af8000 ffff880274e8a600
[13703.382976]  ffff880276a06000 ffffc90002f1bde8 ffffc90002f1bc38 ffffffff813b48c5
[13703.382995]  ffffc90002f1bc00 ffffc90002f1bde8 ffff88026972a440 0000000000000000
[13703.383021] Call Trace:
[13703.383032]  [<ffffffff813b48c5>] i915_gem_request_alloc+0xa5/0x350
[13703.383043]  [<ffffffff813a17c3>] i915_gem_do_execbuffer.isra.41+0x7b3/0x18b0
[13703.383055]  [<ffffffff813b144c>] ? i915_gem_object_get_sg+0x25c/0x2b0
[13703.383065]  [<ffffffff813b1d4d>] ? i915_gem_object_get_page+0x1d/0x50
[13703.383076]  [<ffffffff813b28cc>] ? i915_gem_pwrite_ioctl+0x66c/0x6d0
[13703.383086]  [<ffffffff813a2c25>] i915_gem_execbuffer2+0x95/0x1e0
[13703.383096]  [<ffffffff813534ab>] drm_ioctl+0x1cb/0x410
[13703.383105]  [<ffffffff813a2b90>] ? i915_gem_execbuffer+0x2d0/0x2d0
[13703.383117]  [<ffffffff810c3df0>] ? hrtimer_start_range_ns+0x1a0/0x310
[13703.383128]  [<ffffffff8117d32f>] do_vfs_ioctl+0x8f/0x5c0
[13703.383140]  [<ffffffff810c60e8>] ? SyS_timer_settime+0x118/0x1a0
[13703.383150]  [<ffffffff8117d89c>] SyS_ioctl+0x3c/0x70
[13703.383162]  [<ffffffff815a07a4>] entry_SYSCALL_64_fastpath+0x17/0x98
[13703.383172] Code: 49 39 c6 48 8d 70 e8 48 8d 5f e8 75 16 eb 47 48 8d 43 18 48 8b 53 18 48 89 de 49 39 c6 48 8d 5a e8 74 33 48 8b 56 08 48 8b 46 10 <48> 89 42 08 48 89 10 f6 46 38 01 48 89 4e 08 4c 89 7e 10 74 cf
[13703.383557] RIP  [<ffffffff813b3534>] i915_gem_request_retire+0x2b4/0x320
[13703.383570]  RSP <ffffc90002f1bbc8>
[13703.383586] ---[ end trace fd24176416ba6982 ]---

This is fixed by using the kref_get_unless_zero() as a full memory
barrier to validate the fence is still the current exclusive fence before
returning it back to the caller. (Note the fix only requires using
dma_fence_get_rcu() and correct handling, but we may as well use the
helper rather than inline equivalent code.)

Note: Issue can only be hit with the i915 driver.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org
Fixes: d07f0e59b2c7 ("drm/i915: Move GEM activity tracking into a common struct reservation_object")
Reviewed-by: Christian König <christian.koenig@amd.com>.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161114115540.31155-1-chris@chris-wilson.co.uk
---
 include/linux/reservation.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index 2e313cca08f0..d9706a6f5ae2 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -177,17 +177,14 @@ static inline struct dma_fence *
 reservation_object_get_excl_rcu(struct reservation_object *obj)
 {
 	struct dma_fence *fence;
-	unsigned seq;
-retry:
-	seq = read_seqcount_begin(&obj->seq);
+
+	if (!rcu_access_pointer(obj->fence_excl))
+		return NULL;
+
 	rcu_read_lock();
-	fence = rcu_dereference(obj->fence_excl);
-	if (read_seqcount_retry(&obj->seq, seq)) {
-		rcu_read_unlock();
-		goto retry;
-	}
-	fence = dma_fence_get(fence);
+	fence = dma_fence_get_rcu_safe(&obj->fence_excl);
 	rcu_read_unlock();
+
 	return fence;
 }
 
-- 
cgit v1.2.3


From 40565b5aedd6d0ca88b7dfd3859d709d2f6f8cf9 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 15 Nov 2016 03:06:51 +0100
Subject: sched/cputime, powerpc, s390: Make scaled cputime arch specific

Only s390 and powerpc have hardware facilities allowing to measure
cputimes scaled by frequency. On all other architectures
utimescaled/stimescaled are equal to utime/stime (however they are
accounted separately).

Remove {u,s}timescaled accounting on all architectures except
powerpc and s390, where those values are explicitly accounted
in the proper places.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161031162143.GB12646@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/Kconfig                |  3 +++
 arch/ia64/kernel/time.c     |  4 +--
 arch/powerpc/Kconfig        |  1 +
 arch/powerpc/kernel/time.c  |  6 +++--
 arch/s390/Kconfig           |  1 +
 arch/s390/kernel/vtime.c    |  9 ++++---
 include/linux/kernel_stat.h |  4 +--
 include/linux/sched.h       | 23 ++++++++++++-----
 kernel/fork.c               |  2 ++
 kernel/sched/cputime.c      | 61 +++++++++++----------------------------------
 10 files changed, 53 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 659bdd079277..abab6590f08f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -512,6 +512,9 @@ config HAVE_CONTEXT_TRACKING
 config HAVE_VIRT_CPU_ACCOUNTING
 	bool
 
+config ARCH_HAS_SCALED_CPUTIME
+	bool
+
 config HAVE_VIRT_CPU_ACCOUNTING_GEN
 	bool
 	default y if 64BIT
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 6f892b94e906..021f44ab4bfb 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -68,7 +68,7 @@ void vtime_account_user(struct task_struct *tsk)
 
 	if (ti->ac_utime) {
 		delta_utime = cycle_to_cputime(ti->ac_utime);
-		account_user_time(tsk, delta_utime, delta_utime);
+		account_user_time(tsk, delta_utime);
 		ti->ac_utime = 0;
 	}
 }
@@ -112,7 +112,7 @@ void vtime_account_system(struct task_struct *tsk)
 {
 	cputime_t delta = vtime_delta(tsk);
 
-	account_system_time(tsk, 0, delta, delta);
+	account_system_time(tsk, 0, delta);
 }
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c34cd7..c7f120aaa98f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -160,6 +160,7 @@ config PPC
 	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select GENERIC_CPU_AUTOPROBE
 	select HAVE_VIRT_CPU_ACCOUNTING
+	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_KERNEL_GZIP
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 81051986739c..be9751f1cb2a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -358,7 +358,8 @@ void vtime_account_system(struct task_struct *tsk)
 	unsigned long delta, sys_scaled, stolen;
 
 	delta = vtime_delta(tsk, &sys_scaled, &stolen);
-	account_system_time(tsk, 0, delta, sys_scaled);
+	account_system_time(tsk, 0, delta);
+	tsk->stimescaled += sys_scaled;
 	if (stolen)
 		account_steal_time(stolen);
 }
@@ -391,7 +392,8 @@ void vtime_account_user(struct task_struct *tsk)
 	acct->user_time = 0;
 	acct->user_time_scaled = 0;
 	acct->utime_sspurr = 0;
-	account_user_time(tsk, utime, utimescaled);
+	account_user_time(tsk, utime);
+	tsk->utimescaled += utimescaled;
 }
 
 #ifdef CONFIG_PPC32
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 426481d4cc86..028f97be5bae 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -171,6 +171,7 @@ config S390
 	select SYSCTL_EXCEPTION_TRACE
 	select TTY
 	select VIRT_CPU_ACCOUNTING
+	select ARCH_HAS_SCALED_CPUTIME
 	select VIRT_TO_BUS
 	select HAVE_NMI
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 856e30d8463f..1bd5dde2d5a9 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -137,8 +137,10 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 		user_scaled = (user_scaled * mult) / div;
 		system_scaled = (system_scaled * mult) / div;
 	}
-	account_user_time(tsk, user, user_scaled);
-	account_system_time(tsk, hardirq_offset, system, system_scaled);
+	account_user_time(tsk, user);
+	tsk->utimescaled += user_scaled;
+	account_system_time(tsk, hardirq_offset, system);
+	tsk->stimescaled += system_scaled;
 
 	steal = S390_lowcore.steal_timer;
 	if ((s64) steal > 0) {
@@ -202,7 +204,8 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 
 		system_scaled = (system_scaled * mult) / div;
 	}
-	account_system_time(tsk, 0, system, system_scaled);
+	account_system_time(tsk, 0, system);
+	tsk->stimescaled += system_scaled;
 
 	virt_timer_forward(system);
 }
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64ad434..00f776816aa3 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,8 +78,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
-extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
+extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3762fe4e3a80..f72e81395dac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1647,7 +1647,10 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	cputime_t utime, stime, utimescaled, stimescaled;
+	cputime_t utime, stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	cputime_t utimescaled, stimescaled;
+#endif
 	cputime_t gtime;
 	struct prev_cputime prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -2240,8 +2243,6 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void task_cputime(struct task_struct *t,
 			 cputime_t *utime, cputime_t *stime);
-extern void task_cputime_scaled(struct task_struct *t,
-				cputime_t *utimescaled, cputime_t *stimescaled);
 extern cputime_t task_gtime(struct task_struct *t);
 #else
 static inline void task_cputime(struct task_struct *t,
@@ -2253,6 +2254,13 @@ static inline void task_cputime(struct task_struct *t,
 		*stime = t->stime;
 }
 
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 static inline void task_cputime_scaled(struct task_struct *t,
 				       cputime_t *utimescaled,
 				       cputime_t *stimescaled)
@@ -2262,12 +2270,15 @@ static inline void task_cputime_scaled(struct task_struct *t,
 	if (stimescaled)
 		*stimescaled = t->stimescaled;
 }
-
-static inline cputime_t task_gtime(struct task_struct *t)
+#else
+static inline void task_cputime_scaled(struct task_struct *t,
+				       cputime_t *utimescaled,
+				       cputime_t *stimescaled)
 {
-	return t->gtime;
+	task_cputime(t, utimescaled, stimescaled);
 }
 #endif
+
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 997ac1d584f7..600e93b5e539 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1551,7 +1551,9 @@ static __latent_entropy struct task_struct *copy_process(
 	init_sigpending(&p->pending);
 
 	p->utime = p->stime = p->gtime = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 	p->utimescaled = p->stimescaled = 0;
+#endif
 	prev_cputime_init(&p->prev_cputime);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3229c7244fdd..ba55ebf77f9a 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -128,16 +128,13 @@ static inline void task_group_account_field(struct task_struct *p, int index,
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in user space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
-void account_user_time(struct task_struct *p, cputime_t cputime,
-		       cputime_t cputime_scaled)
+void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	int index;
 
 	/* Add user time to process. */
 	p->utime += cputime;
-	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
 
 	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
@@ -153,16 +150,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
  * Account guest cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in virtual machine since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
-static void account_guest_time(struct task_struct *p, cputime_t cputime,
-			       cputime_t cputime_scaled)
+static void account_guest_time(struct task_struct *p, cputime_t cputime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	/* Add guest time to process. */
 	p->utime += cputime;
-	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
 	p->gtime += cputime;
 
@@ -180,16 +174,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
  * Account system cpu time to a process and desired cpustat field
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- * @target_cputime64: pointer to cpustat field that has to be updated
+ * @index: pointer to cpustat field that has to be updated
  */
 static inline
-void __account_system_time(struct task_struct *p, cputime_t cputime,
-			cputime_t cputime_scaled, int index)
+void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
 {
 	/* Add system time to process. */
 	p->stime += cputime;
-	p->stimescaled += cputime_scaled;
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
@@ -204,15 +195,14 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
  * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
 void account_system_time(struct task_struct *p, int hardirq_offset,
-			 cputime_t cputime, cputime_t cputime_scaled)
+			 cputime_t cputime)
 {
 	int index;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
-		account_guest_time(p, cputime, cputime_scaled);
+		account_guest_time(p, cputime);
 		return;
 	}
 
@@ -223,7 +213,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	else
 		index = CPUTIME_SYSTEM;
 
-	__account_system_time(p, cputime, cputime_scaled, index);
+	__account_system_time(p, cputime, index);
 }
 
 /*
@@ -410,15 +400,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 		 * So, we have to handle it separately here.
 		 * Also, p->stime needs to be updated for ksoftirqd.
 		 */
-		__account_system_time(p, cputime, cputime, CPUTIME_SOFTIRQ);
+		__account_system_time(p, cputime, CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
-		account_user_time(p, cputime, cputime);
+		account_user_time(p, cputime);
 	} else if (p == rq->idle) {
 		account_idle_time(cputime);
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
-		account_guest_time(p, cputime, cputime);
+		account_guest_time(p, cputime);
 	} else {
-		__account_system_time(p, cputime, cputime, CPUTIME_SYSTEM);
+		__account_system_time(p, cputime, CPUTIME_SYSTEM);
 	}
 }
 
@@ -521,9 +511,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime -= steal;
 
 	if (user_tick)
-		account_user_time(p, cputime, cputime);
+		account_user_time(p, cputime);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, cputime, cputime);
+		account_system_time(p, HARDIRQ_OFFSET, cputime);
 	else
 		account_idle_time(cputime);
 }
@@ -744,7 +734,7 @@ static void __vtime_account_system(struct task_struct *tsk)
 {
 	cputime_t delta_cpu = get_vtime_delta(tsk);
 
-	account_system_time(tsk, irq_count(), delta_cpu, delta_cpu);
+	account_system_time(tsk, irq_count(), delta_cpu);
 }
 
 void vtime_account_system(struct task_struct *tsk)
@@ -765,7 +755,7 @@ void vtime_account_user(struct task_struct *tsk)
 	tsk->vtime_snap_whence = VTIME_SYS;
 	if (vtime_delta(tsk)) {
 		delta_cpu = get_vtime_delta(tsk);
-		account_user_time(tsk, delta_cpu, delta_cpu);
+		account_user_time(tsk, delta_cpu);
 	}
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -921,25 +911,4 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 	if (stime)
 		*stime += sdelta;
 }
-
-void task_cputime_scaled(struct task_struct *t,
-			 cputime_t *utimescaled, cputime_t *stimescaled)
-{
-	cputime_t udelta, sdelta;
-
-	if (!vtime_accounting_enabled()) {
-		if (utimescaled)
-			*utimescaled = t->utimescaled;
-		if (stimescaled)
-			*stimescaled = t->stimescaled;
-		return;
-	}
-
-	fetch_task_cputime(t, utimescaled, stimescaled,
-			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
-	if (utimescaled)
-		*utimescaled += udelta;
-	if (stimescaled)
-		*stimescaled += sdelta;
-}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 353c50ebe329daaf2c94dc41c1c481cbba2a31fd Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 15 Nov 2016 03:06:52 +0100
Subject: sched/cputime: Simplify task_cputime()

Now since fetch_task_cputime() has no other users than task_cputime(),
its code could be used directly in task_cputime().

Moreover since only 2 task_cputime() calls of 17 use a NULL argument,
we can add dummy variables to those calls and remove NULL checks from
task_cputimes().

Also remove NULL checks from task_cputimes_scaled().

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1479175612-14718-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apm_32.c       |  4 +--
 include/linux/sched.h          | 12 +++------
 kernel/sched/cputime.c         | 57 +++++++++++-------------------------------
 kernel/time/posix-cpu-timers.c |  4 +--
 4 files changed, 23 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index c7364bd633e1..d90749b883f5 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
 	static unsigned int last_stime; /* = 0 */
-	cputime_t stime;
+	cputime_t stime, utime;
 
 	int apm_idle_done = 0;
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
 	unsigned int bucket;
 
 recalc:
-	task_cputime(current, NULL, &stime);
+	task_cputime(current, &utime, &stime);
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
 	} else if (jiffies_since_last_check > idle_period) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f72e81395dac..fe3ce46cfd03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2248,10 +2248,8 @@ extern cputime_t task_gtime(struct task_struct *t);
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
 {
-	if (utime)
-		*utime = t->utime;
-	if (stime)
-		*stime = t->stime;
+	*utime = t->utime;
+	*stime = t->stime;
 }
 
 static inline cputime_t task_gtime(struct task_struct *t)
@@ -2265,10 +2263,8 @@ static inline void task_cputime_scaled(struct task_struct *t,
 				       cputime_t *utimescaled,
 				       cputime_t *stimescaled)
 {
-	if (utimescaled)
-		*utimescaled = t->utimescaled;
-	if (stimescaled)
-		*stimescaled = t->stimescaled;
+	*utimescaled = t->utimescaled;
+	*stimescaled = t->stimescaled;
 }
 #else
 static inline void task_cputime_scaled(struct task_struct *t,
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ba55ebf77f9a..7700a9cba335 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -851,29 +851,25 @@ cputime_t task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-static void
-fetch_task_cputime(struct task_struct *t,
-		   cputime_t *u_dst, cputime_t *s_dst,
-		   cputime_t *u_src, cputime_t *s_src,
-		   cputime_t *udelta, cputime_t *sdelta)
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 {
+	cputime_t delta;
 	unsigned int seq;
-	unsigned long long delta;
 
-	do {
-		*udelta = 0;
-		*sdelta = 0;
+	if (!vtime_accounting_enabled()) {
+		*utime = t->utime;
+		*stime = t->stime;
+		return;
+	}
 
+	do {
 		seq = read_seqcount_begin(&t->vtime_seqcount);
 
-		if (u_dst)
-			*u_dst = *u_src;
-		if (s_dst)
-			*s_dst = *s_src;
+		*utime = t->utime;
+		*stime = t->stime;
 
 		/* Task is sleeping, nothing to add */
-		if (t->vtime_snap_whence == VTIME_INACTIVE ||
-		    is_idle_task(t))
+		if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
 			continue;
 
 		delta = vtime_delta(t);
@@ -882,33 +878,10 @@ fetch_task_cputime(struct task_struct *t,
 		 * Task runs either in user or kernel space, add pending nohz time to
 		 * the right place.
 		 */
-		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
-			*udelta = delta;
-		} else {
-			if (t->vtime_snap_whence == VTIME_SYS)
-				*sdelta = delta;
-		}
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
+			*utime += delta;
+		else if (t->vtime_snap_whence == VTIME_SYS)
+			*stime += delta;
 	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
 }
-
-
-void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
-{
-	cputime_t udelta, sdelta;
-
-	if (!vtime_accounting_enabled()) {
-		if (utime)
-			*utime = t->utime;
-		if (stime)
-			*stime = t->stime;
-		return;
-	}
-
-	fetch_task_cputime(t, utime, stime, &t->utime,
-			   &t->stime, &udelta, &sdelta);
-	if (utime)
-		*utime += udelta;
-	if (stime)
-		*stime += sdelta;
-}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 39008d78927a..e887ffc8eef3 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -133,9 +133,9 @@ static inline unsigned long long prof_ticks(struct task_struct *p)
 }
 static inline unsigned long long virt_ticks(struct task_struct *p)
 {
-	cputime_t utime;
+	cputime_t utime, stime;
 
-	task_cputime(p, &utime, NULL);
+	task_cputime(p, &utime, &stime);
 
 	return cputime_to_expires(utime);
 }
-- 
cgit v1.2.3


From 13f59a78c6d69a9bf4c8989dd5f3396f54a2fe41 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 11 Nov 2016 17:59:21 +0000
Subject: iommu: Allow taking a reference on a group directly

iommu_group_get_for_dev() expects that the IOMMU driver's device_group
callback return a group with a reference held for the given device.
Whilst allocating a new group is fine, and pci_device_group() correctly
handles reusing an existing group, there is no general means for IOMMU
drivers doing their own group lookup to take additional references on an
existing group pointer without having to also store device pointers or
resort to elaborate trickery.

Add an IOMMU-driver-specific function to fill the hole.

Acked-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 13 +++++++++++++
 include/linux/iommu.h |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9a2f1960873b..9408c3145483 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -551,6 +551,19 @@ struct iommu_group *iommu_group_get(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_group_get);
 
+/**
+ * iommu_group_ref_get - Increment reference on a group
+ * @group: the group to use, must not be NULL
+ *
+ * This function is called by iommu drivers to take additional references on an
+ * existing group.  Returns the given group for convenience.
+ */
+struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
+{
+	kobject_get(group->devices_kobj);
+	return group;
+}
+
 /**
  * iommu_group_put - Decrement group reference
  * @group: the group to use
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 436dc21318af..431638110c6a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -253,6 +253,7 @@ extern void iommu_group_remove_device(struct device *dev);
 extern int iommu_group_for_each_dev(struct iommu_group *group, void *data,
 				    int (*fn)(struct device *, void *));
 extern struct iommu_group *iommu_group_get(struct device *dev);
+extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
 extern void iommu_group_put(struct iommu_group *group);
 extern int iommu_group_register_notifier(struct iommu_group *group,
 					 struct notifier_block *nb);
-- 
cgit v1.2.3


From 8a5846bf5d4756db1d4e03305217189c1c0e7160 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 14 Nov 2016 12:58:18 +0100
Subject: doc/dma-buf: Fix up include directives
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Would be great if everony could add

$ make DOCBOOKS="" htmldocs

to their build scripts to catch these. 0day should also report them,
not sure why it failed to spot this.

Fixes: f54d1867005c ("dma-buf: Rename struct fence to dma_fence")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Christian König <christian.koenig@amd.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161114115825.22050-4-daniel.vetter@ffwll.ch
---
 Documentation/driver-api/infrastructure.rst | 8 ++++----
 include/linux/dma-fence.h                   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst
index 5d50d6733db3..a0d65eb49055 100644
--- a/Documentation/driver-api/infrastructure.rst
+++ b/Documentation/driver-api/infrastructure.rst
@@ -86,10 +86,10 @@ reservation
 fence
 ~~~~~
 
-.. kernel-doc:: drivers/dma-buf/fence.c
+.. kernel-doc:: drivers/dma-buf/dma-fence.c
    :export:
 
-.. kernel-doc:: include/linux/fence.h
+.. kernel-doc:: include/linux/dma-fence.h
    :internal:
 
 .. kernel-doc:: drivers/dma-buf/seqno-fence.c
@@ -98,10 +98,10 @@ fence
 .. kernel-doc:: include/linux/seqno-fence.h
    :internal:
 
-.. kernel-doc:: drivers/dma-buf/fence-array.c
+.. kernel-doc:: drivers/dma-buf/dma-fence-array.c
    :export:
 
-.. kernel-doc:: include/linux/fence-array.h
+.. kernel-doc:: include/linux/dma-fence-array.h
    :internal:
 
 .. kernel-doc:: drivers/dma-buf/reservation.c
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index fcf4b1971eba..d51a7d23c358 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -225,7 +225,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
 
 /**
  * dma_fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
- * @fence:	[in]	pointer to fence to increase refcount of
+ * @fencep:	[in]	pointer to fence to increase refcount of
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  * This function handles acquiring a reference to a fence that may be
-- 
cgit v1.2.3


From 0f5225b024d4bffd682aab008c35862e8fdc1865 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 7 Oct 2016 17:43:51 +0200
Subject: locking/mutex, drm: Introduce mutex_trylock_recursive()

By popular DRM demand, introduce mutex_trylock_recursive() to fix up the
two GEM users.

Without this it is very easy for these drivers to get stuck in
low-memory situations and trigger OOM. Work is in progress to remove the
need for this in at least i915.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Jason Low <jason.low2@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Terry Rudd <terry.rudd@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 15 ++++++++++++---
 drivers/gpu/drm/msm/msm_gem_shrinker.c   | 16 ++++++++++++----
 include/linux/mutex.h                    | 31 +++++++++++++++++++++++++++++++
 scripts/checkpatch.pl                    |  6 ++++++
 4 files changed, 61 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index e9bd2a81d03a..c450076d2f9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -227,11 +227,20 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 
 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 {
-	if (!mutex_trylock(&dev->struct_mutex))
+	switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+	case MUTEX_TRYLOCK_FAILED:
 		return false;
 
-	*unlock = true;
-	return true;
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
+
+	case MUTEX_TRYLOCK_RECURSIVE:
+		*unlock = false;
+		return true;
+	}
+
+	BUG();
 }
 
 static unsigned long
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 6d2e885bd58e..b77bca75bb5f 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -20,13 +20,21 @@
 
 static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 {
-	if (!mutex_trylock(&dev->struct_mutex))
+	switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+	case MUTEX_TRYLOCK_FAILED:
 		return false;
 
-	*unlock = true;
-	return true;
-}
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
+
+	case MUTEX_TRYLOCK_RECURSIVE:
+		*unlock = false;
+		return true;
+	}
 
+	BUG();
+}
 
 static unsigned long
 msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 4d3bccabbea5..6a902f0a2148 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -189,4 +189,35 @@ extern void mutex_unlock(struct mutex *lock);
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+/*
+ * These values are chosen such that FAIL and SUCCESS match the
+ * values of the regular mutex_trylock().
+ */
+enum mutex_trylock_recursive_enum {
+	MUTEX_TRYLOCK_FAILED    = 0,
+	MUTEX_TRYLOCK_SUCCESS   = 1,
+	MUTEX_TRYLOCK_RECURSIVE,
+};
+
+/**
+ * mutex_trylock_recursive - trylock variant that allows recursive locking
+ * @lock: mutex to be locked
+ *
+ * This function should not be used, _ever_. It is purely for hysterical GEM
+ * raisins, and once those are gone this will be removed.
+ *
+ * Returns:
+ *  MUTEX_TRYLOCK_FAILED    - trylock failed,
+ *  MUTEX_TRYLOCK_SUCCESS   - lock acquired,
+ *  MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
+ */
+static inline __deprecated __must_check enum mutex_trylock_recursive_enum
+mutex_trylock_recursive(struct mutex *lock)
+{
+	if (unlikely(__mutex_owner(lock) == current))
+		return MUTEX_TRYLOCK_RECURSIVE;
+
+	return mutex_trylock(lock);
+}
+
 #endif /* __LINUX_MUTEX_H */
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a8368d1c4348..23f462f64a3f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -6076,6 +6076,12 @@ sub process {
 			}
 		}
 
+# check for mutex_trylock_recursive usage
+		if ($line =~ /mutex_trylock_recursive/) {
+			ERROR("LOCKING",
+			      "recursive locking is bad, do not use this ever.\n" . $herecurr);
+		}
+
 # check for lockdep_set_novalidate_class
 		if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
 		    $line =~ /__lockdep_no_validate__\s*\)/ ) {
-- 
cgit v1.2.3


From 4a4f86cc7d6bc74522f581341a2cae3119d5a0f5 Mon Sep 17 00:00:00 2001
From: pravin shelar <pshelar@ovn.org>
Date: Sun, 13 Nov 2016 20:43:52 -0800
Subject: vxlan: avoid vlan processing in vxlan device.

VxLan device does not have special handling for vlan taging on egress.
Therefore it does not make sense to expose vlan offloading feature.
This patch does not change vxlan functinality.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c     |  9 +--------
 include/linux/if_vlan.h | 16 ----------------
 2 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5264c1a49d86..7bebce190270 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1750,18 +1750,13 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	}
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
-			+ VXLAN_HLEN + iphdr_len
-			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+			+ VXLAN_HLEN + iphdr_len;
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
 	if (unlikely(err))
 		goto out_free;
 
-	skb = vlan_hwaccel_push_inside(skb);
-	if (WARN_ON(!skb))
-		return -ENOMEM;
-
 	err = iptunnel_handle_offloads(skb, type);
 	if (err)
 		goto out_free;
@@ -2529,10 +2524,8 @@ static void vxlan_setup(struct net_device *dev)
 	dev->features   |= NETIF_F_GSO_SOFTWARE;
 
 	dev->vlan_features = dev->features;
-	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	netif_keep_dst(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 3319d97d789d..8d5fcd6284ce 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -399,22 +399,6 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb)
 		skb->vlan_tci = 0;
 	return skb;
 }
-/*
- * vlan_hwaccel_push_inside - pushes vlan tag to the payload
- * @skb: skbuff to tag
- *
- * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the
- * VLAN tag from @skb->vlan_tci inside to the payload.
- *
- * Following the skb_unshare() example, in case of error, the calling function
- * doesn't have to worry about freeing the original skb.
- */
-static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb)
-{
-	if (skb_vlan_tag_present(skb))
-		skb = __vlan_hwaccel_push_inside(skb);
-	return skb;
-}
 
 /**
  * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting
-- 
cgit v1.2.3


From e86a8987e458a1826f509c41494b0b29a61144a7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 15 Nov 2016 10:06:30 -0800
Subject: net: phy: Add phy_ethtool_nway_reset

This function just calls into genphy_restart_aneg() to perform an
autonegotation restart.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 11 +++++++++++
 include/linux/phy.h   |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e6dd222fddb1..73adbaa9ac86 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1441,3 +1441,14 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
 	return phy_ethtool_ksettings_set(phydev, cmd);
 }
 EXPORT_SYMBOL(phy_ethtool_set_link_ksettings);
+
+int phy_ethtool_nway_reset(struct net_device *ndev)
+{
+	struct phy_device *phydev = ndev->phydev;
+
+	if (!phydev)
+		return -ENODEV;
+
+	return genphy_restart_aneg(phydev);
+}
+EXPORT_SYMBOL(phy_ethtool_nway_reset);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9880d73a2c3d..b9bd3b4f4ea1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -860,6 +860,7 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev,
 				   struct ethtool_link_ksettings *cmd);
 int phy_ethtool_set_link_ksettings(struct net_device *ndev,
 				   const struct ethtool_link_ksettings *cmd);
+int phy_ethtool_nway_reset(struct net_device *ndev);
 
 int __init mdio_bus_init(void);
 void mdio_bus_exit(void);
-- 
cgit v1.2.3


From 663deb47880f2283809669563c5a52ac7c6aef1a Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 20 Oct 2016 00:34:01 -0700
Subject: pstore: Allow prz to control need for locking

In preparation of not locking at all for certain buffers depending on if
there's contention, make locking optional depending on the initialization
of the prz.

Signed-off-by: Joel Fernandes <joelaf@google.com>
[kees: moved locking flag into prz instead of via caller arguments]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram.c            |  5 +++--
 fs/pstore/ram_core.c       | 24 +++++++++++++++---------
 include/linux/pstore_ram.h | 10 +++++++++-
 3 files changed, 27 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 6d1393965b0a..86ac59066fba 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -432,7 +432,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
 	for (i = 0; i < cxt->max_dump_cnt; i++) {
 		cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
 						  &cxt->ecc_info,
-						  cxt->memtype);
+						  cxt->memtype, 0);
 		if (IS_ERR(cxt->przs[i])) {
 			err = PTR_ERR(cxt->przs[i]);
 			dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
@@ -469,7 +469,8 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
 		return -ENOMEM;
 	}
 
-	*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype);
+	*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
+				  cxt->memtype, 0);
 	if (IS_ERR(*prz)) {
 		int err = PTR_ERR(*prz);
 
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index cb92055e6016..a857338b7dab 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -53,9 +53,10 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
 {
 	int old;
 	int new;
-	unsigned long flags;
+	unsigned long flags = 0;
 
-	raw_spin_lock_irqsave(&prz->buffer_lock, flags);
+	if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+		raw_spin_lock_irqsave(&prz->buffer_lock, flags);
 
 	old = atomic_read(&prz->buffer->start);
 	new = old + a;
@@ -63,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
 		new -= prz->buffer_size;
 	atomic_set(&prz->buffer->start, new);
 
-	raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
+	if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+		raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
 
 	return old;
 }
@@ -73,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
 {
 	size_t old;
 	size_t new;
-	unsigned long flags;
+	unsigned long flags = 0;
 
-	raw_spin_lock_irqsave(&prz->buffer_lock, flags);
+	if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+		raw_spin_lock_irqsave(&prz->buffer_lock, flags);
 
 	old = atomic_read(&prz->buffer->size);
 	if (old == prz->buffer_size)
@@ -87,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
 	atomic_set(&prz->buffer->size, new);
 
 exit:
-	raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
+	if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+		raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
 }
 
 static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -463,7 +467,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
 }
 
 static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
-				    struct persistent_ram_ecc_info *ecc_info)
+				    struct persistent_ram_ecc_info *ecc_info,
+				    unsigned long flags)
 {
 	int ret;
 
@@ -492,6 +497,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
 	prz->buffer->sig = sig;
 	persistent_ram_zap(prz);
 	prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
+	prz->flags = flags;
 
 	return 0;
 }
@@ -516,7 +522,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
 
 struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 			u32 sig, struct persistent_ram_ecc_info *ecc_info,
-			unsigned int memtype)
+			unsigned int memtype, u32 flags)
 {
 	struct persistent_ram_zone *prz;
 	int ret = -ENOMEM;
@@ -531,7 +537,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 	if (ret)
 		goto err;
 
-	ret = persistent_ram_post_init(prz, sig, ecc_info);
+	ret = persistent_ram_post_init(prz, sig, ecc_info, flags);
 	if (ret)
 		goto err;
 
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 244d2423dbaf..4058bf991868 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -24,6 +24,13 @@
 #include <linux/list.h>
 #include <linux/types.h>
 
+/*
+ * Choose whether access to the RAM zone requires locking or not.  If a zone
+ * can be written to from different CPUs like with ftrace for example, then
+ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
+ */
+#define PRZ_FLAG_NO_LOCK	BIT(0)
+
 struct persistent_ram_buffer;
 struct rs_control;
 
@@ -40,6 +47,7 @@ struct persistent_ram_zone {
 	void *vaddr;
 	struct persistent_ram_buffer *buffer;
 	size_t buffer_size;
+	u32 flags;
 	raw_spinlock_t buffer_lock;
 
 	/* ECC correction */
@@ -56,7 +64,7 @@ struct persistent_ram_zone {
 
 struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 			u32 sig, struct persistent_ram_ecc_info *ecc_info,
-			unsigned int memtype);
+			unsigned int memtype, u32 flags);
 void persistent_ram_free(struct persistent_ram_zone *prz);
 void persistent_ram_zap(struct persistent_ram_zone *prz);
 
-- 
cgit v1.2.3


From a1cf53ac6d156721afa86453d5e8423461881231 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 20 Oct 2016 00:34:04 -0700
Subject: ramoops: Split ftrace buffer space into per-CPU zones

If the RAMOOPS_FLAG_FTRACE_PER_CPU flag is passed to ramoops pdata, split
the ftrace space into multiple zones depending on the number of CPUs.

This speeds up the performance of function tracing by about 280% in my
tests as we avoid the locking. The trade off being lesser space available
per CPU. Let the ramoops user decide which option they want based on pdata
flag.

Signed-off-by: Joel Fernandes <joelaf@google.com>
[kees: added max_ftrace_cnt to track size, added DT logic and docs]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 .../bindings/reserved-memory/ramoops.txt           |  3 +
 fs/pstore/ram.c                                    | 72 +++++++++++++++++-----
 include/linux/pstore_ram.h                         |  3 +
 3 files changed, 61 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
index e81f821a2135..0eba562fe5c6 100644
--- a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
@@ -46,3 +46,6 @@ Optional properties:
   (defaults to buffered mappings)
 
 - no-dump-oops: if present, only dump panics (defaults to panics and oops)
+
+- flags: if present, pass ramoops behavioral flags (defaults to 0,
+  see include/linux/pstore_ram.h RAMOOPS_FLAG_* for flag values).
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 34294c32af0b..0ed3fec04c9b 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(ramoops_ecc,
 struct ramoops_context {
 	struct persistent_ram_zone **przs;
 	struct persistent_ram_zone *cprz;
-	struct persistent_ram_zone *fprz;
+	struct persistent_ram_zone **fprzs;
 	struct persistent_ram_zone *mprz;
 	phys_addr_t phys_addr;
 	unsigned long size;
@@ -97,12 +97,14 @@ struct ramoops_context {
 	size_t ftrace_size;
 	size_t pmsg_size;
 	int dump_oops;
+	u32 flags;
 	struct persistent_ram_ecc_info ecc_info;
 	unsigned int max_dump_cnt;
 	unsigned int dump_write_cnt;
 	/* _read_cnt need clear on ramoops_pstore_open */
 	unsigned int dump_read_cnt;
 	unsigned int console_read_cnt;
+	unsigned int max_ftrace_cnt;
 	unsigned int ftrace_read_cnt;
 	unsigned int pmsg_read_cnt;
 	struct pstore_info pstore;
@@ -219,9 +221,17 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	if (!prz_ok(prz))
 		prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
 					   1, id, type, PSTORE_TYPE_CONSOLE, 0);
-	if (!prz_ok(prz))
-		prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
-					   1, id, type, PSTORE_TYPE_FTRACE, 0);
+	if (!prz_ok(prz)) {
+		while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt && !prz) {
+			prz = ramoops_get_next_prz(cxt->fprzs,
+					&cxt->ftrace_read_cnt,
+					cxt->max_ftrace_cnt, id, type,
+					PSTORE_TYPE_FTRACE, 0);
+			if (!prz_ok(prz))
+				continue;
+		}
+	}
+
 	if (!prz_ok(prz))
 		prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
 					   1, id, type, PSTORE_TYPE_PMSG, 0);
@@ -283,9 +293,19 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
 		persistent_ram_write(cxt->cprz, buf, size);
 		return 0;
 	} else if (type == PSTORE_TYPE_FTRACE) {
-		if (!cxt->fprz)
+		int zonenum;
+
+		if (!cxt->fprzs)
 			return -ENOMEM;
-		persistent_ram_write(cxt->fprz, buf, size);
+		/*
+		 * Choose zone by if we're using per-cpu buffers.
+		 */
+		if (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+			zonenum = smp_processor_id();
+		else
+			zonenum = 0;
+
+		persistent_ram_write(cxt->fprzs[zonenum], buf, size);
 		return 0;
 	} else if (type == PSTORE_TYPE_PMSG) {
 		pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__);
@@ -363,7 +383,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
 		prz = cxt->cprz;
 		break;
 	case PSTORE_TYPE_FTRACE:
-		prz = cxt->fprz;
+		if (id >= cxt->max_ftrace_cnt)
+			return -EINVAL;
+		prz = cxt->fprzs[id];
 		break;
 	case PSTORE_TYPE_PMSG:
 		prz = cxt->mprz;
@@ -394,14 +416,22 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
 {
 	int i;
 
-	if (!cxt->przs)
-		return;
+	/* Free dump PRZs */
+	if (cxt->przs) {
+		for (i = 0; i < cxt->max_dump_cnt; i++)
+			persistent_ram_free(cxt->przs[i]);
 
-	for (i = 0; i < cxt->max_dump_cnt; i++)
-		persistent_ram_free(cxt->przs[i]);
+		kfree(cxt->przs);
+		cxt->max_dump_cnt = 0;
+	}
 
-	kfree(cxt->przs);
-	cxt->max_dump_cnt = 0;
+	/* Free ftrace PRZs */
+	if (cxt->fprzs) {
+		for (i = 0; i < cxt->max_ftrace_cnt; i++)
+			persistent_ram_free(cxt->fprzs[i]);
+		kfree(cxt->fprzs);
+		cxt->max_ftrace_cnt = 0;
+	}
 }
 
 static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
@@ -567,6 +597,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
 	parse_size("ftrace-size", pdata->ftrace_size);
 	parse_size("pmsg-size", pdata->pmsg_size);
 	parse_size("ecc-size", pdata->ecc_info.ecc_size);
+	parse_size("flags", pdata->flags);
 
 #undef parse_size
 
@@ -624,6 +655,7 @@ static int ramoops_probe(struct platform_device *pdev)
 	cxt->ftrace_size = pdata->ftrace_size;
 	cxt->pmsg_size = pdata->pmsg_size;
 	cxt->dump_oops = pdata->dump_oops;
+	cxt->flags = pdata->flags;
 	cxt->ecc_info = pdata->ecc_info;
 
 	paddr = cxt->phys_addr;
@@ -640,8 +672,14 @@ static int ramoops_probe(struct platform_device *pdev)
 	if (err)
 		goto fail_init_cprz;
 
-	err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size,
-			       LINUX_VERSION_CODE);
+	cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+				? nr_cpu_ids
+				: 1;
+	err = ramoops_init_przs(dev, cxt, &cxt->fprzs, &paddr, cxt->ftrace_size,
+				-1, &cxt->max_ftrace_cnt,
+				LINUX_VERSION_CODE,
+				(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+					? PRZ_FLAG_NO_LOCK : 0);
 	if (err)
 		goto fail_init_fprz;
 
@@ -705,7 +743,6 @@ fail_clear:
 	cxt->pstore.bufsize = 0;
 	persistent_ram_free(cxt->mprz);
 fail_init_mprz:
-	persistent_ram_free(cxt->fprz);
 fail_init_fprz:
 	persistent_ram_free(cxt->cprz);
 fail_init_cprz:
@@ -724,7 +761,6 @@ static int ramoops_remove(struct platform_device *pdev)
 	cxt->pstore.bufsize = 0;
 
 	persistent_ram_free(cxt->mprz);
-	persistent_ram_free(cxt->fprz);
 	persistent_ram_free(cxt->cprz);
 	ramoops_free_przs(cxt);
 
@@ -766,6 +802,8 @@ static void ramoops_register_dummy(void)
 	dummy_data->ftrace_size = ramoops_ftrace_size;
 	dummy_data->pmsg_size = ramoops_pmsg_size;
 	dummy_data->dump_oops = dump_oops;
+	dummy_data->flags = RAMOOPS_FLAG_FTRACE_PER_CPU;
+
 	/*
 	 * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
 	 * (using 1 byte for ECC isn't much of use anyway).
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 4058bf991868..9395f06e8372 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -86,6 +86,8 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
  * @mem_address	physical memory address to contain ramoops
  */
 
+#define RAMOOPS_FLAG_FTRACE_PER_CPU	BIT(0)
+
 struct ramoops_platform_data {
 	unsigned long	mem_size;
 	phys_addr_t	mem_address;
@@ -95,6 +97,7 @@ struct ramoops_platform_data {
 	unsigned long	ftrace_size;
 	unsigned long	pmsg_size;
 	int		dump_oops;
+	u32		flags;
 	struct persistent_ram_ecc_info ecc_info;
 };
 
-- 
cgit v1.2.3


From fbccdeb8d77d6830556bc4079eeed80298cc97dc Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 20 Oct 2016 00:34:05 -0700
Subject: pstore: Add ftrace timestamp counter

In preparation for merging the per CPU buffers into one buffer when
we retrieve the pstore ftrace data, we store the timestamp as a
counter in the ftrace pstore record.  We store the CPU number as well
if !PSTORE_CPU_IN_IP, in this case we shift the counter and may lose
ordering there but we preserve the same record size. The timestamp counter
is also racy, and not doing any locking or synchronization here results
in the benefit of lower overhead. Since we don't care much here for exact
ordering of function traces across CPUs, we don't synchronize and may lose
some counter updates but I'm ok with that.

Using trace_clock() results in much lower performance so avoid using it
since we don't want accuracy in timestamp and need a rough ordering to
perform merge.

Signed-off-by: Joel Fernandes <joelaf@google.com>
[kees: updated commit message, added comments]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ftrace.c     |  4 +++
 fs/pstore/inode.c      |  8 ++++--
 fs/pstore/internal.h   | 34 ----------------------
 include/linux/pstore.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index d4887705bb61..31548cc09e7b 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -27,6 +27,9 @@
 #include <asm/barrier.h>
 #include "internal.h"
 
+/* This doesn't need to be atomic: speed is chosen over correctness here. */
+static u64 pstore_ftrace_stamp;
+
 static void notrace pstore_ftrace_call(unsigned long ip,
 				       unsigned long parent_ip,
 				       struct ftrace_ops *op,
@@ -42,6 +45,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
 
 	rec.ip = ip;
 	rec.parent_ip = parent_ip;
+	pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++);
 	pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
 	psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
 			  0, sizeof(rec), psinfo);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 1781dc50762e..0d6bbcf47d52 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -107,9 +107,11 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
 	struct pstore_ftrace_seq_data *data = v;
 	struct pstore_ftrace_record *rec = (void *)(ps->data + data->off);
 
-	seq_printf(s, "%d %08lx  %08lx  %pf <- %pF\n",
-		pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip,
-		(void *)rec->ip, (void *)rec->parent_ip);
+	seq_printf(s, "CPU:%d ts:%llu %08lx  %08lx  %pf <- %pF\n",
+		   pstore_ftrace_decode_cpu(rec),
+		   pstore_ftrace_read_timestamp(rec),
+		   rec->ip, rec->parent_ip, (void *)rec->ip,
+		   (void *)rec->parent_ip);
 
 	return 0;
 }
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index e38a22b31282..da416e6591c9 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -5,40 +5,6 @@
 #include <linux/time.h>
 #include <linux/pstore.h>
 
-#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
-#define PSTORE_CPU_IN_IP 0x1
-#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
-#define PSTORE_CPU_IN_IP 0x3
-#endif
-
-struct pstore_ftrace_record {
-	unsigned long ip;
-	unsigned long parent_ip;
-#ifndef PSTORE_CPU_IN_IP
-	unsigned int cpu;
-#endif
-};
-
-static inline void
-pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
-{
-#ifndef PSTORE_CPU_IN_IP
-	rec->cpu = cpu;
-#else
-	rec->ip |= cpu;
-#endif
-}
-
-static inline unsigned int
-pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
-{
-#ifndef PSTORE_CPU_IN_IP
-	return rec->cpu;
-#else
-	return rec->ip & PSTORE_CPU_IN_IP;
-#endif
-}
-
 #ifdef CONFIG_PSTORE_FTRACE
 extern void pstore_register_ftrace(void);
 extern void pstore_unregister_ftrace(void);
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 92013cc9cc8c..0da29cae009b 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -89,4 +89,80 @@ extern int pstore_register(struct pstore_info *);
 extern void pstore_unregister(struct pstore_info *);
 extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason);
 
+struct pstore_ftrace_record {
+	unsigned long ip;
+	unsigned long parent_ip;
+	u64 ts;
+};
+
+/*
+ * ftrace related stuff: Both backends and frontends need these so expose
+ * them here.
+ */
+
+#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
+#define PSTORE_CPU_IN_IP 0x1
+#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
+#define PSTORE_CPU_IN_IP 0x3
+#endif
+
+#define TS_CPU_SHIFT 8
+#define TS_CPU_MASK (BIT(TS_CPU_SHIFT) - 1)
+
+/*
+ * If CPU number can be stored in IP, store it there, otherwise store it in
+ * the time stamp. This means more timestamp resolution is available when
+ * the CPU can be stored in the IP.
+ */
+#ifdef PSTORE_CPU_IN_IP
+static inline void
+pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
+{
+	rec->ip |= cpu;
+}
+
+static inline unsigned int
+pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
+{
+	return rec->ip & PSTORE_CPU_IN_IP;
+}
+
+static inline u64
+pstore_ftrace_read_timestamp(struct pstore_ftrace_record *rec)
+{
+	return rec->ts;
+}
+
+static inline void
+pstore_ftrace_write_timestamp(struct pstore_ftrace_record *rec, u64 val)
+{
+	rec->ts = val;
+}
+#else
+static inline void
+pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
+{
+	rec->ts &= ~(TS_CPU_MASK);
+	rec->ts |= cpu;
+}
+
+static inline unsigned int
+pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
+{
+	return rec->ts & TS_CPU_MASK;
+}
+
+static inline u64
+pstore_ftrace_read_timestamp(struct pstore_ftrace_record *rec)
+{
+	return rec->ts >> TS_CPU_SHIFT;
+}
+
+static inline void
+pstore_ftrace_write_timestamp(struct pstore_ftrace_record *rec, u64 val)
+{
+	rec->ts = (rec->ts & TS_CPU_MASK) | (val << TS_CPU_SHIFT);
+}
+#endif
+
 #endif /*_LINUX_PSTORE_H*/
-- 
cgit v1.2.3


From d032ae8921ea792c1e6b2abb44022b2403f651f6 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Tue, 15 Nov 2016 12:31:20 -0800
Subject: ftrace: Provide API to use global filtering for ftrace ops

Currently the global_ops filtering hash is not available to outside users
registering for function tracing. Provide an API for those users to be
able to choose global filtering.

This is in preparation for pstore's ftrace feature to be able to
use the global filters.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/ftrace.h |  2 ++
 kernel/trace/ftrace.c  | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b3d34d3e0e7e..d4a884db16a3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -398,6 +398,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
 void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
 void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 void ftrace_free_filter(struct ftrace_ops *ops);
+void ftrace_ops_set_global_filter(struct ftrace_ops *ops);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -645,6 +646,7 @@ static inline unsigned long ftrace_location(unsigned long ip)
 #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
 #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
 #define ftrace_free_filter(ops) do { } while (0)
+#define ftrace_ops_set_global_filter(ops) do { } while (0)
 
 static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2050a7652a86..89d46e1c9302 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4239,6 +4239,23 @@ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
 }
 EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
 
+/**
+ * ftrace_ops_set_global_filter - setup ops to use global filters
+ * @ops - the ops which will use the global filters
+ *
+ * ftrace users who need global function trace filtering should call this.
+ * It can set the global filter only if ops were not initialized before.
+ */
+void ftrace_ops_set_global_filter(struct ftrace_ops *ops)
+{
+	if (ops->flags & FTRACE_OPS_FL_INITIALIZED)
+		return;
+
+	ftrace_ops_init(ops);
+	ops->func_hash = &global_ops.local_hash;
+}
+EXPORT_SYMBOL_GPL(ftrace_ops_set_global_filter);
+
 static int
 ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 		 int reset, int enable)
-- 
cgit v1.2.3


From ff86aae3b4112b85d2231c23bccbc49589df1c06 Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Tue, 15 Nov 2016 10:41:01 +0200
Subject: devres: add devm_alloc_percpu()

Introduce managed counterparts for alloc_percpu() and free_percpu().
Add devm_alloc_percpu() and devm_free_percpu() into the managed
interfaces list.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/driver-model/devres.txt |  4 +++
 drivers/base/devres.c                 | 66 +++++++++++++++++++++++++++++++++++
 include/linux/device.h                | 19 ++++++++++
 3 files changed, 89 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 167070895498..ca9d1eb46bc0 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -332,6 +332,10 @@ MEM
 MFD
  devm_mfd_add_devices()
 
+PER-CPU MEM
+  devm_alloc_percpu()
+  devm_free_percpu()
+
 PCI
   pcim_enable_device()	: after success, all PCI ops become managed
   pcim_pin_device()	: keep PCI device enabled after release
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 8fc654f0807b..71d577025285 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/percpu.h>
 
 #include "base.h"
 
@@ -985,3 +986,68 @@ void devm_free_pages(struct device *dev, unsigned long addr)
 			       &devres));
 }
 EXPORT_SYMBOL_GPL(devm_free_pages);
+
+static void devm_percpu_release(struct device *dev, void *pdata)
+{
+	void __percpu *p;
+
+	p = *(void __percpu **)pdata;
+	free_percpu(p);
+}
+
+static int devm_percpu_match(struct device *dev, void *data, void *p)
+{
+	struct devres *devr = container_of(data, struct devres, data);
+
+	return *(void **)devr->data == p;
+}
+
+/**
+ * __devm_alloc_percpu - Resource-managed alloc_percpu
+ * @dev: Device to allocate per-cpu memory for
+ * @size: Size of per-cpu memory to allocate
+ * @align: Alignment of per-cpu memory to allocate
+ *
+ * Managed alloc_percpu. Per-cpu memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void __percpu *__devm_alloc_percpu(struct device *dev, size_t size,
+		size_t align)
+{
+	void *p;
+	void __percpu *pcpu;
+
+	pcpu = __alloc_percpu(size, align);
+	if (!pcpu)
+		return NULL;
+
+	p = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL);
+	if (!p) {
+		free_percpu(pcpu);
+		return NULL;
+	}
+
+	*(void __percpu **)p = pcpu;
+
+	devres_add(dev, p);
+
+	return pcpu;
+}
+EXPORT_SYMBOL_GPL(__devm_alloc_percpu);
+
+/**
+ * devm_free_percpu - Resource-managed free_percpu
+ * @dev: Device this memory belongs to
+ * @pdata: Per-cpu memory to free
+ *
+ * Free memory allocated with devm_alloc_percpu().
+ */
+void devm_free_percpu(struct device *dev, void __percpu *pdata)
+{
+	WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match,
+			       (void *)pdata));
+}
+EXPORT_SYMBOL_GPL(devm_free_percpu);
diff --git a/include/linux/device.h b/include/linux/device.h
index bc41e87a969b..a00105cf795e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -698,6 +698,25 @@ static inline int devm_add_action_or_reset(struct device *dev,
 	return ret;
 }
 
+/**
+ * devm_alloc_percpu - Resource-managed alloc_percpu
+ * @dev: Device to allocate per-cpu memory for
+ * @type: Type to allocate per-cpu memory for
+ *
+ * Managed alloc_percpu. Per-cpu memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+#define devm_alloc_percpu(dev, type)      \
+	((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \
+						      __alignof__(type)))
+
+void __percpu *__devm_alloc_percpu(struct device *dev, size_t size,
+				   size_t align);
+void devm_free_percpu(struct device *dev, void __percpu *pdata);
+
 struct device_dma_parameters {
 	/*
 	 * a low level driver may set these to teach IOMMU code about
-- 
cgit v1.2.3


From d1cbfd771ce8297fa11e89f315392de6056a2181 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 11 Nov 2016 00:10:07 -0500
Subject: ptp_clock: Allow for it to be optional

In order to break the hard dependency between the PTP clock subsystem and
ethernet drivers capable of being clock providers, this patch provides
simple PTP stub functions to allow linkage of those drivers into the
kernel even when the PTP subsystem is configured out. Drivers must be
ready to accept NULL from ptp_clock_register() in that case.

And to make it possible for PTP to be configured out, the select statement
in those driver's Kconfig menu entries is converted to the new "imply"
statement. This way the PTP subsystem may have Kconfig dependencies of
its own, such as POSIX_TIMERS, without having to make those ethernet
drivers unavailable if POSIX timers are cconfigured out. And when support
for POSIX timers is selected again then the default config option for PTP
clock support will automatically be adjusted accordingly.

The pch_gbe driver is a bit special as it relies on extra code in
drivers/ptp/ptp_pch.c. Therefore we let the make process descend into
drivers/ptp/ even if PTP_1588_CLOCK is unselected.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Edward Cree <ecree@solarflare.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Michal Marek <mmarek@suse.com>
Link: http://lkml.kernel.org/r/1478841010-28605-4-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/Makefile                                |  2 +-
 drivers/net/ethernet/adi/Kconfig                |  2 +-
 drivers/net/ethernet/amd/Kconfig                |  2 +-
 drivers/net/ethernet/amd/xgbe/xgbe-main.c       |  6 ++-
 drivers/net/ethernet/broadcom/Kconfig           |  4 +-
 drivers/net/ethernet/cavium/Kconfig             |  2 +-
 drivers/net/ethernet/freescale/Kconfig          |  2 +-
 drivers/net/ethernet/intel/Kconfig              | 10 ++--
 drivers/net/ethernet/mellanox/mlx4/Kconfig      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig |  2 +-
 drivers/net/ethernet/renesas/Kconfig            |  2 +-
 drivers/net/ethernet/samsung/Kconfig            |  2 +-
 drivers/net/ethernet/sfc/Kconfig                |  2 +-
 drivers/net/ethernet/stmicro/stmmac/Kconfig     |  2 +-
 drivers/net/ethernet/ti/Kconfig                 |  2 +-
 drivers/net/ethernet/tile/Kconfig               |  2 +-
 drivers/ptp/Kconfig                             |  8 +--
 include/linux/ptp_clock_kernel.h                | 65 ++++++++++++++++---------
 18 files changed, 69 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/Makefile b/drivers/Makefile
index 194d20bee7dc..060026a02f59 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -107,7 +107,7 @@ obj-$(CONFIG_INPUT)		+= input/
 obj-$(CONFIG_RTC_LIB)		+= rtc/
 obj-y				+= i2c/ media/
 obj-$(CONFIG_PPS)		+= pps/
-obj-$(CONFIG_PTP_1588_CLOCK)	+= ptp/
+obj-y				+= ptp/
 obj-$(CONFIG_W1)		+= w1/
 obj-y				+= power/
 obj-$(CONFIG_HWMON)		+= hwmon/
diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig
index 6b94ba610399..98cc8f535021 100644
--- a/drivers/net/ethernet/adi/Kconfig
+++ b/drivers/net/ethernet/adi/Kconfig
@@ -58,7 +58,7 @@ config BFIN_RX_DESC_NUM
 config BFIN_MAC_USE_HWSTAMP
 	bool "Use IEEE 1588 hwstamp"
 	depends on BFIN_MAC && BF518
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	default y
 	---help---
 	  To support the IEEE 1588 Precision Time Protocol (PTP), select y here
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 0038709fd317..713ea7ad22c3 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -177,7 +177,7 @@ config AMD_XGBE
 	depends on ARM64 || COMPILE_TEST
 	select BITREVERSE
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the AMD 10GbE Ethernet device found on an
 	  AMD SoC.
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 9de078819aa6..e10e569c0d5f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -773,7 +773,8 @@ static int xgbe_probe(struct platform_device *pdev)
 		goto err_wq;
 	}
 
-	xgbe_ptp_register(pdata);
+	if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
+		xgbe_ptp_register(pdata);
 
 	xgbe_debugfs_init(pdata);
 
@@ -812,7 +813,8 @@ static int xgbe_remove(struct platform_device *pdev)
 
 	xgbe_debugfs_exit(pdata);
 
-	xgbe_ptp_unregister(pdata);
+	if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
+		xgbe_ptp_unregister(pdata);
 
 	flush_workqueue(pdata->an_workqueue);
 	destroy_workqueue(pdata->an_workqueue);
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index bd8c80c0b71c..6a8d74aeb69f 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -110,7 +110,7 @@ config TIGON3
 	depends on PCI
 	select PHYLIB
 	select HWMON
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
 
@@ -120,7 +120,7 @@ config TIGON3
 config BNX2X
 	tristate "Broadcom NetXtremeII 10Gb support"
 	depends on PCI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select ZLIB_INFLATE
 	select LIBCRC32C
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 92f411c9f0df..2e64a966617a 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -53,7 +53,7 @@ config	THUNDER_NIC_RGX
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select LIBCRC32C
 	---help---
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index d1ca45fbb164..5eb9280973fb 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -25,7 +25,7 @@ config FEC
 		   ARCH_MXC || SOC_IMX28)
 	default ARCH_MXC || SOC_IMX28 if ARM
 	select PHYLIB
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  Say Y here if you want to use the built-in 10/100 Fast ethernet
 	  controller on some Motorola ColdFire and Freescale i.MX processors.
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index c0e17433f623..1349b45f014d 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -58,7 +58,7 @@ config E1000E
 	tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support"
 	depends on PCI && (!SPARC32 || BROKEN)
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the PCI-Express Intel(R) PRO/1000 gigabit
 	  ethernet family of adapters. For PCI or PCI-X e1000 adapters,
@@ -83,7 +83,7 @@ config E1000E_HWTS
 config IGB
 	tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
 	depends on PCI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select I2C
 	select I2C_ALGOBIT
 	---help---
@@ -156,7 +156,7 @@ config IXGBE
 	tristate "Intel(R) 10GbE PCI Express adapters support"
 	depends on PCI
 	select MDIO
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Intel(R) 10GbE PCI Express family of
 	  adapters.  For more information on how to identify your adapter, go
@@ -213,7 +213,7 @@ config IXGBEVF
 
 config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	depends on PCI
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
@@ -264,7 +264,7 @@ config FM10K
 	tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
 	default n
 	depends on PCI_MSI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Intel(R) FM10000 Ethernet Switch Host
 	  Interface.  For more information on how to identify your adapter,
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 5098e7f21987..22b1cc012bc9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -7,7 +7,7 @@ config MLX4_EN
 	depends on MAY_USE_DEVLINK
 	depends on PCI
 	select MLX4_CORE
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Mellanox Technologies ConnectX Ethernet
 	  devices.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index aae46884bf93..2cd841590e70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -14,7 +14,7 @@ config MLX5_CORE
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 85ec447c2d18..27be51f0a421 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -37,7 +37,7 @@ config RAVB
 	select MII
 	select MDIO_BITBANG
 	select PHYLIB
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	help
 	  Renesas Ethernet AVB device driver.
 	  This driver supports the following SoCs:
diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig
index 2360d8150777..fbd5e06654c6 100644
--- a/drivers/net/ethernet/samsung/Kconfig
+++ b/drivers/net/ethernet/samsung/Kconfig
@@ -21,7 +21,7 @@ config SXGBE_ETH
 	depends on HAS_IOMEM && HAS_DMA
 	select PHYLIB
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This is the driver for the SXGBE 10G Ethernet IP block found on
 	  Samsung platforms.
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 4dd92b7b80f4..83f4766a1da0 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -5,7 +5,7 @@ config SFC
 	select CRC32
 	select I2C
 	select I2C_ALGOBIT
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports 10/40-gigabit Ethernet cards based on
 	  the Solarflare SFC4000, SFC9000-family and SFC9100-family
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 3818c5e06eba..139c85fa6a4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -4,7 +4,7 @@ config STMMAC_ETH
 	select MII
 	select PHYLIB
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select RESET_CONTROLLER
 	---help---
 	  This is the driver for the Ethernet IPs are built around a
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9904d740d528..61b835a7e6ae 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -76,7 +76,7 @@ config TI_CPSW
 config TI_CPTS
 	bool "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the Common Platform Time Sync unit of
 	  the CPSW Ethernet Switch. The unit can time stamp PTP UDP/IPv4
diff --git a/drivers/net/ethernet/tile/Kconfig b/drivers/net/ethernet/tile/Kconfig
index f59a6c265331..bdfeaf3d4fce 100644
--- a/drivers/net/ethernet/tile/Kconfig
+++ b/drivers/net/ethernet/tile/Kconfig
@@ -9,7 +9,7 @@ config TILE_NET
 	select CRC32
 	select TILE_GXIO_MPIPE if TILEGX
 	select HIGH_RES_TIMERS if TILEGX
-	select PTP_1588_CLOCK if TILEGX
+	imply PTP_1588_CLOCK if TILEGX
 	---help---
 	  This is a standard Linux network device driver for the
 	  on-chip Tilera Gigabit Ethernet and XAUI interfaces.
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index ee3de3421f2d..0f7492f8ea23 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -28,7 +28,7 @@ config PTP_1588_CLOCK
 config PTP_1588_CLOCK_GIANFAR
 	tristate "Freescale eTSEC as PTP clock"
 	depends on GIANFAR
-	select PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK
 	default y
 	help
 	  This driver adds support for using the eTSEC as a PTP
@@ -42,7 +42,7 @@ config PTP_1588_CLOCK_GIANFAR
 config PTP_1588_CLOCK_IXP46X
 	tristate "Intel IXP46x as PTP clock"
 	depends on IXP4XX_ETH
-	select PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK
 	default y
 	help
 	  This driver adds support for using the IXP46X as a PTP
@@ -60,7 +60,7 @@ config DP83640_PHY
 	tristate "Driver for the National Semiconductor DP83640 PHYTER"
 	depends on NETWORK_PHY_TIMESTAMPING
 	depends on PHYLIB
-	select PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK
 	---help---
 	  Supports the DP83640 PHYTER with IEEE 1588 features.
 
@@ -76,7 +76,7 @@ config PTP_1588_CLOCK_PCH
 	tristate "Intel PCH EG20T as PTP clock"
 	depends on X86_32 || COMPILE_TEST
 	depends on HAS_IOMEM && NET
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	help
 	  This driver adds support for using the PCH EG20T as a PTP
 	  clock. The hardware supports time stamping of PTP packets
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 5ad54fc66cf0..96699526d342 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -122,30 +122,6 @@ struct ptp_clock_info {
 
 struct ptp_clock;
 
-/**
- * ptp_clock_register() - register a PTP hardware clock driver
- *
- * @info:   Structure describing the new clock.
- * @parent: Pointer to the parent device of the new clock.
- *
- * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
- * support is missing at the configuration level, this function
- * returns NULL, and drivers are expected to gracefully handle that
- * case separately.
- */
-
-extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
-					    struct device *parent);
-
-/**
- * ptp_clock_unregister() - unregister a PTP hardware clock driver
- *
- * @ptp:  The clock to remove from service.
- */
-
-extern int ptp_clock_unregister(struct ptp_clock *ptp);
-
-
 enum ptp_clock_events {
 	PTP_CLOCK_ALARM,
 	PTP_CLOCK_EXTTS,
@@ -171,6 +147,31 @@ struct ptp_clock_event {
 	};
 };
 
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+
+/**
+ * ptp_clock_register() - register a PTP hardware clock driver
+ *
+ * @info:   Structure describing the new clock.
+ * @parent: Pointer to the parent device of the new clock.
+ *
+ * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
+ * support is missing at the configuration level, this function
+ * returns NULL, and drivers are expected to gracefully handle that
+ * case separately.
+ */
+
+extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+					    struct device *parent);
+
+/**
+ * ptp_clock_unregister() - unregister a PTP hardware clock driver
+ *
+ * @ptp:  The clock to remove from service.
+ */
+
+extern int ptp_clock_unregister(struct ptp_clock *ptp);
+
 /**
  * ptp_clock_event() - notify the PTP layer about an event
  *
@@ -202,4 +203,20 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
 int ptp_find_pin(struct ptp_clock *ptp,
 		 enum ptp_pin_function func, unsigned int chan);
 
+#else
+static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+						   struct device *parent)
+{ return NULL; }
+static inline int ptp_clock_unregister(struct ptp_clock *ptp)
+{ return 0; }
+static inline void ptp_clock_event(struct ptp_clock *ptp,
+				   struct ptp_clock_event *event)
+{ }
+static inline int ptp_clock_index(struct ptp_clock *ptp)
+{ return -1; }
+static inline int ptp_find_pin(struct ptp_clock *ptp,
+			       enum ptp_pin_function func, unsigned int chan)
+{ return -1; }
+#endif
+
 #endif
-- 
cgit v1.2.3


From 74ba181e61c6accf9066d6980f44588de2f854f6 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 11 Nov 2016 00:10:08 -0500
Subject: timer: Move sys_alarm from timer.c to itimer.c

Move the only user of alarm_setitimer to itimer.c where it is defined.
This allows for making alarm_setitimer static, and dropping it from the
build when __ARCH_WANT_SYS_ALARM is not defined.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Edward Cree <ecree@solarflare.com>
Link: http://lkml.kernel.org/r/1478841010-28605-5-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time.h |  2 --
 kernel/time/itimer.c | 15 ++++++++++++++-
 kernel/time/timer.c  | 13 -------------
 3 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 4cea09d94208..23f0f5ce3090 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -172,8 +172,6 @@ extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
 extern int do_getitimer(int which, struct itimerval *value);
 
-extern unsigned int alarm_setitimer(unsigned int seconds);
-
 extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
 
 struct tms;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 1d5c7204ddc9..2b9f45bc955d 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -238,6 +238,8 @@ again:
 	return 0;
 }
 
+#ifdef __ARCH_WANT_SYS_ALARM
+
 /**
  * alarm_setitimer - set alarm in seconds
  *
@@ -250,7 +252,7 @@ again:
  * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
  * negative timeval settings which would cause immediate expiry.
  */
-unsigned int alarm_setitimer(unsigned int seconds)
+static unsigned int alarm_setitimer(unsigned int seconds)
 {
 	struct itimerval it_new, it_old;
 
@@ -275,6 +277,17 @@ unsigned int alarm_setitimer(unsigned int seconds)
 	return it_old.it_value.tv_sec;
 }
 
+/*
+ * For backwards compatibility?  This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+SYSCALL_DEFINE1(alarm, unsigned int, seconds)
+{
+	return alarm_setitimer(seconds);
+}
+
+#endif
+
 SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
 		struct itimerval __user *, ovalue)
 {
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 88aab86a4594..42d27aa242b9 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1662,19 +1662,6 @@ void run_local_timers(void)
 	raise_softirq(TIMER_SOFTIRQ);
 }
 
-#ifdef __ARCH_WANT_SYS_ALARM
-
-/*
- * For backwards compatibility?  This can be done in libc so Alpha
- * and all newer ports shouldn't need it.
- */
-SYSCALL_DEFINE1(alarm, unsigned int, seconds)
-{
-	return alarm_setitimer(seconds);
-}
-
-#endif
-
 static void process_timeout(unsigned long __data)
 {
 	wake_up_process((struct task_struct *)__data);
-- 
cgit v1.2.3


From 0e285d36bd2bfee0b95433ccc9065c878164f5b2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 10 Nov 2016 18:44:47 +0100
Subject: x86/mcheck: Move CPU_DEAD to hotplug state machine

This moves the last piece of the old hotplug notifier code in MCE to the
new hotplug state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-8-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 42 ++++++++++++++--------------------------
 include/linux/cpuhotplug.h       |  1 +
 2 files changed, 16 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 78955f501ff2..b888e2f6af41 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2506,26 +2506,14 @@ static void mce_reenable_cpu(void)
 	}
 }
 
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int mce_cpu_dead(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
+	mce_intel_hcpu_update(cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DEAD:
-		mce_intel_hcpu_update(cpu);
-
-		/* intentionally ignoring frozen here */
-		if (!(action & CPU_TASKS_FROZEN))
-			cmci_rediscover();
-		break;
-	case CPU_DOWN_PREPARE:
-
-		break;
-	}
-
-	return NOTIFY_OK;
+	/* intentionally ignoring frozen here */
+	if (!cpuhp_tasks_frozen)
+		cmci_rediscover();
+	return 0;
 }
 
 static int mce_cpu_online(unsigned int cpu)
@@ -2556,10 +2544,6 @@ static int mce_cpu_pre_down(unsigned int cpu)
 	return 0;
 }
 
-static struct notifier_block mce_cpu_notifier = {
-	.notifier_call = mce_cpu_callback,
-};
-
 static __init void mce_init_banks(void)
 {
 	int i;
@@ -2599,16 +2583,17 @@ static __init int mcheck_init_device(void)
 	if (err)
 		goto err_out_mem;
 
+	err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
+				mce_cpu_dead);
+	if (err)
+		goto err_out_mem;
+
 	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
 				mce_cpu_online, mce_cpu_pre_down);
 	if (err < 0)
-		goto err_out_mem;
+		goto err_out_online;
 	hp_online = err;
 
-	cpu_notifier_register_begin();
-	__register_hotcpu_notifier(&mce_cpu_notifier);
-	cpu_notifier_register_done();
-
 	register_syscore_ops(&mce_syscore_ops);
 
 	/* register character device /dev/mcelog */
@@ -2622,6 +2607,9 @@ err_register:
 	unregister_syscore_ops(&mce_syscore_ops);
 	cpuhp_remove_state(hp_online);
 
+err_out_online:
+	cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
+
 err_out_mem:
 	free_cpumask_var(mce_device_initialized);
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 3410d83cc2e2..79b96f647d64 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -16,6 +16,7 @@ enum cpuhp_state {
 	CPUHP_PERF_SUPERH,
 	CPUHP_X86_HPET_DEAD,
 	CPUHP_X86_APB_DEAD,
+	CPUHP_X86_MCE_DEAD,
 	CPUHP_VIRT_NET_DEAD,
 	CPUHP_SLUB_DEAD,
 	CPUHP_MM_WRITEBACK_DEAD,
-- 
cgit v1.2.3


From 43496d35513b25ad468bef91e51a39d61a0d8464 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 16 Nov 2016 10:36:05 +0100
Subject: locking/mutex: Don't mark mutex_trylock_recursive() as deprecated,
 temporarily
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Until the DRM drivers are fixed to not use mutex_trylock_recursive(),
allyes/modconfig builds will emit an API deprecation warning:

 drivers/gpu/drm/i915/i915_gem_shrinker.c: In function ‘i915_gem_shrinker_lock’:
 drivers/gpu/drm/i915/i915_gem_shrinker.c:230:2: warning: ‘mutex_trylock_recursive’ is deprecated [-Wdeprecated-declarations]
   switch (mutex_trylock_recursive(&dev->struct_mutex)) {
	    ^

Don't pollute the kernel log until the DRM code is fixed. Hopefully
the checkpatch warning is enough to keep people from using this new
API, and we'll be NAK-ing new users as well.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Jason Low <jason.low2@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Terry Rudd <terry.rudd@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 6a902f0a2148..b97870f2debd 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -211,7 +211,7 @@ enum mutex_trylock_recursive_enum {
  *  MUTEX_TRYLOCK_SUCCESS   - lock acquired,
  *  MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
  */
-static inline __deprecated __must_check enum mutex_trylock_recursive_enum
+static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
 mutex_trylock_recursive(struct mutex *lock)
 {
 	if (unlikely(__mutex_owner(lock) == current))
-- 
cgit v1.2.3


From 98838d95075a5295f3478ceba18bcccf472e30f4 Mon Sep 17 00:00:00 2001
From: Ed Blake <ed.blake@imgtec.com>
Date: Thu, 10 Nov 2016 18:07:54 +0000
Subject: serial: 8250: Add IrDA to UART capabilities

Add an IrDA UART capability flag and change the type of
uart_8250_port.capabilities to be u32 rather than unsigned short to
accommodate the additional flag.

Signed-off-by: Ed Blake <ed.blake@imgtec.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/platform/ce4100/ce4100.c   | 2 +-
 drivers/tty/serial/8250/8250.h      | 1 +
 drivers/tty/serial/8250/8250_core.c | 4 ++--
 include/linux/serial_8250.h         | 4 ++--
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index b27bccd4390f..821cb41f00e6 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -89,7 +89,7 @@ static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
 }
 
 static void ce4100_serial_fixup(int port, struct uart_port *up,
-	unsigned short *capabilites)
+	u32 *capabilites)
 {
 #ifdef CONFIG_EARLY_PRINTK
 	/*
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 124e6e695a3a..ce8d4ffcc425 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -80,6 +80,7 @@ struct serial8250_config {
 #define UART_CAP_RTOIE	(1 << 13)	/* UART needs IER bit 4 set (Xscale, Tegra) */
 #define UART_CAP_HFIFO	(1 << 14)	/* UART has a "hidden" FIFO */
 #define UART_CAP_RPM	(1 << 15)	/* Runtime PM is active while idle */
+#define UART_CAP_IRDA	(1 << 16)	/* UART supports IrDA line discipline */
 
 #define UART_BUG_QUOT	(1 << 0)	/* UART has buggy quot LSB */
 #define UART_BUG_TXEN	(1 << 1)	/* UART has buggy TX IIR status */
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 240a361b674f..13d04bf9547d 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -425,10 +425,10 @@ struct uart_8250_port *serial8250_get_port(int line)
 EXPORT_SYMBOL_GPL(serial8250_get_port);
 
 static void (*serial8250_isa_config)(int port, struct uart_port *up,
-	unsigned short *capabilities);
+	u32 *capabilities);
 
 void serial8250_set_isa_configurator(
-	void (*v)(int port, struct uart_port *up, unsigned short *capabilities))
+	void (*v)(int port, struct uart_port *up, u32 *capabilities))
 {
 	serial8250_isa_config = v;
 }
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 48ec7651989b..04185e03d7be 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -94,7 +94,7 @@ struct uart_8250_port {
 	struct uart_port	port;
 	struct timer_list	timer;		/* "no irq" timer */
 	struct list_head	list;		/* ports on this IRQ */
-	unsigned short		capabilities;	/* port capabilities */
+	u32			capabilities;	/* port capabilities */
 	unsigned short		bugs;		/* port bugs */
 	bool			fifo_bug;	/* min RX trigger if enabled */
 	unsigned int		tx_loadsz;	/* transmit fifo load size */
@@ -168,6 +168,6 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
 
 extern void serial8250_set_isa_configurator(void (*v)
 					(int port, struct uart_port *up,
-						unsigned short *capabilities));
+						u32 *capabilities));
 
 #endif
-- 
cgit v1.2.3


From db405a8f8bf70daf57ed88808a2bf9c5fe308c70 Mon Sep 17 00:00:00 2001
From: Ed Blake <ed.blake@imgtec.com>
Date: Thu, 10 Nov 2016 18:07:55 +0000
Subject: serial: 8250: Expose set_ldisc function

Expose set_ldisc() function so that it can be overridden with a
platform specific implementation.

Signed-off-by: Ed Blake <ed.blake@imgtec.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c |  3 +++
 drivers/tty/serial/8250/8250_port.c | 12 ++++++++++--
 include/linux/serial_8250.h         |  4 ++++
 include/linux/serial_core.h         |  2 ++
 4 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 13d04bf9547d..61569a765d9e 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -830,6 +830,7 @@ static int serial8250_probe(struct platform_device *dev)
 		uart.port.handle_irq	= p->handle_irq;
 		uart.port.handle_break	= p->handle_break;
 		uart.port.set_termios	= p->set_termios;
+		uart.port.set_ldisc	= p->set_ldisc;
 		uart.port.get_mctrl	= p->get_mctrl;
 		uart.port.pm		= p->pm;
 		uart.port.dev		= &dev->dev;
@@ -1023,6 +1024,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 		/*  Possibly override set_termios call */
 		if (up->port.set_termios)
 			uart->port.set_termios = up->port.set_termios;
+		if (up->port.set_ldisc)
+			uart->port.set_ldisc = up->port.set_ldisc;
 		if (up->port.get_mctrl)
 			uart->port.get_mctrl = up->port.get_mctrl;
 		if (up->port.set_mctrl)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 4a326034c51b..fe4399b41df6 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2693,8 +2693,7 @@ serial8250_set_termios(struct uart_port *port, struct ktermios *termios,
 		serial8250_do_set_termios(port, termios, old);
 }
 
-static void
-serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios)
+void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios)
 {
 	if (termios->c_line == N_PPS) {
 		port->flags |= UPF_HARDPPS_CD;
@@ -2710,7 +2709,16 @@ serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios)
 		}
 	}
 }
+EXPORT_SYMBOL_GPL(serial8250_do_set_ldisc);
 
+static void
+serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios)
+{
+	if (port->set_ldisc)
+		port->set_ldisc(port, termios);
+	else
+		serial8250_do_set_ldisc(port, termios);
+}
 
 void serial8250_do_pm(struct uart_port *port, unsigned int state,
 		      unsigned int oldstate)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 04185e03d7be..61fbb440449c 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -36,6 +36,8 @@ struct plat_serial8250_port {
 	void		(*set_termios)(struct uart_port *,
 			               struct ktermios *new,
 			               struct ktermios *old);
+	void		(*set_ldisc)(struct uart_port *,
+				     struct ktermios *);
 	unsigned int	(*get_mctrl)(struct uart_port *);
 	int		(*handle_irq)(struct uart_port *);
 	void		(*pm)(struct uart_port *, unsigned int state,
@@ -149,6 +151,8 @@ extern int early_serial8250_setup(struct earlycon_device *device,
 					 const char *options);
 extern void serial8250_do_set_termios(struct uart_port *port,
 		struct ktermios *termios, struct ktermios *old);
+extern void serial8250_do_set_ldisc(struct uart_port *port,
+				    struct ktermios *termios);
 extern unsigned int serial8250_do_get_mctrl(struct uart_port *port);
 extern int serial8250_do_startup(struct uart_port *port);
 extern void serial8250_do_shutdown(struct uart_port *port);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 344201437017..5d494888a612 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -123,6 +123,8 @@ struct uart_port {
 	void			(*set_termios)(struct uart_port *,
 				               struct ktermios *new,
 				               struct ktermios *old);
+	void			(*set_ldisc)(struct uart_port *,
+					     struct ktermios *);
 	unsigned int		(*get_mctrl)(struct uart_port *);
 	void			(*set_mctrl)(struct uart_port *, unsigned int);
 	int			(*startup)(struct uart_port *port);
-- 
cgit v1.2.3


From f4062625ede8f0280d8246437f4070c8eb7fe9f3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 1 Nov 2016 07:59:34 -0200
Subject: [media] cec: add flag to cec_log_addrs to enable RC passthrough

By default the CEC_MSG_USER_CONTROL_PRESSED/RELEASED messages
are passed on to the follower(s) only. If the new
CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU flag is set in the
flags field of struct cec_log_addrs then these messages are also
passed on to the remote control input subsystem and they will appear
as keystrokes.

This used to be the default behavior, but now you have to explicitly
enable it. This is done to force the caller to think about possible
security issues (e.g. if these messages are used to enter passwords).

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst | 10 ++++++++++
 drivers/staging/media/cec/TODO                            |  2 --
 drivers/staging/media/cec/cec-adap.c                      |  6 ++++--
 drivers/staging/media/cec/cec-api.c                       |  3 ++-
 include/linux/cec.h                                       |  2 ++
 5 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
index af35f7185439..571ae57b75ae 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
@@ -166,6 +166,16 @@ logical address types are already defined will return with error ``EBUSY``.
 	it will go back to the unconfigured state. If this flag is set, then it will
 	fallback to the Unregistered logical address. Note that if the Unregistered
 	logical address was explicitly requested, then this flag has no effect.
+    * .. _`CEC-LOG-ADDRS-FL-ALLOW-RC-PASSTHRU`:
+
+      - ``CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU``
+      - 2
+      - By default the ``CEC_MSG_USER_CONTROL_PRESSED`` and ``CEC_MSG_USER_CONTROL_RELEASED``
+        messages are only passed on to the follower(s), if any. If this flag is set,
+	then these messages are also passed on to the remote control input subsystem
+	and will appear as keystrokes. This features needs to be enabled explicitly.
+	If CEC is used to enter e.g. passwords, then you may not want to enable this
+	to avoid trivial snooping of the keystrokes.
 
 .. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
 
diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
index 13224694a8ae..084120687d06 100644
--- a/drivers/staging/media/cec/TODO
+++ b/drivers/staging/media/cec/TODO
@@ -13,8 +13,6 @@ Hopefully this will happen later in 2016.
 Other TODOs:
 
 - There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that?
-- Add a flag to inhibit passing CEC RC messages to the rc subsystem.
-  Applications should be able to choose this when calling S_LOG_ADDRS.
 - If the reply field of cec_msg is set then when the reply arrives it
   is only sent to the filehandle that transmitted the original message
   and not to any followers. Should this behavior change or perhaps
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index 611e07b78bfe..589e4576fbe9 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -1478,7 +1478,8 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
 	}
 
 	case CEC_MSG_USER_CONTROL_PRESSED:
-		if (!(adap->capabilities & CEC_CAP_RC))
+		if (!(adap->capabilities & CEC_CAP_RC) ||
+		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
 			break;
 
 #if IS_REACHABLE(CONFIG_RC_CORE)
@@ -1515,7 +1516,8 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
 		break;
 
 	case CEC_MSG_USER_CONTROL_RELEASED:
-		if (!(adap->capabilities & CEC_CAP_RC))
+		if (!(adap->capabilities & CEC_CAP_RC) ||
+		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
 			break;
 #if IS_REACHABLE(CONFIG_RC_CORE)
 		rc_keyup(adap->rc);
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
index e274e2f22398..040ca7ddf2b0 100644
--- a/drivers/staging/media/cec/cec-api.c
+++ b/drivers/staging/media/cec/cec-api.c
@@ -162,7 +162,8 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh,
 		return -ENOTTY;
 	if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
 		return -EFAULT;
-	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK;
+	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK |
+			   CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU;
 	mutex_lock(&adap->lock);
 	if (!adap->is_configuring &&
 	    (!log_addrs.num_log_addrs || !adap->is_configured) &&
diff --git a/include/linux/cec.h b/include/linux/cec.h
index 851968e803fa..825455fae3cc 100644
--- a/include/linux/cec.h
+++ b/include/linux/cec.h
@@ -391,6 +391,8 @@ struct cec_log_addrs {
 
 /* Allow a fallback to unregistered */
 #define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK	(1 << 0)
+/* Passthrough RC messages to the input subsystem */
+#define CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU	(1 << 1)
 
 /* Events */
 
-- 
cgit v1.2.3


From adc0c622783978ab0c740af77f98fc8f65c87d66 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 1 Nov 2016 08:55:05 -0200
Subject: [media] cec: add CEC_MSG_FL_REPLY_TO_FOLLOWERS

Give the caller more control over how replies to a transmit are
handled. By default the reply will only go to the filehandle that
called CEC_TRANSMIT. If this new flag is set, then the reply will
also go to all followers.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/cec/cec-ioc-receive.rst | 22 +++++++++++++++++++++-
 drivers/staging/media/cec/TODO                   |  4 ----
 drivers/staging/media/cec/cec-adap.c             |  6 +++---
 drivers/staging/media/cec/cec-api.c              |  1 +
 include/linux/cec.h                              |  5 ++++-
 5 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/media/uapi/cec/cec-ioc-receive.rst b/Documentation/media/uapi/cec/cec-ioc-receive.rst
index 21a88df3d9d9..b4dffd2f7dfa 100644
--- a/Documentation/media/uapi/cec/cec-ioc-receive.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-receive.rst
@@ -119,7 +119,7 @@ result.
 	transmit.
     * - __u32
       - ``flags``
-      - Flags. No flags are defined yet, so set this to 0.
+      - Flags. See :ref:`cec-msg-flags` for a list of available flags.
     * - __u8
       - ``tx_status``
       - The status bits of the transmitted message. See
@@ -180,6 +180,26 @@ result.
 	valid if the :ref:`CEC_TX_STATUS_ERROR <CEC-TX-STATUS-ERROR>` status bit is set.
 
 
+.. _cec-msg-flags:
+
+.. flat-table:: Flags for struct cec_msg
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 4
+
+    * .. _`CEC-MSG-FL-REPLY-TO-FOLLOWERS`:
+
+      - ``CEC_MSG_FL_REPLY_TO_FOLLOWERS``
+      - 1
+      - If a CEC transmit expects a reply, then by default that reply is only sent to
+	the filehandle that called :ref:`ioctl CEC_TRANSMIT <CEC_TRANSMIT>`. If this
+	flag is set, then the reply is also sent to all followers, if any. If the
+	filehandle that called :ref:`ioctl CEC_TRANSMIT <CEC_TRANSMIT>` is also a
+	follower, then that filehandle will receive the reply twice: once as the
+	result of the :ref:`ioctl CEC_TRANSMIT <CEC_TRANSMIT>`, and once via
+	:ref:`ioctl CEC_RECEIVE <CEC_RECEIVE>`.
+
+
 .. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}|
 
 .. _cec-tx-status:
diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
index 084120687d06..ce69001b0428 100644
--- a/drivers/staging/media/cec/TODO
+++ b/drivers/staging/media/cec/TODO
@@ -13,10 +13,6 @@ Hopefully this will happen later in 2016.
 Other TODOs:
 
 - There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that?
-- If the reply field of cec_msg is set then when the reply arrives it
-  is only sent to the filehandle that transmitted the original message
-  and not to any followers. Should this behavior change or perhaps
-  controlled through a cec_msg flag?
 - Should CEC_LOG_ADDR_TYPE_SPECIFIC be replaced by TYPE_2ND_TV and TYPE_PROCESSOR?
   And also TYPE_SWITCH and TYPE_CDC_ONLY in addition to the TYPE_UNREGISTERED?
   This should give the framework more information about the device type
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index 589e4576fbe9..6aceb1d4a7a0 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -587,7 +587,6 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
 	msg->tx_nack_cnt = 0;
 	msg->tx_low_drive_cnt = 0;
 	msg->tx_error_cnt = 0;
-	msg->flags = 0;
 	msg->sequence = ++adap->sequence;
 	if (!msg->sequence)
 		msg->sequence = ++adap->sequence;
@@ -823,6 +822,7 @@ void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg)
 			dst->rx_status = msg->rx_status;
 			if (abort)
 				dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
+			msg->flags = dst->flags;
 			/* Remove it from the wait_queue */
 			list_del_init(&data->list);
 
@@ -1575,8 +1575,8 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
 	}
 
 skip_processing:
-	/* If this was a reply, then we're done */
-	if (is_reply)
+	/* If this was a reply, then we're done, unless otherwise specified */
+	if (is_reply && !(msg->flags & CEC_MSG_FL_REPLY_TO_FOLLOWERS))
 		return 0;
 
 	/*
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
index 040ca7ddf2b0..54148a6b3326 100644
--- a/drivers/staging/media/cec/cec-api.c
+++ b/drivers/staging/media/cec/cec-api.c
@@ -190,6 +190,7 @@ static long cec_transmit(struct cec_adapter *adap, struct cec_fh *fh,
 		return -ENOTTY;
 	if (copy_from_user(&msg, parg, sizeof(msg)))
 		return -EFAULT;
+	msg.flags &= CEC_MSG_FL_REPLY_TO_FOLLOWERS;
 	mutex_lock(&adap->lock);
 	if (!adap->is_configured)
 		err = -ENONET;
diff --git a/include/linux/cec.h b/include/linux/cec.h
index 825455fae3cc..3f2f076027b1 100644
--- a/include/linux/cec.h
+++ b/include/linux/cec.h
@@ -175,7 +175,10 @@ static inline void cec_msg_set_reply_to(struct cec_msg *msg,
 	msg->reply = msg->timeout = 0;
 }
 
-/* cec status field */
+/* cec_msg flags field */
+#define CEC_MSG_FL_REPLY_TO_FOLLOWERS	(1 << 0)
+
+/* cec_msg tx/rx_status field */
 #define CEC_TX_STATUS_OK		(1 << 0)
 #define CEC_TX_STATUS_ARB_LOST		(1 << 1)
 #define CEC_TX_STATUS_NACK		(1 << 2)
-- 
cgit v1.2.3


From a69a168a1bd470cb8a8c5f2ff4b54463de615226 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 2 Nov 2016 07:41:41 -0200
Subject: [media] cec: add proper support for CDC-Only CEC devices

CDC-Only CEC devices are CEC devices that can only handle CDC messages,
all other messages are ignored.

Add a flag to signal that this is a CDC-Only device and act accordingly.

Also add helper functions to identify if a CEC device is configured as a
CDC-Only device, a second TV, a switch or a processor, since these variations
cannot be determined by the logical address alone.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/staging/media/cec/TODO       |  4 ---
 drivers/staging/media/cec/cec-adap.c | 31 ++++++++++++++++++++-
 drivers/staging/media/cec/cec-api.c  |  9 ++++++-
 include/linux/cec.h                  | 52 ++++++++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
index 5a4cfdf8d15e..504d35c7ae95 100644
--- a/drivers/staging/media/cec/TODO
+++ b/drivers/staging/media/cec/TODO
@@ -1,9 +1,5 @@
 TODOs:
 
-- Should CEC_LOG_ADDR_TYPE_SPECIFIC be replaced by TYPE_2ND_TV and TYPE_PROCESSOR?
-  And also TYPE_SWITCH and TYPE_CDC_ONLY in addition to the TYPE_UNREGISTERED?
-  This should give the framework more information about the device type
-  since SPECIFIC and UNREGISTERED give no useful information.
 - Once this is out of staging this should no longer be a separate
   config option, instead it should be selected by drivers that want it.
 - Revisit the IS_REACHABLE(RC_CORE): perhaps the RC_CORE support should
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index a65d8667b78e..054cd06e2247 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -1233,7 +1233,8 @@ configured:
 	mutex_unlock(&adap->lock);
 
 	for (i = 0; i < las->num_log_addrs; i++) {
-		if (las->log_addr[i] == CEC_LOG_ADDR_INVALID)
+		if (las->log_addr[i] == CEC_LOG_ADDR_INVALID ||
+		    (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY))
 			continue;
 
 		/*
@@ -1355,6 +1356,29 @@ int __cec_s_log_addrs(struct cec_adapter *adap,
 		return 0;
 	}
 
+	if (log_addrs->flags & CEC_LOG_ADDRS_FL_CDC_ONLY) {
+		/*
+		 * Sanitize log_addrs fields if a CDC-Only device is
+		 * requested.
+		 */
+		log_addrs->num_log_addrs = 1;
+		log_addrs->osd_name[0] = '\0';
+		log_addrs->vendor_id = CEC_VENDOR_ID_NONE;
+		log_addrs->log_addr_type[0] = CEC_LOG_ADDR_TYPE_UNREGISTERED;
+		/*
+		 * This is just an internal convention since a CDC-Only device
+		 * doesn't have to be a switch. But switches already use
+		 * unregistered, so it makes some kind of sense to pick this
+		 * as the primary device. Since a CDC-Only device never sends
+		 * any 'normal' CEC messages this primary device type is never
+		 * sent over the CEC bus.
+		 */
+		log_addrs->primary_device_type[0] = CEC_OP_PRIM_DEVTYPE_SWITCH;
+		log_addrs->all_device_types[0] = 0;
+		log_addrs->features[0][0] = 0;
+		log_addrs->features[0][1] = 0;
+	}
+
 	/* Ensure the osd name is 0-terminated */
 	log_addrs->osd_name[sizeof(log_addrs->osd_name) - 1] = '\0';
 
@@ -1575,6 +1599,11 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
 
 	dprintk(1, "cec_receive_notify: %*ph\n", msg->len, msg->msg);
 
+	/* If this is a CDC-Only device, then ignore any non-CDC messages */
+	if (cec_is_cdc_only(&adap->log_addrs) &&
+	    msg->msg[1] != CEC_MSG_CDC_MESSAGE)
+		return 0;
+
 	if (adap->ops->received) {
 		/* Allow drivers to process the message first */
 		if (adap->ops->received(adap, msg) != -ENOMSG)
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
index 54148a6b3326..d4bc4ee2c6e5 100644
--- a/drivers/staging/media/cec/cec-api.c
+++ b/drivers/staging/media/cec/cec-api.c
@@ -163,7 +163,8 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh,
 	if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
 		return -EFAULT;
 	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK |
-			   CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU;
+			   CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU |
+			   CEC_LOG_ADDRS_FL_CDC_ONLY;
 	mutex_lock(&adap->lock);
 	if (!adap->is_configuring &&
 	    (!log_addrs.num_log_addrs || !adap->is_configured) &&
@@ -190,6 +191,12 @@ static long cec_transmit(struct cec_adapter *adap, struct cec_fh *fh,
 		return -ENOTTY;
 	if (copy_from_user(&msg, parg, sizeof(msg)))
 		return -EFAULT;
+
+	/* A CDC-Only device can only send CDC messages */
+	if ((adap->log_addrs.flags & CEC_LOG_ADDRS_FL_CDC_ONLY) &&
+	    (msg.len == 1 || msg.msg[1] != CEC_MSG_CDC_MESSAGE))
+		return -EINVAL;
+
 	msg.flags &= CEC_MSG_FL_REPLY_TO_FOLLOWERS;
 	mutex_lock(&adap->lock);
 	if (!adap->is_configured)
diff --git a/include/linux/cec.h b/include/linux/cec.h
index 3f2f076027b1..9c87711c0e1c 100644
--- a/include/linux/cec.h
+++ b/include/linux/cec.h
@@ -396,6 +396,8 @@ struct cec_log_addrs {
 #define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK	(1 << 0)
 /* Passthrough RC messages to the input subsystem */
 #define CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU	(1 << 1)
+/* CDC-Only device: supports only CDC messages */
+#define CEC_LOG_ADDRS_FL_CDC_ONLY		(1 << 2)
 
 /* Events */
 
@@ -1016,4 +1018,54 @@ struct cec_event {
 #define CEC_OP_HPD_ERROR_OTHER				3
 #define CEC_OP_HPD_ERROR_NONE_NO_VIDEO			4
 
+/* End of Messages */
+
+/* Helper functions to identify the 'special' CEC devices */
+
+static inline bool cec_is_2nd_tv(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a second TV if the logical address is 14 or 15 and the
+	 * primary device type is a TV.
+	 */
+	return las->num_log_addrs &&
+	       las->log_addr[0] >= CEC_LOG_ADDR_SPECIFIC &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_TV;
+}
+
+static inline bool cec_is_processor(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a processor if the logical address is 12-15 and the
+	 * primary device type is a Processor.
+	 */
+	return las->num_log_addrs &&
+	       las->log_addr[0] >= CEC_LOG_ADDR_BACKUP_1 &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_PROCESSOR;
+}
+
+static inline bool cec_is_switch(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a switch if the logical address is 15 and the
+	 * primary device type is a Switch and the CDC-Only flag is not set.
+	 */
+	return las->num_log_addrs == 1 &&
+	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
+	       !(las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
+}
+
+static inline bool cec_is_cdc_only(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a CDC-only device if the logical address is 15 and the
+	 * primary device type is a Switch and the CDC-Only flag is set.
+	 */
+	return las->num_log_addrs == 1 &&
+	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
+	       (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
+}
+
 #endif
-- 
cgit v1.2.3


From 0dbacebede1e4e44bf500f94d692fad05eb2c293 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 2 Nov 2016 08:25:28 -0200
Subject: [media] cec: move the CEC framework out of staging and to media

The last open issues have been addressed, so it is time to move
this out of staging and into the mainline and to move the public
cec headers to include/uapi/linux.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/Makefile             |    2 +-
 drivers/media/Kconfig                    |   16 +
 drivers/media/Makefile                   |    4 +
 drivers/media/cec/Makefile               |    5 +
 drivers/media/cec/cec-adap.c             | 1856 ++++++++++++++++++++++++++++
 drivers/media/cec/cec-api.c              |  588 +++++++++
 drivers/media/cec/cec-core.c             |  411 +++++++
 drivers/media/cec/cec-priv.h             |   56 +
 drivers/media/i2c/Kconfig                |    6 +-
 drivers/media/platform/vivid/Kconfig     |    2 +-
 drivers/staging/media/Kconfig            |    2 -
 drivers/staging/media/Makefile           |    1 -
 drivers/staging/media/cec/Kconfig        |   12 -
 drivers/staging/media/cec/Makefile       |    5 -
 drivers/staging/media/cec/TODO           |    9 -
 drivers/staging/media/cec/cec-adap.c     | 1856 ----------------------------
 drivers/staging/media/cec/cec-api.c      |  588 ---------
 drivers/staging/media/cec/cec-core.c     |  411 -------
 drivers/staging/media/cec/cec-priv.h     |   56 -
 drivers/staging/media/pulse8-cec/Kconfig |    2 +-
 drivers/staging/media/s5p-cec/Kconfig    |    2 +-
 drivers/staging/media/st-cec/Kconfig     |    2 +-
 include/linux/cec-funcs.h                | 1971 ------------------------------
 include/linux/cec.h                      | 1071 ----------------
 include/media/cec.h                      |    2 +-
 include/uapi/linux/Kbuild                |    2 +
 include/uapi/linux/cec-funcs.h           | 1965 +++++++++++++++++++++++++++++
 include/uapi/linux/cec.h                 | 1065 ++++++++++++++++
 28 files changed, 5977 insertions(+), 5991 deletions(-)
 create mode 100644 drivers/media/cec/Makefile
 create mode 100644 drivers/media/cec/cec-adap.c
 create mode 100644 drivers/media/cec/cec-api.c
 create mode 100644 drivers/media/cec/cec-core.c
 create mode 100644 drivers/media/cec/cec-priv.h
 delete mode 100644 drivers/staging/media/cec/Kconfig
 delete mode 100644 drivers/staging/media/cec/Makefile
 delete mode 100644 drivers/staging/media/cec/TODO
 delete mode 100644 drivers/staging/media/cec/cec-adap.c
 delete mode 100644 drivers/staging/media/cec/cec-api.c
 delete mode 100644 drivers/staging/media/cec/cec-core.c
 delete mode 100644 drivers/staging/media/cec/cec-priv.h
 delete mode 100644 include/linux/cec-funcs.h
 delete mode 100644 include/linux/cec.h
 create mode 100644 include/uapi/linux/cec-funcs.h
 create mode 100644 include/uapi/linux/cec.h

(limited to 'include/linux')

diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile
index a7fb35291f6c..61afa052c501 100644
--- a/Documentation/media/Makefile
+++ b/Documentation/media/Makefile
@@ -51,7 +51,7 @@ $(BUILDDIR)/videodev2.h.rst: ${UAPI}/videodev2.h ${PARSER} $(SRC_DIR)/videodev2.
 $(BUILDDIR)/media.h.rst: ${UAPI}/media.h ${PARSER} $(SRC_DIR)/media.h.rst.exceptions
 	@$($(quiet)gen_rst)
 
-$(BUILDDIR)/cec.h.rst: ${KAPI}/cec.h ${PARSER} $(SRC_DIR)/cec.h.rst.exceptions
+$(BUILDDIR)/cec.h.rst: ${UAPI}/cec.h ${PARSER} $(SRC_DIR)/cec.h.rst.exceptions
 	@$($(quiet)gen_rst)
 
 $(BUILDDIR)/lirc.h.rst: ${UAPI}/lirc.h ${PARSER} $(SRC_DIR)/lirc.h.rst.exceptions
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 7b8540291217..bc643cbf813e 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -80,6 +80,22 @@ config MEDIA_RC_SUPPORT
 
 	  Say Y when you have a TV or an IR device.
 
+config MEDIA_CEC_SUPPORT
+	bool "HDMI CEC support"
+	select MEDIA_CEC_EDID
+	---help---
+	  Enable support for HDMI CEC (Consumer Electronics Control),
+	  which is an optional HDMI feature.
+
+	  Say Y when you have an HDMI receiver, transmitter or a USB CEC
+	  adapter that supports HDMI CEC.
+
+config MEDIA_CEC_DEBUG
+	bool "HDMI CEC debugfs interface"
+	depends on MEDIA_CEC_SUPPORT && DEBUG_FS
+	---help---
+	  Turns on the DebugFS interface for CEC devices.
+
 config MEDIA_CEC_EDID
 	bool
 
diff --git a/drivers/media/Makefile b/drivers/media/Makefile
index 0deaa93efdee..d87ccb8eeabe 100644
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile
@@ -6,6 +6,10 @@ ifeq ($(CONFIG_MEDIA_CEC_EDID),y)
   obj-$(CONFIG_MEDIA_SUPPORT) += cec-edid.o
 endif
 
+ifeq ($(CONFIG_MEDIA_CEC_SUPPORT),y)
+  obj-$(CONFIG_MEDIA_SUPPORT) += cec/
+endif
+
 media-objs	:= media-device.o media-devnode.o media-entity.o
 
 #
diff --git a/drivers/media/cec/Makefile b/drivers/media/cec/Makefile
new file mode 100644
index 000000000000..d6686337275f
--- /dev/null
+++ b/drivers/media/cec/Makefile
@@ -0,0 +1,5 @@
+cec-objs := cec-core.o cec-adap.o cec-api.o
+
+ifeq ($(CONFIG_MEDIA_CEC_SUPPORT),y)
+  obj-$(CONFIG_MEDIA_SUPPORT) += cec.o
+endif
diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
new file mode 100644
index 000000000000..054cd06e2247
--- /dev/null
+++ b/drivers/media/cec/cec-adap.c
@@ -0,0 +1,1856 @@
+/*
+ * cec-adap.c - HDMI Consumer Electronics Control framework - CEC adapter
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "cec-priv.h"
+
+static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx);
+static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx);
+
+/*
+ * 400 ms is the time it takes for one 16 byte message to be
+ * transferred and 5 is the maximum number of retries. Add
+ * another 100 ms as a margin. So if the transmit doesn't
+ * finish before that time something is really wrong and we
+ * have to time out.
+ *
+ * This is a sign that something it really wrong and a warning
+ * will be issued.
+ */
+#define CEC_XFER_TIMEOUT_MS (5 * 400 + 100)
+
+#define call_op(adap, op, arg...) \
+	(adap->ops->op ? adap->ops->op(adap, ## arg) : 0)
+
+#define call_void_op(adap, op, arg...)			\
+	do {						\
+		if (adap->ops->op)			\
+			adap->ops->op(adap, ## arg);	\
+	} while (0)
+
+static int cec_log_addr2idx(const struct cec_adapter *adap, u8 log_addr)
+{
+	int i;
+
+	for (i = 0; i < adap->log_addrs.num_log_addrs; i++)
+		if (adap->log_addrs.log_addr[i] == log_addr)
+			return i;
+	return -1;
+}
+
+static unsigned int cec_log_addr2dev(const struct cec_adapter *adap, u8 log_addr)
+{
+	int i = cec_log_addr2idx(adap, log_addr);
+
+	return adap->log_addrs.primary_device_type[i < 0 ? 0 : i];
+}
+
+/*
+ * Queue a new event for this filehandle. If ts == 0, then set it
+ * to the current time.
+ *
+ * The two events that are currently defined do not need to keep track
+ * of intermediate events, so no actual queue of events is needed,
+ * instead just store the latest state and the total number of lost
+ * messages.
+ *
+ * Should new events be added in the future that require intermediate
+ * results to be queued as well, then a proper queue data structure is
+ * required. But until then, just keep it simple.
+ */
+void cec_queue_event_fh(struct cec_fh *fh,
+			const struct cec_event *new_ev, u64 ts)
+{
+	struct cec_event *ev = &fh->events[new_ev->event - 1];
+
+	if (ts == 0)
+		ts = ktime_get_ns();
+
+	mutex_lock(&fh->lock);
+	if (new_ev->event == CEC_EVENT_LOST_MSGS &&
+	    fh->pending_events & (1 << new_ev->event)) {
+		/*
+		 * If there is already a lost_msgs event, then just
+		 * update the lost_msgs count. This effectively
+		 * merges the old and new events into one.
+		 */
+		ev->lost_msgs.lost_msgs += new_ev->lost_msgs.lost_msgs;
+		goto unlock;
+	}
+
+	/*
+	 * Intermediate states are not interesting, so just
+	 * overwrite any older event.
+	 */
+	*ev = *new_ev;
+	ev->ts = ts;
+	fh->pending_events |= 1 << new_ev->event;
+
+unlock:
+	mutex_unlock(&fh->lock);
+	wake_up_interruptible(&fh->wait);
+}
+
+/* Queue a new event for all open filehandles. */
+static void cec_queue_event(struct cec_adapter *adap,
+			    const struct cec_event *ev)
+{
+	u64 ts = ktime_get_ns();
+	struct cec_fh *fh;
+
+	mutex_lock(&adap->devnode.lock);
+	list_for_each_entry(fh, &adap->devnode.fhs, list)
+		cec_queue_event_fh(fh, ev, ts);
+	mutex_unlock(&adap->devnode.lock);
+}
+
+/*
+ * Queue a new message for this filehandle. If there is no more room
+ * in the queue, then send the LOST_MSGS event instead.
+ */
+static void cec_queue_msg_fh(struct cec_fh *fh, const struct cec_msg *msg)
+{
+	static const struct cec_event ev_lost_msg = {
+		.ts = 0,
+		.event = CEC_EVENT_LOST_MSGS,
+		.flags = 0,
+		{
+			.lost_msgs.lost_msgs = 1,
+		},
+	};
+	struct cec_msg_entry *entry;
+
+	mutex_lock(&fh->lock);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto lost_msgs;
+
+	entry->msg = *msg;
+	/* Add new msg at the end of the queue */
+	list_add_tail(&entry->list, &fh->msgs);
+
+	/*
+	 * if the queue now has more than CEC_MAX_MSG_RX_QUEUE_SZ
+	 * messages, drop the oldest one and send a lost message event.
+	 */
+	if (fh->queued_msgs == CEC_MAX_MSG_RX_QUEUE_SZ) {
+		list_del(&entry->list);
+		goto lost_msgs;
+	}
+	fh->queued_msgs++;
+	mutex_unlock(&fh->lock);
+	wake_up_interruptible(&fh->wait);
+	return;
+
+lost_msgs:
+	mutex_unlock(&fh->lock);
+	cec_queue_event_fh(fh, &ev_lost_msg, 0);
+}
+
+/*
+ * Queue the message for those filehandles that are in monitor mode.
+ * If valid_la is true (this message is for us or was sent by us),
+ * then pass it on to any monitoring filehandle. If this message
+ * isn't for us or from us, then only give it to filehandles that
+ * are in MONITOR_ALL mode.
+ *
+ * This can only happen if the CEC_CAP_MONITOR_ALL capability is
+ * set and the CEC adapter was placed in 'monitor all' mode.
+ */
+static void cec_queue_msg_monitor(struct cec_adapter *adap,
+				  const struct cec_msg *msg,
+				  bool valid_la)
+{
+	struct cec_fh *fh;
+	u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
+				      CEC_MODE_MONITOR_ALL;
+
+	mutex_lock(&adap->devnode.lock);
+	list_for_each_entry(fh, &adap->devnode.fhs, list) {
+		if (fh->mode_follower >= monitor_mode)
+			cec_queue_msg_fh(fh, msg);
+	}
+	mutex_unlock(&adap->devnode.lock);
+}
+
+/*
+ * Queue the message for follower filehandles.
+ */
+static void cec_queue_msg_followers(struct cec_adapter *adap,
+				    const struct cec_msg *msg)
+{
+	struct cec_fh *fh;
+
+	mutex_lock(&adap->devnode.lock);
+	list_for_each_entry(fh, &adap->devnode.fhs, list) {
+		if (fh->mode_follower == CEC_MODE_FOLLOWER)
+			cec_queue_msg_fh(fh, msg);
+	}
+	mutex_unlock(&adap->devnode.lock);
+}
+
+/* Notify userspace of an adapter state change. */
+static void cec_post_state_event(struct cec_adapter *adap)
+{
+	struct cec_event ev = {
+		.event = CEC_EVENT_STATE_CHANGE,
+	};
+
+	ev.state_change.phys_addr = adap->phys_addr;
+	ev.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
+	cec_queue_event(adap, &ev);
+}
+
+/*
+ * A CEC transmit (and a possible wait for reply) completed.
+ * If this was in blocking mode, then complete it, otherwise
+ * queue the message for userspace to dequeue later.
+ *
+ * This function is called with adap->lock held.
+ */
+static void cec_data_completed(struct cec_data *data)
+{
+	/*
+	 * Delete this transmit from the filehandle's xfer_list since
+	 * we're done with it.
+	 *
+	 * Note that if the filehandle is closed before this transmit
+	 * finished, then the release() function will set data->fh to NULL.
+	 * Without that we would be referring to a closed filehandle.
+	 */
+	if (data->fh)
+		list_del(&data->xfer_list);
+
+	if (data->blocking) {
+		/*
+		 * Someone is blocking so mark the message as completed
+		 * and call complete.
+		 */
+		data->completed = true;
+		complete(&data->c);
+	} else {
+		/*
+		 * No blocking, so just queue the message if needed and
+		 * free the memory.
+		 */
+		if (data->fh)
+			cec_queue_msg_fh(data->fh, &data->msg);
+		kfree(data);
+	}
+}
+
+/*
+ * A pending CEC transmit needs to be cancelled, either because the CEC
+ * adapter is disabled or the transmit takes an impossibly long time to
+ * finish.
+ *
+ * This function is called with adap->lock held.
+ */
+static void cec_data_cancel(struct cec_data *data)
+{
+	/*
+	 * It's either the current transmit, or it is a pending
+	 * transmit. Take the appropriate action to clear it.
+	 */
+	if (data->adap->transmitting == data) {
+		data->adap->transmitting = NULL;
+	} else {
+		list_del_init(&data->list);
+		if (!(data->msg.tx_status & CEC_TX_STATUS_OK))
+			data->adap->transmit_queue_sz--;
+	}
+
+	/* Mark it as an error */
+	data->msg.tx_ts = ktime_get_ns();
+	data->msg.tx_status = CEC_TX_STATUS_ERROR |
+			      CEC_TX_STATUS_MAX_RETRIES;
+	data->attempts = 0;
+	data->msg.tx_error_cnt = 1;
+	/* Queue transmitted message for monitoring purposes */
+	cec_queue_msg_monitor(data->adap, &data->msg, 1);
+
+	cec_data_completed(data);
+}
+
+/*
+ * Main CEC state machine
+ *
+ * Wait until the thread should be stopped, or we are not transmitting and
+ * a new transmit message is queued up, in which case we start transmitting
+ * that message. When the adapter finished transmitting the message it will
+ * call cec_transmit_done().
+ *
+ * If the adapter is disabled, then remove all queued messages instead.
+ *
+ * If the current transmit times out, then cancel that transmit.
+ */
+int cec_thread_func(void *_adap)
+{
+	struct cec_adapter *adap = _adap;
+
+	for (;;) {
+		unsigned int signal_free_time;
+		struct cec_data *data;
+		bool timeout = false;
+		u8 attempts;
+
+		if (adap->transmitting) {
+			int err;
+
+			/*
+			 * We are transmitting a message, so add a timeout
+			 * to prevent the state machine to get stuck waiting
+			 * for this message to finalize and add a check to
+			 * see if the adapter is disabled in which case the
+			 * transmit should be canceled.
+			 */
+			err = wait_event_interruptible_timeout(adap->kthread_waitq,
+				kthread_should_stop() ||
+				(!adap->is_configured && !adap->is_configuring) ||
+				(!adap->transmitting &&
+				 !list_empty(&adap->transmit_queue)),
+				msecs_to_jiffies(CEC_XFER_TIMEOUT_MS));
+			timeout = err == 0;
+		} else {
+			/* Otherwise we just wait for something to happen. */
+			wait_event_interruptible(adap->kthread_waitq,
+				kthread_should_stop() ||
+				(!adap->transmitting &&
+				 !list_empty(&adap->transmit_queue)));
+		}
+
+		mutex_lock(&adap->lock);
+
+		if ((!adap->is_configured && !adap->is_configuring) ||
+		    kthread_should_stop()) {
+			/*
+			 * If the adapter is disabled, or we're asked to stop,
+			 * then cancel any pending transmits.
+			 */
+			while (!list_empty(&adap->transmit_queue)) {
+				data = list_first_entry(&adap->transmit_queue,
+							struct cec_data, list);
+				cec_data_cancel(data);
+			}
+			if (adap->transmitting)
+				cec_data_cancel(adap->transmitting);
+
+			/*
+			 * Cancel the pending timeout work. We have to unlock
+			 * the mutex when flushing the work since
+			 * cec_wait_timeout() will take it. This is OK since
+			 * no new entries can be added to wait_queue as long
+			 * as adap->transmitting is NULL, which it is due to
+			 * the cec_data_cancel() above.
+			 */
+			while (!list_empty(&adap->wait_queue)) {
+				data = list_first_entry(&adap->wait_queue,
+							struct cec_data, list);
+
+				if (!cancel_delayed_work(&data->work)) {
+					mutex_unlock(&adap->lock);
+					flush_scheduled_work();
+					mutex_lock(&adap->lock);
+				}
+				cec_data_cancel(data);
+			}
+			goto unlock;
+		}
+
+		if (adap->transmitting && timeout) {
+			/*
+			 * If we timeout, then log that. This really shouldn't
+			 * happen and is an indication of a faulty CEC adapter
+			 * driver, or the CEC bus is in some weird state.
+			 */
+			dprintk(0, "message %*ph timed out!\n",
+				adap->transmitting->msg.len,
+				adap->transmitting->msg.msg);
+			/* Just give up on this. */
+			cec_data_cancel(adap->transmitting);
+			goto unlock;
+		}
+
+		/*
+		 * If we are still transmitting, or there is nothing new to
+		 * transmit, then just continue waiting.
+		 */
+		if (adap->transmitting || list_empty(&adap->transmit_queue))
+			goto unlock;
+
+		/* Get a new message to transmit */
+		data = list_first_entry(&adap->transmit_queue,
+					struct cec_data, list);
+		list_del_init(&data->list);
+		adap->transmit_queue_sz--;
+		/* Make this the current transmitting message */
+		adap->transmitting = data;
+
+		/*
+		 * Suggested number of attempts as per the CEC 2.0 spec:
+		 * 4 attempts is the default, except for 'secondary poll
+		 * messages', i.e. poll messages not sent during the adapter
+		 * configuration phase when it allocates logical addresses.
+		 */
+		if (data->msg.len == 1 && adap->is_configured)
+			attempts = 2;
+		else
+			attempts = 4;
+
+		/* Set the suggested signal free time */
+		if (data->attempts) {
+			/* should be >= 3 data bit periods for a retry */
+			signal_free_time = CEC_SIGNAL_FREE_TIME_RETRY;
+		} else if (data->new_initiator) {
+			/* should be >= 5 data bit periods for new initiator */
+			signal_free_time = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR;
+		} else {
+			/*
+			 * should be >= 7 data bit periods for sending another
+			 * frame immediately after another.
+			 */
+			signal_free_time = CEC_SIGNAL_FREE_TIME_NEXT_XFER;
+		}
+		if (data->attempts == 0)
+			data->attempts = attempts;
+
+		/* Tell the adapter to transmit, cancel on error */
+		if (adap->ops->adap_transmit(adap, data->attempts,
+					     signal_free_time, &data->msg))
+			cec_data_cancel(data);
+
+unlock:
+		mutex_unlock(&adap->lock);
+
+		if (kthread_should_stop())
+			break;
+	}
+	return 0;
+}
+
+/*
+ * Called by the CEC adapter if a transmit finished.
+ */
+void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt,
+		       u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt)
+{
+	struct cec_data *data;
+	struct cec_msg *msg;
+	u64 ts = ktime_get_ns();
+
+	dprintk(2, "cec_transmit_done %02x\n", status);
+	mutex_lock(&adap->lock);
+	data = adap->transmitting;
+	if (!data) {
+		/*
+		 * This can happen if a transmit was issued and the cable is
+		 * unplugged while the transmit is ongoing. Ignore this
+		 * transmit in that case.
+		 */
+		dprintk(1, "cec_transmit_done without an ongoing transmit!\n");
+		goto unlock;
+	}
+
+	msg = &data->msg;
+
+	/* Drivers must fill in the status! */
+	WARN_ON(status == 0);
+	msg->tx_ts = ts;
+	msg->tx_status |= status;
+	msg->tx_arb_lost_cnt += arb_lost_cnt;
+	msg->tx_nack_cnt += nack_cnt;
+	msg->tx_low_drive_cnt += low_drive_cnt;
+	msg->tx_error_cnt += error_cnt;
+
+	/* Mark that we're done with this transmit */
+	adap->transmitting = NULL;
+
+	/*
+	 * If there are still retry attempts left and there was an error and
+	 * the hardware didn't signal that it retried itself (by setting
+	 * CEC_TX_STATUS_MAX_RETRIES), then we will retry ourselves.
+	 */
+	if (data->attempts > 1 &&
+	    !(status & (CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_OK))) {
+		/* Retry this message */
+		data->attempts--;
+		/* Add the message in front of the transmit queue */
+		list_add(&data->list, &adap->transmit_queue);
+		adap->transmit_queue_sz++;
+		goto wake_thread;
+	}
+
+	data->attempts = 0;
+
+	/* Always set CEC_TX_STATUS_MAX_RETRIES on error */
+	if (!(status & CEC_TX_STATUS_OK))
+		msg->tx_status |= CEC_TX_STATUS_MAX_RETRIES;
+
+	/* Queue transmitted message for monitoring purposes */
+	cec_queue_msg_monitor(adap, msg, 1);
+
+	if ((status & CEC_TX_STATUS_OK) && adap->is_configured &&
+	    msg->timeout) {
+		/*
+		 * Queue the message into the wait queue if we want to wait
+		 * for a reply.
+		 */
+		list_add_tail(&data->list, &adap->wait_queue);
+		schedule_delayed_work(&data->work,
+				      msecs_to_jiffies(msg->timeout));
+	} else {
+		/* Otherwise we're done */
+		cec_data_completed(data);
+	}
+
+wake_thread:
+	/*
+	 * Wake up the main thread to see if another message is ready
+	 * for transmitting or to retry the current message.
+	 */
+	wake_up_interruptible(&adap->kthread_waitq);
+unlock:
+	mutex_unlock(&adap->lock);
+}
+EXPORT_SYMBOL_GPL(cec_transmit_done);
+
+/*
+ * Called when waiting for a reply times out.
+ */
+static void cec_wait_timeout(struct work_struct *work)
+{
+	struct cec_data *data = container_of(work, struct cec_data, work.work);
+	struct cec_adapter *adap = data->adap;
+
+	mutex_lock(&adap->lock);
+	/*
+	 * Sanity check in case the timeout and the arrival of the message
+	 * happened at the same time.
+	 */
+	if (list_empty(&data->list))
+		goto unlock;
+
+	/* Mark the message as timed out */
+	list_del_init(&data->list);
+	data->msg.rx_ts = ktime_get_ns();
+	data->msg.rx_status = CEC_RX_STATUS_TIMEOUT;
+	cec_data_completed(data);
+unlock:
+	mutex_unlock(&adap->lock);
+}
+
+/*
+ * Transmit a message. The fh argument may be NULL if the transmit is not
+ * associated with a specific filehandle.
+ *
+ * This function is called with adap->lock held.
+ */
+int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
+			struct cec_fh *fh, bool block)
+{
+	struct cec_data *data;
+	u8 last_initiator = 0xff;
+	unsigned int timeout;
+	int res = 0;
+
+	msg->rx_ts = 0;
+	msg->tx_ts = 0;
+	msg->rx_status = 0;
+	msg->tx_status = 0;
+	msg->tx_arb_lost_cnt = 0;
+	msg->tx_nack_cnt = 0;
+	msg->tx_low_drive_cnt = 0;
+	msg->tx_error_cnt = 0;
+	msg->sequence = ++adap->sequence;
+	if (!msg->sequence)
+		msg->sequence = ++adap->sequence;
+
+	if (msg->reply && msg->timeout == 0) {
+		/* Make sure the timeout isn't 0. */
+		msg->timeout = 1000;
+	}
+
+	/* Sanity checks */
+	if (msg->len == 0 || msg->len > CEC_MAX_MSG_SIZE) {
+		dprintk(1, "cec_transmit_msg: invalid length %d\n", msg->len);
+		return -EINVAL;
+	}
+	if (msg->timeout && msg->len == 1) {
+		dprintk(1, "cec_transmit_msg: can't reply for poll msg\n");
+		return -EINVAL;
+	}
+	memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len);
+	if (msg->len == 1) {
+		if (cec_msg_initiator(msg) != 0xf ||
+		    cec_msg_destination(msg) == 0xf) {
+			dprintk(1, "cec_transmit_msg: invalid poll message\n");
+			return -EINVAL;
+		}
+		if (cec_has_log_addr(adap, cec_msg_destination(msg))) {
+			/*
+			 * If the destination is a logical address our adapter
+			 * has already claimed, then just NACK this.
+			 * It depends on the hardware what it will do with a
+			 * POLL to itself (some OK this), so it is just as
+			 * easy to handle it here so the behavior will be
+			 * consistent.
+			 */
+			msg->tx_ts = ktime_get_ns();
+			msg->tx_status = CEC_TX_STATUS_NACK |
+					 CEC_TX_STATUS_MAX_RETRIES;
+			msg->tx_nack_cnt = 1;
+			return 0;
+		}
+	}
+	if (msg->len > 1 && !cec_msg_is_broadcast(msg) &&
+	    cec_has_log_addr(adap, cec_msg_destination(msg))) {
+		dprintk(1, "cec_transmit_msg: destination is the adapter itself\n");
+		return -EINVAL;
+	}
+	if (cec_msg_initiator(msg) != 0xf &&
+	    !cec_has_log_addr(adap, cec_msg_initiator(msg))) {
+		dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n",
+			cec_msg_initiator(msg));
+		return -EINVAL;
+	}
+	if (!adap->is_configured && !adap->is_configuring)
+		return -ENONET;
+
+	if (adap->transmit_queue_sz >= CEC_MAX_MSG_TX_QUEUE_SZ)
+		return -EBUSY;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	if (msg->len > 1 && msg->msg[1] == CEC_MSG_CDC_MESSAGE) {
+		msg->msg[2] = adap->phys_addr >> 8;
+		msg->msg[3] = adap->phys_addr & 0xff;
+	}
+
+	if (msg->timeout)
+		dprintk(2, "cec_transmit_msg: %*ph (wait for 0x%02x%s)\n",
+			msg->len, msg->msg, msg->reply, !block ? ", nb" : "");
+	else
+		dprintk(2, "cec_transmit_msg: %*ph%s\n",
+			msg->len, msg->msg, !block ? " (nb)" : "");
+
+	data->msg = *msg;
+	data->fh = fh;
+	data->adap = adap;
+	data->blocking = block;
+
+	/*
+	 * Determine if this message follows a message from the same
+	 * initiator. Needed to determine the free signal time later on.
+	 */
+	if (msg->len > 1) {
+		if (!(list_empty(&adap->transmit_queue))) {
+			const struct cec_data *last;
+
+			last = list_last_entry(&adap->transmit_queue,
+					       const struct cec_data, list);
+			last_initiator = cec_msg_initiator(&last->msg);
+		} else if (adap->transmitting) {
+			last_initiator =
+				cec_msg_initiator(&adap->transmitting->msg);
+		}
+	}
+	data->new_initiator = last_initiator != cec_msg_initiator(msg);
+	init_completion(&data->c);
+	INIT_DELAYED_WORK(&data->work, cec_wait_timeout);
+
+	if (fh)
+		list_add_tail(&data->xfer_list, &fh->xfer_list);
+	list_add_tail(&data->list, &adap->transmit_queue);
+	adap->transmit_queue_sz++;
+	if (!adap->transmitting)
+		wake_up_interruptible(&adap->kthread_waitq);
+
+	/* All done if we don't need to block waiting for completion */
+	if (!block)
+		return 0;
+
+	/*
+	 * If we don't get a completion before this time something is really
+	 * wrong and we time out.
+	 */
+	timeout = CEC_XFER_TIMEOUT_MS;
+	/* Add the requested timeout if we have to wait for a reply as well */
+	if (msg->timeout)
+		timeout += msg->timeout;
+
+	/*
+	 * Release the lock and wait, retake the lock afterwards.
+	 */
+	mutex_unlock(&adap->lock);
+	res = wait_for_completion_killable_timeout(&data->c,
+						   msecs_to_jiffies(timeout));
+	mutex_lock(&adap->lock);
+
+	if (data->completed) {
+		/* The transmit completed (possibly with an error) */
+		*msg = data->msg;
+		kfree(data);
+		return 0;
+	}
+	/*
+	 * The wait for completion timed out or was interrupted, so mark this
+	 * as non-blocking and disconnect from the filehandle since it is
+	 * still 'in flight'. When it finally completes it will just drop the
+	 * result silently.
+	 */
+	data->blocking = false;
+	if (data->fh)
+		list_del(&data->xfer_list);
+	data->fh = NULL;
+
+	if (res == 0) { /* timed out */
+		/* Check if the reply or the transmit failed */
+		if (msg->timeout && (msg->tx_status & CEC_TX_STATUS_OK))
+			msg->rx_status = CEC_RX_STATUS_TIMEOUT;
+		else
+			msg->tx_status = CEC_TX_STATUS_MAX_RETRIES;
+	}
+	return res > 0 ? 0 : res;
+}
+
+/* Helper function to be used by drivers and this framework. */
+int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg,
+		     bool block)
+{
+	int ret;
+
+	mutex_lock(&adap->lock);
+	ret = cec_transmit_msg_fh(adap, msg, NULL, block);
+	mutex_unlock(&adap->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cec_transmit_msg);
+
+/*
+ * I don't like forward references but without this the low-level
+ * cec_received_msg() function would come after a bunch of high-level
+ * CEC protocol handling functions. That was very confusing.
+ */
+static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
+			      bool is_reply);
+
+#define DIRECTED	0x80
+#define BCAST1_4	0x40
+#define BCAST2_0	0x20	/* broadcast only allowed for >= 2.0 */
+#define BCAST		(BCAST1_4 | BCAST2_0)
+#define BOTH		(BCAST | DIRECTED)
+
+/*
+ * Specify minimum length and whether the message is directed, broadcast
+ * or both. Messages that do not match the criteria are ignored as per
+ * the CEC specification.
+ */
+static const u8 cec_msg_size[256] = {
+	[CEC_MSG_ACTIVE_SOURCE] = 4 | BCAST,
+	[CEC_MSG_IMAGE_VIEW_ON] = 2 | DIRECTED,
+	[CEC_MSG_TEXT_VIEW_ON] = 2 | DIRECTED,
+	[CEC_MSG_INACTIVE_SOURCE] = 4 | DIRECTED,
+	[CEC_MSG_REQUEST_ACTIVE_SOURCE] = 2 | BCAST,
+	[CEC_MSG_ROUTING_CHANGE] = 6 | BCAST,
+	[CEC_MSG_ROUTING_INFORMATION] = 4 | BCAST,
+	[CEC_MSG_SET_STREAM_PATH] = 4 | BCAST,
+	[CEC_MSG_STANDBY] = 2 | BOTH,
+	[CEC_MSG_RECORD_OFF] = 2 | DIRECTED,
+	[CEC_MSG_RECORD_ON] = 3 | DIRECTED,
+	[CEC_MSG_RECORD_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_RECORD_TV_SCREEN] = 2 | DIRECTED,
+	[CEC_MSG_CLEAR_ANALOGUE_TIMER] = 13 | DIRECTED,
+	[CEC_MSG_CLEAR_DIGITAL_TIMER] = 16 | DIRECTED,
+	[CEC_MSG_CLEAR_EXT_TIMER] = 13 | DIRECTED,
+	[CEC_MSG_SET_ANALOGUE_TIMER] = 13 | DIRECTED,
+	[CEC_MSG_SET_DIGITAL_TIMER] = 16 | DIRECTED,
+	[CEC_MSG_SET_EXT_TIMER] = 13 | DIRECTED,
+	[CEC_MSG_SET_TIMER_PROGRAM_TITLE] = 2 | DIRECTED,
+	[CEC_MSG_TIMER_CLEARED_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_TIMER_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_CEC_VERSION] = 3 | DIRECTED,
+	[CEC_MSG_GET_CEC_VERSION] = 2 | DIRECTED,
+	[CEC_MSG_GIVE_PHYSICAL_ADDR] = 2 | DIRECTED,
+	[CEC_MSG_GET_MENU_LANGUAGE] = 2 | DIRECTED,
+	[CEC_MSG_REPORT_PHYSICAL_ADDR] = 5 | BCAST,
+	[CEC_MSG_SET_MENU_LANGUAGE] = 5 | BCAST,
+	[CEC_MSG_REPORT_FEATURES] = 6 | BCAST,
+	[CEC_MSG_GIVE_FEATURES] = 2 | DIRECTED,
+	[CEC_MSG_DECK_CONTROL] = 3 | DIRECTED,
+	[CEC_MSG_DECK_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_GIVE_DECK_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_PLAY] = 3 | DIRECTED,
+	[CEC_MSG_GIVE_TUNER_DEVICE_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_SELECT_ANALOGUE_SERVICE] = 6 | DIRECTED,
+	[CEC_MSG_SELECT_DIGITAL_SERVICE] = 9 | DIRECTED,
+	[CEC_MSG_TUNER_DEVICE_STATUS] = 7 | DIRECTED,
+	[CEC_MSG_TUNER_STEP_DECREMENT] = 2 | DIRECTED,
+	[CEC_MSG_TUNER_STEP_INCREMENT] = 2 | DIRECTED,
+	[CEC_MSG_DEVICE_VENDOR_ID] = 5 | BCAST,
+	[CEC_MSG_GIVE_DEVICE_VENDOR_ID] = 2 | DIRECTED,
+	[CEC_MSG_VENDOR_COMMAND] = 2 | DIRECTED,
+	[CEC_MSG_VENDOR_COMMAND_WITH_ID] = 5 | BOTH,
+	[CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN] = 2 | BOTH,
+	[CEC_MSG_VENDOR_REMOTE_BUTTON_UP] = 2 | BOTH,
+	[CEC_MSG_SET_OSD_STRING] = 3 | DIRECTED,
+	[CEC_MSG_GIVE_OSD_NAME] = 2 | DIRECTED,
+	[CEC_MSG_SET_OSD_NAME] = 2 | DIRECTED,
+	[CEC_MSG_MENU_REQUEST] = 3 | DIRECTED,
+	[CEC_MSG_MENU_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_USER_CONTROL_PRESSED] = 3 | DIRECTED,
+	[CEC_MSG_USER_CONTROL_RELEASED] = 2 | DIRECTED,
+	[CEC_MSG_GIVE_DEVICE_POWER_STATUS] = 2 | DIRECTED,
+	[CEC_MSG_REPORT_POWER_STATUS] = 3 | DIRECTED | BCAST2_0,
+	[CEC_MSG_FEATURE_ABORT] = 4 | DIRECTED,
+	[CEC_MSG_ABORT] = 2 | DIRECTED,
+	[CEC_MSG_GIVE_AUDIO_STATUS] = 2 | DIRECTED,
+	[CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS] = 2 | DIRECTED,
+	[CEC_MSG_REPORT_AUDIO_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED,
+	[CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED,
+	[CEC_MSG_SET_SYSTEM_AUDIO_MODE] = 3 | BOTH,
+	[CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST] = 2 | DIRECTED,
+	[CEC_MSG_SYSTEM_AUDIO_MODE_STATUS] = 3 | DIRECTED,
+	[CEC_MSG_SET_AUDIO_RATE] = 3 | DIRECTED,
+	[CEC_MSG_INITIATE_ARC] = 2 | DIRECTED,
+	[CEC_MSG_REPORT_ARC_INITIATED] = 2 | DIRECTED,
+	[CEC_MSG_REPORT_ARC_TERMINATED] = 2 | DIRECTED,
+	[CEC_MSG_REQUEST_ARC_INITIATION] = 2 | DIRECTED,
+	[CEC_MSG_REQUEST_ARC_TERMINATION] = 2 | DIRECTED,
+	[CEC_MSG_TERMINATE_ARC] = 2 | DIRECTED,
+	[CEC_MSG_REQUEST_CURRENT_LATENCY] = 4 | BCAST,
+	[CEC_MSG_REPORT_CURRENT_LATENCY] = 7 | BCAST,
+	[CEC_MSG_CDC_MESSAGE] = 2 | BCAST,
+};
+
+/* Called by the CEC adapter if a message is received */
+void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg)
+{
+	struct cec_data *data;
+	u8 msg_init = cec_msg_initiator(msg);
+	u8 msg_dest = cec_msg_destination(msg);
+	u8 cmd = msg->msg[1];
+	bool is_reply = false;
+	bool valid_la = true;
+	u8 min_len = 0;
+
+	if (WARN_ON(!msg->len || msg->len > CEC_MAX_MSG_SIZE))
+		return;
+
+	msg->rx_ts = ktime_get_ns();
+	msg->rx_status = CEC_RX_STATUS_OK;
+	msg->sequence = msg->reply = msg->timeout = 0;
+	msg->tx_status = 0;
+	msg->tx_ts = 0;
+	msg->flags = 0;
+	memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len);
+
+	mutex_lock(&adap->lock);
+	dprintk(2, "cec_received_msg: %*ph\n", msg->len, msg->msg);
+
+	/* Check if this message was for us (directed or broadcast). */
+	if (!cec_msg_is_broadcast(msg))
+		valid_la = cec_has_log_addr(adap, msg_dest);
+
+	/*
+	 * Check if the length is not too short or if the message is a
+	 * broadcast message where a directed message was expected or
+	 * vice versa. If so, then the message has to be ignored (according
+	 * to section CEC 7.3 and CEC 12.2).
+	 */
+	if (valid_la && msg->len > 1 && cec_msg_size[cmd]) {
+		u8 dir_fl = cec_msg_size[cmd] & BOTH;
+
+		min_len = cec_msg_size[cmd] & 0x1f;
+		if (msg->len < min_len)
+			valid_la = false;
+		else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED))
+			valid_la = false;
+		else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4))
+			valid_la = false;
+		else if (cec_msg_is_broadcast(msg) &&
+			 adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 &&
+			 !(dir_fl & BCAST2_0))
+			valid_la = false;
+	}
+	if (valid_la && min_len) {
+		/* These messages have special length requirements */
+		switch (cmd) {
+		case CEC_MSG_TIMER_STATUS:
+			if (msg->msg[2] & 0x10) {
+				switch (msg->msg[2] & 0xf) {
+				case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE:
+				case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE:
+					if (msg->len < 5)
+						valid_la = false;
+					break;
+				}
+			} else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) {
+				if (msg->len < 5)
+					valid_la = false;
+			}
+			break;
+		case CEC_MSG_RECORD_ON:
+			switch (msg->msg[2]) {
+			case CEC_OP_RECORD_SRC_OWN:
+				break;
+			case CEC_OP_RECORD_SRC_DIGITAL:
+				if (msg->len < 10)
+					valid_la = false;
+				break;
+			case CEC_OP_RECORD_SRC_ANALOG:
+				if (msg->len < 7)
+					valid_la = false;
+				break;
+			case CEC_OP_RECORD_SRC_EXT_PLUG:
+				if (msg->len < 4)
+					valid_la = false;
+				break;
+			case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
+				if (msg->len < 5)
+					valid_la = false;
+				break;
+			}
+			break;
+		}
+	}
+
+	/* It's a valid message and not a poll or CDC message */
+	if (valid_la && msg->len > 1 && cmd != CEC_MSG_CDC_MESSAGE) {
+		bool abort = cmd == CEC_MSG_FEATURE_ABORT;
+
+		/* The aborted command is in msg[2] */
+		if (abort)
+			cmd = msg->msg[2];
+
+		/*
+		 * Walk over all transmitted messages that are waiting for a
+		 * reply.
+		 */
+		list_for_each_entry(data, &adap->wait_queue, list) {
+			struct cec_msg *dst = &data->msg;
+
+			/*
+			 * The *only* CEC message that has two possible replies
+			 * is CEC_MSG_INITIATE_ARC.
+			 * In this case allow either of the two replies.
+			 */
+			if (!abort && dst->msg[1] == CEC_MSG_INITIATE_ARC &&
+			    (cmd == CEC_MSG_REPORT_ARC_INITIATED ||
+			     cmd == CEC_MSG_REPORT_ARC_TERMINATED) &&
+			    (dst->reply == CEC_MSG_REPORT_ARC_INITIATED ||
+			     dst->reply == CEC_MSG_REPORT_ARC_TERMINATED))
+				dst->reply = cmd;
+
+			/* Does the command match? */
+			if ((abort && cmd != dst->msg[1]) ||
+			    (!abort && cmd != dst->reply))
+				continue;
+
+			/* Does the addressing match? */
+			if (msg_init != cec_msg_destination(dst) &&
+			    !cec_msg_is_broadcast(dst))
+				continue;
+
+			/* We got a reply */
+			memcpy(dst->msg, msg->msg, msg->len);
+			dst->len = msg->len;
+			dst->rx_ts = msg->rx_ts;
+			dst->rx_status = msg->rx_status;
+			if (abort)
+				dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
+			msg->flags = dst->flags;
+			/* Remove it from the wait_queue */
+			list_del_init(&data->list);
+
+			/* Cancel the pending timeout work */
+			if (!cancel_delayed_work(&data->work)) {
+				mutex_unlock(&adap->lock);
+				flush_scheduled_work();
+				mutex_lock(&adap->lock);
+			}
+			/*
+			 * Mark this as a reply, provided someone is still
+			 * waiting for the answer.
+			 */
+			if (data->fh)
+				is_reply = true;
+			cec_data_completed(data);
+			break;
+		}
+	}
+	mutex_unlock(&adap->lock);
+
+	/* Pass the message on to any monitoring filehandles */
+	cec_queue_msg_monitor(adap, msg, valid_la);
+
+	/* We're done if it is not for us or a poll message */
+	if (!valid_la || msg->len <= 1)
+		return;
+
+	if (adap->log_addrs.log_addr_mask == 0)
+		return;
+
+	/*
+	 * Process the message on the protocol level. If is_reply is true,
+	 * then cec_receive_notify() won't pass on the reply to the listener(s)
+	 * since that was already done by cec_data_completed() above.
+	 */
+	cec_receive_notify(adap, msg, is_reply);
+}
+EXPORT_SYMBOL_GPL(cec_received_msg);
+
+/* Logical Address Handling */
+
+/*
+ * Attempt to claim a specific logical address.
+ *
+ * This function is called with adap->lock held.
+ */
+static int cec_config_log_addr(struct cec_adapter *adap,
+			       unsigned int idx,
+			       unsigned int log_addr)
+{
+	struct cec_log_addrs *las = &adap->log_addrs;
+	struct cec_msg msg = { };
+	int err;
+
+	if (cec_has_log_addr(adap, log_addr))
+		return 0;
+
+	/* Send poll message */
+	msg.len = 1;
+	msg.msg[0] = 0xf0 | log_addr;
+	err = cec_transmit_msg_fh(adap, &msg, NULL, true);
+
+	/*
+	 * While trying to poll the physical address was reset
+	 * and the adapter was unconfigured, so bail out.
+	 */
+	if (!adap->is_configuring)
+		return -EINTR;
+
+	if (err)
+		return err;
+
+	if (msg.tx_status & CEC_TX_STATUS_OK)
+		return 0;
+
+	/*
+	 * Message not acknowledged, so this logical
+	 * address is free to use.
+	 */
+	err = adap->ops->adap_log_addr(adap, log_addr);
+	if (err)
+		return err;
+
+	las->log_addr[idx] = log_addr;
+	las->log_addr_mask |= 1 << log_addr;
+	adap->phys_addrs[log_addr] = adap->phys_addr;
+
+	dprintk(2, "claimed addr %d (%d)\n", log_addr,
+		las->primary_device_type[idx]);
+	return 1;
+}
+
+/*
+ * Unconfigure the adapter: clear all logical addresses and send
+ * the state changed event.
+ *
+ * This function is called with adap->lock held.
+ */
+static void cec_adap_unconfigure(struct cec_adapter *adap)
+{
+	WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID));
+	adap->log_addrs.log_addr_mask = 0;
+	adap->is_configuring = false;
+	adap->is_configured = false;
+	memset(adap->phys_addrs, 0xff, sizeof(adap->phys_addrs));
+	wake_up_interruptible(&adap->kthread_waitq);
+	cec_post_state_event(adap);
+}
+
+/*
+ * Attempt to claim the required logical addresses.
+ */
+static int cec_config_thread_func(void *arg)
+{
+	/* The various LAs for each type of device */
+	static const u8 tv_log_addrs[] = {
+		CEC_LOG_ADDR_TV, CEC_LOG_ADDR_SPECIFIC,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 record_log_addrs[] = {
+		CEC_LOG_ADDR_RECORD_1, CEC_LOG_ADDR_RECORD_2,
+		CEC_LOG_ADDR_RECORD_3,
+		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 tuner_log_addrs[] = {
+		CEC_LOG_ADDR_TUNER_1, CEC_LOG_ADDR_TUNER_2,
+		CEC_LOG_ADDR_TUNER_3, CEC_LOG_ADDR_TUNER_4,
+		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 playback_log_addrs[] = {
+		CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_PLAYBACK_2,
+		CEC_LOG_ADDR_PLAYBACK_3,
+		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 audiosystem_log_addrs[] = {
+		CEC_LOG_ADDR_AUDIOSYSTEM,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 specific_use_log_addrs[] = {
+		CEC_LOG_ADDR_SPECIFIC,
+		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
+		CEC_LOG_ADDR_INVALID
+	};
+	static const u8 *type2addrs[6] = {
+		[CEC_LOG_ADDR_TYPE_TV] = tv_log_addrs,
+		[CEC_LOG_ADDR_TYPE_RECORD] = record_log_addrs,
+		[CEC_LOG_ADDR_TYPE_TUNER] = tuner_log_addrs,
+		[CEC_LOG_ADDR_TYPE_PLAYBACK] = playback_log_addrs,
+		[CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = audiosystem_log_addrs,
+		[CEC_LOG_ADDR_TYPE_SPECIFIC] = specific_use_log_addrs,
+	};
+	static const u16 type2mask[] = {
+		[CEC_LOG_ADDR_TYPE_TV] = CEC_LOG_ADDR_MASK_TV,
+		[CEC_LOG_ADDR_TYPE_RECORD] = CEC_LOG_ADDR_MASK_RECORD,
+		[CEC_LOG_ADDR_TYPE_TUNER] = CEC_LOG_ADDR_MASK_TUNER,
+		[CEC_LOG_ADDR_TYPE_PLAYBACK] = CEC_LOG_ADDR_MASK_PLAYBACK,
+		[CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = CEC_LOG_ADDR_MASK_AUDIOSYSTEM,
+		[CEC_LOG_ADDR_TYPE_SPECIFIC] = CEC_LOG_ADDR_MASK_SPECIFIC,
+	};
+	struct cec_adapter *adap = arg;
+	struct cec_log_addrs *las = &adap->log_addrs;
+	int err;
+	int i, j;
+
+	mutex_lock(&adap->lock);
+	dprintk(1, "physical address: %x.%x.%x.%x, claim %d logical addresses\n",
+		cec_phys_addr_exp(adap->phys_addr), las->num_log_addrs);
+	las->log_addr_mask = 0;
+
+	if (las->log_addr_type[0] == CEC_LOG_ADDR_TYPE_UNREGISTERED)
+		goto configured;
+
+	for (i = 0; i < las->num_log_addrs; i++) {
+		unsigned int type = las->log_addr_type[i];
+		const u8 *la_list;
+		u8 last_la;
+
+		/*
+		 * The TV functionality can only map to physical address 0.
+		 * For any other address, try the Specific functionality
+		 * instead as per the spec.
+		 */
+		if (adap->phys_addr && type == CEC_LOG_ADDR_TYPE_TV)
+			type = CEC_LOG_ADDR_TYPE_SPECIFIC;
+
+		la_list = type2addrs[type];
+		last_la = las->log_addr[i];
+		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
+		if (last_la == CEC_LOG_ADDR_INVALID ||
+		    last_la == CEC_LOG_ADDR_UNREGISTERED ||
+		    !(last_la & type2mask[type]))
+			last_la = la_list[0];
+
+		err = cec_config_log_addr(adap, i, last_la);
+		if (err > 0) /* Reused last LA */
+			continue;
+
+		if (err < 0)
+			goto unconfigure;
+
+		for (j = 0; la_list[j] != CEC_LOG_ADDR_INVALID; j++) {
+			/* Tried this one already, skip it */
+			if (la_list[j] == last_la)
+				continue;
+			/* The backup addresses are CEC 2.0 specific */
+			if ((la_list[j] == CEC_LOG_ADDR_BACKUP_1 ||
+			     la_list[j] == CEC_LOG_ADDR_BACKUP_2) &&
+			    las->cec_version < CEC_OP_CEC_VERSION_2_0)
+				continue;
+
+			err = cec_config_log_addr(adap, i, la_list[j]);
+			if (err == 0) /* LA is in use */
+				continue;
+			if (err < 0)
+				goto unconfigure;
+			/* Done, claimed an LA */
+			break;
+		}
+
+		if (la_list[j] == CEC_LOG_ADDR_INVALID)
+			dprintk(1, "could not claim LA %d\n", i);
+	}
+
+	if (adap->log_addrs.log_addr_mask == 0 &&
+	    !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK))
+		goto unconfigure;
+
+configured:
+	if (adap->log_addrs.log_addr_mask == 0) {
+		/* Fall back to unregistered */
+		las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED;
+		las->log_addr_mask = 1 << las->log_addr[0];
+		for (i = 1; i < las->num_log_addrs; i++)
+			las->log_addr[i] = CEC_LOG_ADDR_INVALID;
+	}
+	adap->is_configured = true;
+	adap->is_configuring = false;
+	cec_post_state_event(adap);
+	mutex_unlock(&adap->lock);
+
+	for (i = 0; i < las->num_log_addrs; i++) {
+		if (las->log_addr[i] == CEC_LOG_ADDR_INVALID ||
+		    (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY))
+			continue;
+
+		/*
+		 * Report Features must come first according
+		 * to CEC 2.0
+		 */
+		if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED)
+			cec_report_features(adap, i);
+		cec_report_phys_addr(adap, i);
+	}
+	for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++)
+		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
+	mutex_lock(&adap->lock);
+	adap->kthread_config = NULL;
+	mutex_unlock(&adap->lock);
+	complete(&adap->config_completion);
+	return 0;
+
+unconfigure:
+	for (i = 0; i < las->num_log_addrs; i++)
+		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
+	cec_adap_unconfigure(adap);
+	adap->kthread_config = NULL;
+	mutex_unlock(&adap->lock);
+	complete(&adap->config_completion);
+	return 0;
+}
+
+/*
+ * Called from either __cec_s_phys_addr or __cec_s_log_addrs to claim the
+ * logical addresses.
+ *
+ * This function is called with adap->lock held.
+ */
+static void cec_claim_log_addrs(struct cec_adapter *adap, bool block)
+{
+	if (WARN_ON(adap->is_configuring || adap->is_configured))
+		return;
+
+	init_completion(&adap->config_completion);
+
+	/* Ready to kick off the thread */
+	adap->is_configuring = true;
+	adap->kthread_config = kthread_run(cec_config_thread_func, adap,
+					   "ceccfg-%s", adap->name);
+	if (IS_ERR(adap->kthread_config)) {
+		adap->kthread_config = NULL;
+	} else if (block) {
+		mutex_unlock(&adap->lock);
+		wait_for_completion(&adap->config_completion);
+		mutex_lock(&adap->lock);
+	}
+}
+
+/* Set a new physical address and send an event notifying userspace of this.
+ *
+ * This function is called with adap->lock held.
+ */
+void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
+{
+	if (phys_addr == adap->phys_addr || adap->devnode.unregistered)
+		return;
+
+	if (phys_addr == CEC_PHYS_ADDR_INVALID ||
+	    adap->phys_addr != CEC_PHYS_ADDR_INVALID) {
+		adap->phys_addr = CEC_PHYS_ADDR_INVALID;
+		cec_post_state_event(adap);
+		cec_adap_unconfigure(adap);
+		/* Disabling monitor all mode should always succeed */
+		if (adap->monitor_all_cnt)
+			WARN_ON(call_op(adap, adap_monitor_all_enable, false));
+		WARN_ON(adap->ops->adap_enable(adap, false));
+		if (phys_addr == CEC_PHYS_ADDR_INVALID)
+			return;
+	}
+
+	if (adap->ops->adap_enable(adap, true))
+		return;
+
+	if (adap->monitor_all_cnt &&
+	    call_op(adap, adap_monitor_all_enable, true)) {
+		WARN_ON(adap->ops->adap_enable(adap, false));
+		return;
+	}
+	adap->phys_addr = phys_addr;
+	cec_post_state_event(adap);
+	if (adap->log_addrs.num_log_addrs)
+		cec_claim_log_addrs(adap, block);
+}
+
+void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
+{
+	if (IS_ERR_OR_NULL(adap))
+		return;
+
+	mutex_lock(&adap->lock);
+	__cec_s_phys_addr(adap, phys_addr, block);
+	mutex_unlock(&adap->lock);
+}
+EXPORT_SYMBOL_GPL(cec_s_phys_addr);
+
+/*
+ * Called from either the ioctl or a driver to set the logical addresses.
+ *
+ * This function is called with adap->lock held.
+ */
+int __cec_s_log_addrs(struct cec_adapter *adap,
+		      struct cec_log_addrs *log_addrs, bool block)
+{
+	u16 type_mask = 0;
+	int i;
+
+	if (adap->devnode.unregistered)
+		return -ENODEV;
+
+	if (!log_addrs || log_addrs->num_log_addrs == 0) {
+		adap->log_addrs.num_log_addrs = 0;
+		cec_adap_unconfigure(adap);
+		return 0;
+	}
+
+	if (log_addrs->flags & CEC_LOG_ADDRS_FL_CDC_ONLY) {
+		/*
+		 * Sanitize log_addrs fields if a CDC-Only device is
+		 * requested.
+		 */
+		log_addrs->num_log_addrs = 1;
+		log_addrs->osd_name[0] = '\0';
+		log_addrs->vendor_id = CEC_VENDOR_ID_NONE;
+		log_addrs->log_addr_type[0] = CEC_LOG_ADDR_TYPE_UNREGISTERED;
+		/*
+		 * This is just an internal convention since a CDC-Only device
+		 * doesn't have to be a switch. But switches already use
+		 * unregistered, so it makes some kind of sense to pick this
+		 * as the primary device. Since a CDC-Only device never sends
+		 * any 'normal' CEC messages this primary device type is never
+		 * sent over the CEC bus.
+		 */
+		log_addrs->primary_device_type[0] = CEC_OP_PRIM_DEVTYPE_SWITCH;
+		log_addrs->all_device_types[0] = 0;
+		log_addrs->features[0][0] = 0;
+		log_addrs->features[0][1] = 0;
+	}
+
+	/* Ensure the osd name is 0-terminated */
+	log_addrs->osd_name[sizeof(log_addrs->osd_name) - 1] = '\0';
+
+	/* Sanity checks */
+	if (log_addrs->num_log_addrs > adap->available_log_addrs) {
+		dprintk(1, "num_log_addrs > %d\n", adap->available_log_addrs);
+		return -EINVAL;
+	}
+
+	/*
+	 * Vendor ID is a 24 bit number, so check if the value is
+	 * within the correct range.
+	 */
+	if (log_addrs->vendor_id != CEC_VENDOR_ID_NONE &&
+	    (log_addrs->vendor_id & 0xff000000) != 0)
+		return -EINVAL;
+
+	if (log_addrs->cec_version != CEC_OP_CEC_VERSION_1_4 &&
+	    log_addrs->cec_version != CEC_OP_CEC_VERSION_2_0)
+		return -EINVAL;
+
+	if (log_addrs->num_log_addrs > 1)
+		for (i = 0; i < log_addrs->num_log_addrs; i++)
+			if (log_addrs->log_addr_type[i] ==
+					CEC_LOG_ADDR_TYPE_UNREGISTERED) {
+				dprintk(1, "num_log_addrs > 1 can't be combined with unregistered LA\n");
+				return -EINVAL;
+			}
+
+	for (i = 0; i < log_addrs->num_log_addrs; i++) {
+		const u8 feature_sz = ARRAY_SIZE(log_addrs->features[0]);
+		u8 *features = log_addrs->features[i];
+		bool op_is_dev_features = false;
+
+		log_addrs->log_addr[i] = CEC_LOG_ADDR_INVALID;
+		if (type_mask & (1 << log_addrs->log_addr_type[i])) {
+			dprintk(1, "duplicate logical address type\n");
+			return -EINVAL;
+		}
+		type_mask |= 1 << log_addrs->log_addr_type[i];
+		if ((type_mask & (1 << CEC_LOG_ADDR_TYPE_RECORD)) &&
+		    (type_mask & (1 << CEC_LOG_ADDR_TYPE_PLAYBACK))) {
+			/* Record already contains the playback functionality */
+			dprintk(1, "invalid record + playback combination\n");
+			return -EINVAL;
+		}
+		if (log_addrs->primary_device_type[i] >
+					CEC_OP_PRIM_DEVTYPE_PROCESSOR) {
+			dprintk(1, "unknown primary device type\n");
+			return -EINVAL;
+		}
+		if (log_addrs->primary_device_type[i] == 2) {
+			dprintk(1, "invalid primary device type\n");
+			return -EINVAL;
+		}
+		if (log_addrs->log_addr_type[i] > CEC_LOG_ADDR_TYPE_UNREGISTERED) {
+			dprintk(1, "unknown logical address type\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < feature_sz; i++) {
+			if ((features[i] & 0x80) == 0) {
+				if (op_is_dev_features)
+					break;
+				op_is_dev_features = true;
+			}
+		}
+		if (!op_is_dev_features || i == feature_sz) {
+			dprintk(1, "malformed features\n");
+			return -EINVAL;
+		}
+		/* Zero unused part of the feature array */
+		memset(features + i + 1, 0, feature_sz - i - 1);
+	}
+
+	if (log_addrs->cec_version >= CEC_OP_CEC_VERSION_2_0) {
+		if (log_addrs->num_log_addrs > 2) {
+			dprintk(1, "CEC 2.0 allows no more than 2 logical addresses\n");
+			return -EINVAL;
+		}
+		if (log_addrs->num_log_addrs == 2) {
+			if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_AUDIOSYSTEM) |
+					   (1 << CEC_LOG_ADDR_TYPE_TV)))) {
+				dprintk(1, "Two LAs is only allowed for audiosystem and TV\n");
+				return -EINVAL;
+			}
+			if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_PLAYBACK) |
+					   (1 << CEC_LOG_ADDR_TYPE_RECORD)))) {
+				dprintk(1, "An audiosystem/TV can only be combined with record or playback\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Zero unused LAs */
+	for (i = log_addrs->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) {
+		log_addrs->primary_device_type[i] = 0;
+		log_addrs->log_addr_type[i] = 0;
+		log_addrs->all_device_types[i] = 0;
+		memset(log_addrs->features[i], 0,
+		       sizeof(log_addrs->features[i]));
+	}
+
+	log_addrs->log_addr_mask = adap->log_addrs.log_addr_mask;
+	adap->log_addrs = *log_addrs;
+	if (adap->phys_addr != CEC_PHYS_ADDR_INVALID)
+		cec_claim_log_addrs(adap, block);
+	return 0;
+}
+
+int cec_s_log_addrs(struct cec_adapter *adap,
+		    struct cec_log_addrs *log_addrs, bool block)
+{
+	int err;
+
+	mutex_lock(&adap->lock);
+	err = __cec_s_log_addrs(adap, log_addrs, block);
+	mutex_unlock(&adap->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(cec_s_log_addrs);
+
+/* High-level core CEC message handling */
+
+/* Transmit the Report Features message */
+static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx)
+{
+	struct cec_msg msg = { };
+	const struct cec_log_addrs *las = &adap->log_addrs;
+	const u8 *features = las->features[la_idx];
+	bool op_is_dev_features = false;
+	unsigned int idx;
+
+	/* This is 2.0 and up only */
+	if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0)
+		return 0;
+
+	/* Report Features */
+	msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f;
+	msg.len = 4;
+	msg.msg[1] = CEC_MSG_REPORT_FEATURES;
+	msg.msg[2] = adap->log_addrs.cec_version;
+	msg.msg[3] = las->all_device_types[la_idx];
+
+	/* Write RC Profiles first, then Device Features */
+	for (idx = 0; idx < ARRAY_SIZE(las->features[0]); idx++) {
+		msg.msg[msg.len++] = features[idx];
+		if ((features[idx] & CEC_OP_FEAT_EXT) == 0) {
+			if (op_is_dev_features)
+				break;
+			op_is_dev_features = true;
+		}
+	}
+	return cec_transmit_msg(adap, &msg, false);
+}
+
+/* Transmit the Report Physical Address message */
+static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx)
+{
+	const struct cec_log_addrs *las = &adap->log_addrs;
+	struct cec_msg msg = { };
+
+	/* Report Physical Address */
+	msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f;
+	cec_msg_report_physical_addr(&msg, adap->phys_addr,
+				     las->primary_device_type[la_idx]);
+	dprintk(2, "config: la %d pa %x.%x.%x.%x\n",
+		las->log_addr[la_idx],
+			cec_phys_addr_exp(adap->phys_addr));
+	return cec_transmit_msg(adap, &msg, false);
+}
+
+/* Transmit the Feature Abort message */
+static int cec_feature_abort_reason(struct cec_adapter *adap,
+				    struct cec_msg *msg, u8 reason)
+{
+	struct cec_msg tx_msg = { };
+
+	/*
+	 * Don't reply with CEC_MSG_FEATURE_ABORT to a CEC_MSG_FEATURE_ABORT
+	 * message!
+	 */
+	if (msg->msg[1] == CEC_MSG_FEATURE_ABORT)
+		return 0;
+	cec_msg_set_reply_to(&tx_msg, msg);
+	cec_msg_feature_abort(&tx_msg, msg->msg[1], reason);
+	return cec_transmit_msg(adap, &tx_msg, false);
+}
+
+static int cec_feature_abort(struct cec_adapter *adap, struct cec_msg *msg)
+{
+	return cec_feature_abort_reason(adap, msg,
+					CEC_OP_ABORT_UNRECOGNIZED_OP);
+}
+
+static int cec_feature_refused(struct cec_adapter *adap, struct cec_msg *msg)
+{
+	return cec_feature_abort_reason(adap, msg,
+					CEC_OP_ABORT_REFUSED);
+}
+
+/*
+ * Called when a CEC message is received. This function will do any
+ * necessary core processing. The is_reply bool is true if this message
+ * is a reply to an earlier transmit.
+ *
+ * The message is either a broadcast message or a valid directed message.
+ */
+static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
+			      bool is_reply)
+{
+	bool is_broadcast = cec_msg_is_broadcast(msg);
+	u8 dest_laddr = cec_msg_destination(msg);
+	u8 init_laddr = cec_msg_initiator(msg);
+	u8 devtype = cec_log_addr2dev(adap, dest_laddr);
+	int la_idx = cec_log_addr2idx(adap, dest_laddr);
+	bool from_unregistered = init_laddr == 0xf;
+	struct cec_msg tx_cec_msg = { };
+
+	dprintk(1, "cec_receive_notify: %*ph\n", msg->len, msg->msg);
+
+	/* If this is a CDC-Only device, then ignore any non-CDC messages */
+	if (cec_is_cdc_only(&adap->log_addrs) &&
+	    msg->msg[1] != CEC_MSG_CDC_MESSAGE)
+		return 0;
+
+	if (adap->ops->received) {
+		/* Allow drivers to process the message first */
+		if (adap->ops->received(adap, msg) != -ENOMSG)
+			return 0;
+	}
+
+	/*
+	 * REPORT_PHYSICAL_ADDR, CEC_MSG_USER_CONTROL_PRESSED and
+	 * CEC_MSG_USER_CONTROL_RELEASED messages always have to be
+	 * handled by the CEC core, even if the passthrough mode is on.
+	 * The others are just ignored if passthrough mode is on.
+	 */
+	switch (msg->msg[1]) {
+	case CEC_MSG_GET_CEC_VERSION:
+	case CEC_MSG_GIVE_DEVICE_VENDOR_ID:
+	case CEC_MSG_ABORT:
+	case CEC_MSG_GIVE_DEVICE_POWER_STATUS:
+	case CEC_MSG_GIVE_PHYSICAL_ADDR:
+	case CEC_MSG_GIVE_OSD_NAME:
+	case CEC_MSG_GIVE_FEATURES:
+		/*
+		 * Skip processing these messages if the passthrough mode
+		 * is on.
+		 */
+		if (adap->passthrough)
+			goto skip_processing;
+		/* Ignore if addressing is wrong */
+		if (is_broadcast || from_unregistered)
+			return 0;
+		break;
+
+	case CEC_MSG_USER_CONTROL_PRESSED:
+	case CEC_MSG_USER_CONTROL_RELEASED:
+		/* Wrong addressing mode: don't process */
+		if (is_broadcast || from_unregistered)
+			goto skip_processing;
+		break;
+
+	case CEC_MSG_REPORT_PHYSICAL_ADDR:
+		/*
+		 * This message is always processed, regardless of the
+		 * passthrough setting.
+		 *
+		 * Exception: don't process if wrong addressing mode.
+		 */
+		if (!is_broadcast)
+			goto skip_processing;
+		break;
+
+	default:
+		break;
+	}
+
+	cec_msg_set_reply_to(&tx_cec_msg, msg);
+
+	switch (msg->msg[1]) {
+	/* The following messages are processed but still passed through */
+	case CEC_MSG_REPORT_PHYSICAL_ADDR: {
+		u16 pa = (msg->msg[2] << 8) | msg->msg[3];
+
+		if (!from_unregistered)
+			adap->phys_addrs[init_laddr] = pa;
+		dprintk(1, "Reported physical address %x.%x.%x.%x for logical address %d\n",
+			cec_phys_addr_exp(pa), init_laddr);
+		break;
+	}
+
+	case CEC_MSG_USER_CONTROL_PRESSED:
+		if (!(adap->capabilities & CEC_CAP_RC) ||
+		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
+			break;
+
+#if IS_REACHABLE(CONFIG_RC_CORE)
+		switch (msg->msg[2]) {
+		/*
+		 * Play function, this message can have variable length
+		 * depending on the specific play function that is used.
+		 */
+		case 0x60:
+			if (msg->len == 2)
+				rc_keydown(adap->rc, RC_TYPE_CEC,
+					   msg->msg[2], 0);
+			else
+				rc_keydown(adap->rc, RC_TYPE_CEC,
+					   msg->msg[2] << 8 | msg->msg[3], 0);
+			break;
+		/*
+		 * Other function messages that are not handled.
+		 * Currently the RC framework does not allow to supply an
+		 * additional parameter to a keypress. These "keys" contain
+		 * other information such as channel number, an input number
+		 * etc.
+		 * For the time being these messages are not processed by the
+		 * framework and are simply forwarded to the user space.
+		 */
+		case 0x56: case 0x57:
+		case 0x67: case 0x68: case 0x69: case 0x6a:
+			break;
+		default:
+			rc_keydown(adap->rc, RC_TYPE_CEC, msg->msg[2], 0);
+			break;
+		}
+#endif
+		break;
+
+	case CEC_MSG_USER_CONTROL_RELEASED:
+		if (!(adap->capabilities & CEC_CAP_RC) ||
+		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
+			break;
+#if IS_REACHABLE(CONFIG_RC_CORE)
+		rc_keyup(adap->rc);
+#endif
+		break;
+
+	/*
+	 * The remaining messages are only processed if the passthrough mode
+	 * is off.
+	 */
+	case CEC_MSG_GET_CEC_VERSION:
+		cec_msg_cec_version(&tx_cec_msg, adap->log_addrs.cec_version);
+		return cec_transmit_msg(adap, &tx_cec_msg, false);
+
+	case CEC_MSG_GIVE_PHYSICAL_ADDR:
+		/* Do nothing for CEC switches using addr 15 */
+		if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH && dest_laddr == 15)
+			return 0;
+		cec_msg_report_physical_addr(&tx_cec_msg, adap->phys_addr, devtype);
+		return cec_transmit_msg(adap, &tx_cec_msg, false);
+
+	case CEC_MSG_GIVE_DEVICE_VENDOR_ID:
+		if (adap->log_addrs.vendor_id == CEC_VENDOR_ID_NONE)
+			return cec_feature_abort(adap, msg);
+		cec_msg_device_vendor_id(&tx_cec_msg, adap->log_addrs.vendor_id);
+		return cec_transmit_msg(adap, &tx_cec_msg, false);
+
+	case CEC_MSG_ABORT:
+		/* Do nothing for CEC switches */
+		if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH)
+			return 0;
+		return cec_feature_refused(adap, msg);
+
+	case CEC_MSG_GIVE_OSD_NAME: {
+		if (adap->log_addrs.osd_name[0] == 0)
+			return cec_feature_abort(adap, msg);
+		cec_msg_set_osd_name(&tx_cec_msg, adap->log_addrs.osd_name);
+		return cec_transmit_msg(adap, &tx_cec_msg, false);
+	}
+
+	case CEC_MSG_GIVE_FEATURES:
+		if (adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0)
+			return cec_report_features(adap, la_idx);
+		return 0;
+
+	default:
+		/*
+		 * Unprocessed messages are aborted if userspace isn't doing
+		 * any processing either.
+		 */
+		if (!is_broadcast && !is_reply && !adap->follower_cnt &&
+		    !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT)
+			return cec_feature_abort(adap, msg);
+		break;
+	}
+
+skip_processing:
+	/* If this was a reply, then we're done, unless otherwise specified */
+	if (is_reply && !(msg->flags & CEC_MSG_FL_REPLY_TO_FOLLOWERS))
+		return 0;
+
+	/*
+	 * Send to the exclusive follower if there is one, otherwise send
+	 * to all followers.
+	 */
+	if (adap->cec_follower)
+		cec_queue_msg_fh(adap->cec_follower, msg);
+	else
+		cec_queue_msg_followers(adap, msg);
+	return 0;
+}
+
+/*
+ * Helper functions to keep track of the 'monitor all' use count.
+ *
+ * These functions are called with adap->lock held.
+ */
+int cec_monitor_all_cnt_inc(struct cec_adapter *adap)
+{
+	int ret = 0;
+
+	if (adap->monitor_all_cnt == 0)
+		ret = call_op(adap, adap_monitor_all_enable, 1);
+	if (ret == 0)
+		adap->monitor_all_cnt++;
+	return ret;
+}
+
+void cec_monitor_all_cnt_dec(struct cec_adapter *adap)
+{
+	adap->monitor_all_cnt--;
+	if (adap->monitor_all_cnt == 0)
+		WARN_ON(call_op(adap, adap_monitor_all_enable, 0));
+}
+
+#ifdef CONFIG_MEDIA_CEC_DEBUG
+/*
+ * Log the current state of the CEC adapter.
+ * Very useful for debugging.
+ */
+int cec_adap_status(struct seq_file *file, void *priv)
+{
+	struct cec_adapter *adap = dev_get_drvdata(file->private);
+	struct cec_data *data;
+
+	mutex_lock(&adap->lock);
+	seq_printf(file, "configured: %d\n", adap->is_configured);
+	seq_printf(file, "configuring: %d\n", adap->is_configuring);
+	seq_printf(file, "phys_addr: %x.%x.%x.%x\n",
+		   cec_phys_addr_exp(adap->phys_addr));
+	seq_printf(file, "number of LAs: %d\n", adap->log_addrs.num_log_addrs);
+	seq_printf(file, "LA mask: 0x%04x\n", adap->log_addrs.log_addr_mask);
+	if (adap->cec_follower)
+		seq_printf(file, "has CEC follower%s\n",
+			   adap->passthrough ? " (in passthrough mode)" : "");
+	if (adap->cec_initiator)
+		seq_puts(file, "has CEC initiator\n");
+	if (adap->monitor_all_cnt)
+		seq_printf(file, "file handles in Monitor All mode: %u\n",
+			   adap->monitor_all_cnt);
+	data = adap->transmitting;
+	if (data)
+		seq_printf(file, "transmitting message: %*ph (reply: %02x, timeout: %ums)\n",
+			   data->msg.len, data->msg.msg, data->msg.reply,
+			   data->msg.timeout);
+	seq_printf(file, "pending transmits: %u\n", adap->transmit_queue_sz);
+	list_for_each_entry(data, &adap->transmit_queue, list) {
+		seq_printf(file, "queued tx message: %*ph (reply: %02x, timeout: %ums)\n",
+			   data->msg.len, data->msg.msg, data->msg.reply,
+			   data->msg.timeout);
+	}
+	list_for_each_entry(data, &adap->wait_queue, list) {
+		seq_printf(file, "message waiting for reply: %*ph (reply: %02x, timeout: %ums)\n",
+			   data->msg.len, data->msg.msg, data->msg.reply,
+			   data->msg.timeout);
+	}
+
+	call_void_op(adap, adap_status, file);
+	mutex_unlock(&adap->lock);
+	return 0;
+}
+#endif
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c
new file mode 100644
index 000000000000..d4bc4ee2c6e5
--- /dev/null
+++ b/drivers/media/cec/cec-api.c
@@ -0,0 +1,588 @@
+/*
+ * cec-api.c - HDMI Consumer Electronics Control framework - API
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+
+#include "cec-priv.h"
+
+static inline struct cec_devnode *cec_devnode_data(struct file *filp)
+{
+	struct cec_fh *fh = filp->private_data;
+
+	return &fh->adap->devnode;
+}
+
+/* CEC file operations */
+
+static unsigned int cec_poll(struct file *filp,
+			     struct poll_table_struct *poll)
+{
+	struct cec_devnode *devnode = cec_devnode_data(filp);
+	struct cec_fh *fh = filp->private_data;
+	struct cec_adapter *adap = fh->adap;
+	unsigned int res = 0;
+
+	if (!devnode->registered)
+		return POLLERR | POLLHUP;
+	mutex_lock(&adap->lock);
+	if (adap->is_configured &&
+	    adap->transmit_queue_sz < CEC_MAX_MSG_TX_QUEUE_SZ)
+		res |= POLLOUT | POLLWRNORM;
+	if (fh->queued_msgs)
+		res |= POLLIN | POLLRDNORM;
+	if (fh->pending_events)
+		res |= POLLPRI;
+	poll_wait(filp, &fh->wait, poll);
+	mutex_unlock(&adap->lock);
+	return res;
+}
+
+static bool cec_is_busy(const struct cec_adapter *adap,
+			const struct cec_fh *fh)
+{
+	bool valid_initiator = adap->cec_initiator && adap->cec_initiator == fh;
+	bool valid_follower = adap->cec_follower && adap->cec_follower == fh;
+
+	/*
+	 * Exclusive initiators and followers can always access the CEC adapter
+	 */
+	if (valid_initiator || valid_follower)
+		return false;
+	/*
+	 * All others can only access the CEC adapter if there is no
+	 * exclusive initiator and they are in INITIATOR mode.
+	 */
+	return adap->cec_initiator ||
+	       fh->mode_initiator == CEC_MODE_NO_INITIATOR;
+}
+
+static long cec_adap_g_caps(struct cec_adapter *adap,
+			    struct cec_caps __user *parg)
+{
+	struct cec_caps caps = {};
+
+	strlcpy(caps.driver, adap->devnode.parent->driver->name,
+		sizeof(caps.driver));
+	strlcpy(caps.name, adap->name, sizeof(caps.name));
+	caps.available_log_addrs = adap->available_log_addrs;
+	caps.capabilities = adap->capabilities;
+	caps.version = LINUX_VERSION_CODE;
+	if (copy_to_user(parg, &caps, sizeof(caps)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_adap_g_phys_addr(struct cec_adapter *adap,
+				 __u16 __user *parg)
+{
+	u16 phys_addr;
+
+	mutex_lock(&adap->lock);
+	phys_addr = adap->phys_addr;
+	mutex_unlock(&adap->lock);
+	if (copy_to_user(parg, &phys_addr, sizeof(phys_addr)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_adap_s_phys_addr(struct cec_adapter *adap, struct cec_fh *fh,
+				 bool block, __u16 __user *parg)
+{
+	u16 phys_addr;
+	long err;
+
+	if (!(adap->capabilities & CEC_CAP_PHYS_ADDR))
+		return -ENOTTY;
+	if (copy_from_user(&phys_addr, parg, sizeof(phys_addr)))
+		return -EFAULT;
+
+	err = cec_phys_addr_validate(phys_addr, NULL, NULL);
+	if (err)
+		return err;
+	mutex_lock(&adap->lock);
+	if (cec_is_busy(adap, fh))
+		err = -EBUSY;
+	else
+		__cec_s_phys_addr(adap, phys_addr, block);
+	mutex_unlock(&adap->lock);
+	return err;
+}
+
+static long cec_adap_g_log_addrs(struct cec_adapter *adap,
+				 struct cec_log_addrs __user *parg)
+{
+	struct cec_log_addrs log_addrs;
+
+	mutex_lock(&adap->lock);
+	log_addrs = adap->log_addrs;
+	if (!adap->is_configured)
+		memset(log_addrs.log_addr, CEC_LOG_ADDR_INVALID,
+		       sizeof(log_addrs.log_addr));
+	mutex_unlock(&adap->lock);
+
+	if (copy_to_user(parg, &log_addrs, sizeof(log_addrs)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh,
+				 bool block, struct cec_log_addrs __user *parg)
+{
+	struct cec_log_addrs log_addrs;
+	long err = -EBUSY;
+
+	if (!(adap->capabilities & CEC_CAP_LOG_ADDRS))
+		return -ENOTTY;
+	if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
+		return -EFAULT;
+	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK |
+			   CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU |
+			   CEC_LOG_ADDRS_FL_CDC_ONLY;
+	mutex_lock(&adap->lock);
+	if (!adap->is_configuring &&
+	    (!log_addrs.num_log_addrs || !adap->is_configured) &&
+	    !cec_is_busy(adap, fh)) {
+		err = __cec_s_log_addrs(adap, &log_addrs, block);
+		if (!err)
+			log_addrs = adap->log_addrs;
+	}
+	mutex_unlock(&adap->lock);
+	if (err)
+		return err;
+	if (copy_to_user(parg, &log_addrs, sizeof(log_addrs)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_transmit(struct cec_adapter *adap, struct cec_fh *fh,
+			 bool block, struct cec_msg __user *parg)
+{
+	struct cec_msg msg = {};
+	long err = 0;
+
+	if (!(adap->capabilities & CEC_CAP_TRANSMIT))
+		return -ENOTTY;
+	if (copy_from_user(&msg, parg, sizeof(msg)))
+		return -EFAULT;
+
+	/* A CDC-Only device can only send CDC messages */
+	if ((adap->log_addrs.flags & CEC_LOG_ADDRS_FL_CDC_ONLY) &&
+	    (msg.len == 1 || msg.msg[1] != CEC_MSG_CDC_MESSAGE))
+		return -EINVAL;
+
+	msg.flags &= CEC_MSG_FL_REPLY_TO_FOLLOWERS;
+	mutex_lock(&adap->lock);
+	if (!adap->is_configured)
+		err = -ENONET;
+	else if (cec_is_busy(adap, fh))
+		err = -EBUSY;
+	else
+		err = cec_transmit_msg_fh(adap, &msg, fh, block);
+	mutex_unlock(&adap->lock);
+	if (err)
+		return err;
+	if (copy_to_user(parg, &msg, sizeof(msg)))
+		return -EFAULT;
+	return 0;
+}
+
+/* Called by CEC_RECEIVE: wait for a message to arrive */
+static int cec_receive_msg(struct cec_fh *fh, struct cec_msg *msg, bool block)
+{
+	u32 timeout = msg->timeout;
+	int res;
+
+	do {
+		mutex_lock(&fh->lock);
+		/* Are there received messages queued up? */
+		if (fh->queued_msgs) {
+			/* Yes, return the first one */
+			struct cec_msg_entry *entry =
+				list_first_entry(&fh->msgs,
+						 struct cec_msg_entry, list);
+
+			list_del(&entry->list);
+			*msg = entry->msg;
+			kfree(entry);
+			fh->queued_msgs--;
+			mutex_unlock(&fh->lock);
+			/* restore original timeout value */
+			msg->timeout = timeout;
+			return 0;
+		}
+
+		/* No, return EAGAIN in non-blocking mode or wait */
+		mutex_unlock(&fh->lock);
+
+		/* Return when in non-blocking mode */
+		if (!block)
+			return -EAGAIN;
+
+		if (msg->timeout) {
+			/* The user specified a timeout */
+			res = wait_event_interruptible_timeout(fh->wait,
+							       fh->queued_msgs,
+				msecs_to_jiffies(msg->timeout));
+			if (res == 0)
+				res = -ETIMEDOUT;
+			else if (res > 0)
+				res = 0;
+		} else {
+			/* Wait indefinitely */
+			res = wait_event_interruptible(fh->wait,
+						       fh->queued_msgs);
+		}
+		/* Exit on error, otherwise loop to get the new message */
+	} while (!res);
+	return res;
+}
+
+static long cec_receive(struct cec_adapter *adap, struct cec_fh *fh,
+			bool block, struct cec_msg __user *parg)
+{
+	struct cec_msg msg = {};
+	long err = 0;
+
+	if (copy_from_user(&msg, parg, sizeof(msg)))
+		return -EFAULT;
+	mutex_lock(&adap->lock);
+	if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
+		err = -ENONET;
+	mutex_unlock(&adap->lock);
+	if (err)
+		return err;
+
+	err = cec_receive_msg(fh, &msg, block);
+	if (err)
+		return err;
+	if (copy_to_user(parg, &msg, sizeof(msg)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_dqevent(struct cec_adapter *adap, struct cec_fh *fh,
+			bool block, struct cec_event __user *parg)
+{
+	struct cec_event *ev = NULL;
+	u64 ts = ~0ULL;
+	unsigned int i;
+	long err = 0;
+
+	mutex_lock(&fh->lock);
+	while (!fh->pending_events && block) {
+		mutex_unlock(&fh->lock);
+		err = wait_event_interruptible(fh->wait, fh->pending_events);
+		if (err)
+			return err;
+		mutex_lock(&fh->lock);
+	}
+
+	/* Find the oldest event */
+	for (i = 0; i < CEC_NUM_EVENTS; i++) {
+		if (fh->pending_events & (1 << (i + 1)) &&
+		    fh->events[i].ts <= ts) {
+			ev = &fh->events[i];
+			ts = ev->ts;
+		}
+	}
+	if (!ev) {
+		err = -EAGAIN;
+		goto unlock;
+	}
+
+	if (copy_to_user(parg, ev, sizeof(*ev))) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	fh->pending_events &= ~(1 << ev->event);
+
+unlock:
+	mutex_unlock(&fh->lock);
+	return err;
+}
+
+static long cec_g_mode(struct cec_adapter *adap, struct cec_fh *fh,
+		       u32 __user *parg)
+{
+	u32 mode = fh->mode_initiator | fh->mode_follower;
+
+	if (copy_to_user(parg, &mode, sizeof(mode)))
+		return -EFAULT;
+	return 0;
+}
+
+static long cec_s_mode(struct cec_adapter *adap, struct cec_fh *fh,
+		       u32 __user *parg)
+{
+	u32 mode;
+	u8 mode_initiator;
+	u8 mode_follower;
+	long err = 0;
+
+	if (copy_from_user(&mode, parg, sizeof(mode)))
+		return -EFAULT;
+	if (mode & ~(CEC_MODE_INITIATOR_MSK | CEC_MODE_FOLLOWER_MSK))
+		return -EINVAL;
+
+	mode_initiator = mode & CEC_MODE_INITIATOR_MSK;
+	mode_follower = mode & CEC_MODE_FOLLOWER_MSK;
+
+	if (mode_initiator > CEC_MODE_EXCL_INITIATOR ||
+	    mode_follower > CEC_MODE_MONITOR_ALL)
+		return -EINVAL;
+
+	if (mode_follower == CEC_MODE_MONITOR_ALL &&
+	    !(adap->capabilities & CEC_CAP_MONITOR_ALL))
+		return -EINVAL;
+
+	/* Follower modes should always be able to send CEC messages */
+	if ((mode_initiator == CEC_MODE_NO_INITIATOR ||
+	     !(adap->capabilities & CEC_CAP_TRANSMIT)) &&
+	    mode_follower >= CEC_MODE_FOLLOWER &&
+	    mode_follower <= CEC_MODE_EXCL_FOLLOWER_PASSTHRU)
+		return -EINVAL;
+
+	/* Monitor modes require CEC_MODE_NO_INITIATOR */
+	if (mode_initiator && mode_follower >= CEC_MODE_MONITOR)
+		return -EINVAL;
+
+	/* Monitor modes require CAP_NET_ADMIN */
+	if (mode_follower >= CEC_MODE_MONITOR && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&adap->lock);
+	/*
+	 * You can't become exclusive follower if someone else already
+	 * has that job.
+	 */
+	if ((mode_follower == CEC_MODE_EXCL_FOLLOWER ||
+	     mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU) &&
+	    adap->cec_follower && adap->cec_follower != fh)
+		err = -EBUSY;
+	/*
+	 * You can't become exclusive initiator if someone else already
+	 * has that job.
+	 */
+	if (mode_initiator == CEC_MODE_EXCL_INITIATOR &&
+	    adap->cec_initiator && adap->cec_initiator != fh)
+		err = -EBUSY;
+
+	if (!err) {
+		bool old_mon_all = fh->mode_follower == CEC_MODE_MONITOR_ALL;
+		bool new_mon_all = mode_follower == CEC_MODE_MONITOR_ALL;
+
+		if (old_mon_all != new_mon_all) {
+			if (new_mon_all)
+				err = cec_monitor_all_cnt_inc(adap);
+			else
+				cec_monitor_all_cnt_dec(adap);
+		}
+	}
+
+	if (err) {
+		mutex_unlock(&adap->lock);
+		return err;
+	}
+
+	if (fh->mode_follower == CEC_MODE_FOLLOWER)
+		adap->follower_cnt--;
+	if (mode_follower == CEC_MODE_FOLLOWER)
+		adap->follower_cnt++;
+	if (mode_follower == CEC_MODE_EXCL_FOLLOWER ||
+	    mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU) {
+		adap->passthrough =
+			mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU;
+		adap->cec_follower = fh;
+	} else if (adap->cec_follower == fh) {
+		adap->passthrough = false;
+		adap->cec_follower = NULL;
+	}
+	if (mode_initiator == CEC_MODE_EXCL_INITIATOR)
+		adap->cec_initiator = fh;
+	else if (adap->cec_initiator == fh)
+		adap->cec_initiator = NULL;
+	fh->mode_initiator = mode_initiator;
+	fh->mode_follower = mode_follower;
+	mutex_unlock(&adap->lock);
+	return 0;
+}
+
+static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct cec_devnode *devnode = cec_devnode_data(filp);
+	struct cec_fh *fh = filp->private_data;
+	struct cec_adapter *adap = fh->adap;
+	bool block = !(filp->f_flags & O_NONBLOCK);
+	void __user *parg = (void __user *)arg;
+
+	if (!devnode->registered)
+		return -ENODEV;
+
+	switch (cmd) {
+	case CEC_ADAP_G_CAPS:
+		return cec_adap_g_caps(adap, parg);
+
+	case CEC_ADAP_G_PHYS_ADDR:
+		return cec_adap_g_phys_addr(adap, parg);
+
+	case CEC_ADAP_S_PHYS_ADDR:
+		return cec_adap_s_phys_addr(adap, fh, block, parg);
+
+	case CEC_ADAP_G_LOG_ADDRS:
+		return cec_adap_g_log_addrs(adap, parg);
+
+	case CEC_ADAP_S_LOG_ADDRS:
+		return cec_adap_s_log_addrs(adap, fh, block, parg);
+
+	case CEC_TRANSMIT:
+		return cec_transmit(adap, fh, block, parg);
+
+	case CEC_RECEIVE:
+		return cec_receive(adap, fh, block, parg);
+
+	case CEC_DQEVENT:
+		return cec_dqevent(adap, fh, block, parg);
+
+	case CEC_G_MODE:
+		return cec_g_mode(adap, fh, parg);
+
+	case CEC_S_MODE:
+		return cec_s_mode(adap, fh, parg);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+static int cec_open(struct inode *inode, struct file *filp)
+{
+	struct cec_devnode *devnode =
+		container_of(inode->i_cdev, struct cec_devnode, cdev);
+	struct cec_adapter *adap = to_cec_adapter(devnode);
+	struct cec_fh *fh = kzalloc(sizeof(*fh), GFP_KERNEL);
+	/*
+	 * Initial events that are automatically sent when the cec device is
+	 * opened.
+	 */
+	struct cec_event ev_state = {
+		.event = CEC_EVENT_STATE_CHANGE,
+		.flags = CEC_EVENT_FL_INITIAL_STATE,
+	};
+	int err;
+
+	if (!fh)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fh->msgs);
+	INIT_LIST_HEAD(&fh->xfer_list);
+	mutex_init(&fh->lock);
+	init_waitqueue_head(&fh->wait);
+
+	fh->mode_initiator = CEC_MODE_INITIATOR;
+	fh->adap = adap;
+
+	err = cec_get_device(devnode);
+	if (err) {
+		kfree(fh);
+		return err;
+	}
+
+	filp->private_data = fh;
+
+	mutex_lock(&devnode->lock);
+	/* Queue up initial state events */
+	ev_state.state_change.phys_addr = adap->phys_addr;
+	ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
+	cec_queue_event_fh(fh, &ev_state, 0);
+
+	list_add(&fh->list, &devnode->fhs);
+	mutex_unlock(&devnode->lock);
+
+	return 0;
+}
+
+/* Override for the release function */
+static int cec_release(struct inode *inode, struct file *filp)
+{
+	struct cec_devnode *devnode = cec_devnode_data(filp);
+	struct cec_adapter *adap = to_cec_adapter(devnode);
+	struct cec_fh *fh = filp->private_data;
+
+	mutex_lock(&adap->lock);
+	if (adap->cec_initiator == fh)
+		adap->cec_initiator = NULL;
+	if (adap->cec_follower == fh) {
+		adap->cec_follower = NULL;
+		adap->passthrough = false;
+	}
+	if (fh->mode_follower == CEC_MODE_FOLLOWER)
+		adap->follower_cnt--;
+	if (fh->mode_follower == CEC_MODE_MONITOR_ALL)
+		cec_monitor_all_cnt_dec(adap);
+	mutex_unlock(&adap->lock);
+
+	mutex_lock(&devnode->lock);
+	list_del(&fh->list);
+	mutex_unlock(&devnode->lock);
+
+	/* Unhook pending transmits from this filehandle. */
+	mutex_lock(&adap->lock);
+	while (!list_empty(&fh->xfer_list)) {
+		struct cec_data *data =
+			list_first_entry(&fh->xfer_list, struct cec_data, xfer_list);
+
+		data->blocking = false;
+		data->fh = NULL;
+		list_del(&data->xfer_list);
+	}
+	mutex_unlock(&adap->lock);
+	while (!list_empty(&fh->msgs)) {
+		struct cec_msg_entry *entry =
+			list_first_entry(&fh->msgs, struct cec_msg_entry, list);
+
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	kfree(fh);
+
+	cec_put_device(devnode);
+	filp->private_data = NULL;
+	return 0;
+}
+
+const struct file_operations cec_devnode_fops = {
+	.owner = THIS_MODULE,
+	.open = cec_open,
+	.unlocked_ioctl = cec_ioctl,
+	.release = cec_release,
+	.poll = cec_poll,
+	.llseek = no_llseek,
+};
diff --git a/drivers/media/cec/cec-core.c b/drivers/media/cec/cec-core.c
new file mode 100644
index 000000000000..b0137e247dc9
--- /dev/null
+++ b/drivers/media/cec/cec-core.c
@@ -0,0 +1,411 @@
+/*
+ * cec-core.c - HDMI Consumer Electronics Control framework - Core
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "cec-priv.h"
+
+#define CEC_NUM_DEVICES	256
+#define CEC_NAME	"cec"
+
+int cec_debug;
+module_param_named(debug, cec_debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug level (0-2)");
+
+static dev_t cec_dev_t;
+
+/* Active devices */
+static DEFINE_MUTEX(cec_devnode_lock);
+static DECLARE_BITMAP(cec_devnode_nums, CEC_NUM_DEVICES);
+
+static struct dentry *top_cec_dir;
+
+/* dev to cec_devnode */
+#define to_cec_devnode(cd) container_of(cd, struct cec_devnode, dev)
+
+int cec_get_device(struct cec_devnode *devnode)
+{
+	/*
+	 * Check if the cec device is available. This needs to be done with
+	 * the devnode->lock held to prevent an open/unregister race:
+	 * without the lock, the device could be unregistered and freed between
+	 * the devnode->registered check and get_device() calls, leading to
+	 * a crash.
+	 */
+	mutex_lock(&devnode->lock);
+	/*
+	 * return ENXIO if the cec device has been removed
+	 * already or if it is not registered anymore.
+	 */
+	if (!devnode->registered) {
+		mutex_unlock(&devnode->lock);
+		return -ENXIO;
+	}
+	/* and increase the device refcount */
+	get_device(&devnode->dev);
+	mutex_unlock(&devnode->lock);
+	return 0;
+}
+
+void cec_put_device(struct cec_devnode *devnode)
+{
+	put_device(&devnode->dev);
+}
+
+/* Called when the last user of the cec device exits. */
+static void cec_devnode_release(struct device *cd)
+{
+	struct cec_devnode *devnode = to_cec_devnode(cd);
+
+	mutex_lock(&cec_devnode_lock);
+	/* Mark device node number as free */
+	clear_bit(devnode->minor, cec_devnode_nums);
+	mutex_unlock(&cec_devnode_lock);
+
+	cec_delete_adapter(to_cec_adapter(devnode));
+}
+
+static struct bus_type cec_bus_type = {
+	.name = CEC_NAME,
+};
+
+/*
+ * Register a cec device node
+ *
+ * The registration code assigns minor numbers and registers the new device node
+ * with the kernel. An error is returned if no free minor number can be found,
+ * or if the registration of the device node fails.
+ *
+ * Zero is returned on success.
+ *
+ * Note that if the cec_devnode_register call fails, the release() callback of
+ * the cec_devnode structure is *not* called, so the caller is responsible for
+ * freeing any data.
+ */
+static int __must_check cec_devnode_register(struct cec_devnode *devnode,
+					     struct module *owner)
+{
+	int minor;
+	int ret;
+
+	/* Initialization */
+	INIT_LIST_HEAD(&devnode->fhs);
+	mutex_init(&devnode->lock);
+
+	/* Part 1: Find a free minor number */
+	mutex_lock(&cec_devnode_lock);
+	minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0);
+	if (minor == CEC_NUM_DEVICES) {
+		mutex_unlock(&cec_devnode_lock);
+		pr_err("could not get a free minor\n");
+		return -ENFILE;
+	}
+
+	set_bit(minor, cec_devnode_nums);
+	mutex_unlock(&cec_devnode_lock);
+
+	devnode->minor = minor;
+	devnode->dev.bus = &cec_bus_type;
+	devnode->dev.devt = MKDEV(MAJOR(cec_dev_t), minor);
+	devnode->dev.release = cec_devnode_release;
+	devnode->dev.parent = devnode->parent;
+	dev_set_name(&devnode->dev, "cec%d", devnode->minor);
+	device_initialize(&devnode->dev);
+
+	/* Part 2: Initialize and register the character device */
+	cdev_init(&devnode->cdev, &cec_devnode_fops);
+	devnode->cdev.kobj.parent = &devnode->dev.kobj;
+	devnode->cdev.owner = owner;
+
+	ret = cdev_add(&devnode->cdev, devnode->dev.devt, 1);
+	if (ret < 0) {
+		pr_err("%s: cdev_add failed\n", __func__);
+		goto clr_bit;
+	}
+
+	ret = device_add(&devnode->dev);
+	if (ret)
+		goto cdev_del;
+
+	devnode->registered = true;
+	return 0;
+
+cdev_del:
+	cdev_del(&devnode->cdev);
+clr_bit:
+	mutex_lock(&cec_devnode_lock);
+	clear_bit(devnode->minor, cec_devnode_nums);
+	mutex_unlock(&cec_devnode_lock);
+	return ret;
+}
+
+/*
+ * Unregister a cec device node
+ *
+ * This unregisters the passed device. Future open calls will be met with
+ * errors.
+ *
+ * This function can safely be called if the device node has never been
+ * registered or has already been unregistered.
+ */
+static void cec_devnode_unregister(struct cec_devnode *devnode)
+{
+	struct cec_fh *fh;
+
+	mutex_lock(&devnode->lock);
+
+	/* Check if devnode was never registered or already unregistered */
+	if (!devnode->registered || devnode->unregistered) {
+		mutex_unlock(&devnode->lock);
+		return;
+	}
+
+	list_for_each_entry(fh, &devnode->fhs, list)
+		wake_up_interruptible(&fh->wait);
+
+	devnode->registered = false;
+	devnode->unregistered = true;
+	mutex_unlock(&devnode->lock);
+
+	device_del(&devnode->dev);
+	cdev_del(&devnode->cdev);
+	put_device(&devnode->dev);
+}
+
+struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
+					 void *priv, const char *name, u32 caps,
+					 u8 available_las, struct device *parent)
+{
+	struct cec_adapter *adap;
+	int res;
+
+	if (WARN_ON(!parent))
+		return ERR_PTR(-EINVAL);
+	if (WARN_ON(!caps))
+		return ERR_PTR(-EINVAL);
+	if (WARN_ON(!ops))
+		return ERR_PTR(-EINVAL);
+	if (WARN_ON(!available_las || available_las > CEC_MAX_LOG_ADDRS))
+		return ERR_PTR(-EINVAL);
+	adap = kzalloc(sizeof(*adap), GFP_KERNEL);
+	if (!adap)
+		return ERR_PTR(-ENOMEM);
+	adap->owner = parent->driver->owner;
+	adap->devnode.parent = parent;
+	strlcpy(adap->name, name, sizeof(adap->name));
+	adap->phys_addr = CEC_PHYS_ADDR_INVALID;
+	adap->log_addrs.cec_version = CEC_OP_CEC_VERSION_2_0;
+	adap->log_addrs.vendor_id = CEC_VENDOR_ID_NONE;
+	adap->capabilities = caps;
+	adap->available_log_addrs = available_las;
+	adap->sequence = 0;
+	adap->ops = ops;
+	adap->priv = priv;
+	memset(adap->phys_addrs, 0xff, sizeof(adap->phys_addrs));
+	mutex_init(&adap->lock);
+	INIT_LIST_HEAD(&adap->transmit_queue);
+	INIT_LIST_HEAD(&adap->wait_queue);
+	init_waitqueue_head(&adap->kthread_waitq);
+
+	adap->kthread = kthread_run(cec_thread_func, adap, "cec-%s", name);
+	if (IS_ERR(adap->kthread)) {
+		pr_err("cec-%s: kernel_thread() failed\n", name);
+		res = PTR_ERR(adap->kthread);
+		kfree(adap);
+		return ERR_PTR(res);
+	}
+
+	if (!(caps & CEC_CAP_RC))
+		return adap;
+
+#if IS_REACHABLE(CONFIG_RC_CORE)
+	/* Prepare the RC input device */
+	adap->rc = rc_allocate_device();
+	if (!adap->rc) {
+		pr_err("cec-%s: failed to allocate memory for rc_dev\n",
+		       name);
+		kthread_stop(adap->kthread);
+		kfree(adap);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	snprintf(adap->input_name, sizeof(adap->input_name),
+		 "RC for %s", name);
+	snprintf(adap->input_phys, sizeof(adap->input_phys),
+		 "%s/input0", name);
+
+	adap->rc->input_name = adap->input_name;
+	adap->rc->input_phys = adap->input_phys;
+	adap->rc->input_id.bustype = BUS_CEC;
+	adap->rc->input_id.vendor = 0;
+	adap->rc->input_id.product = 0;
+	adap->rc->input_id.version = 1;
+	adap->rc->dev.parent = parent;
+	adap->rc->driver_type = RC_DRIVER_SCANCODE;
+	adap->rc->driver_name = CEC_NAME;
+	adap->rc->allowed_protocols = RC_BIT_CEC;
+	adap->rc->priv = adap;
+	adap->rc->map_name = RC_MAP_CEC;
+	adap->rc->timeout = MS_TO_NS(100);
+#else
+	adap->capabilities &= ~CEC_CAP_RC;
+#endif
+	return adap;
+}
+EXPORT_SYMBOL_GPL(cec_allocate_adapter);
+
+int cec_register_adapter(struct cec_adapter *adap)
+{
+	int res;
+
+	if (IS_ERR_OR_NULL(adap))
+		return 0;
+
+#if IS_REACHABLE(CONFIG_RC_CORE)
+	if (adap->capabilities & CEC_CAP_RC) {
+		res = rc_register_device(adap->rc);
+
+		if (res) {
+			pr_err("cec-%s: failed to prepare input device\n",
+			       adap->name);
+			rc_free_device(adap->rc);
+			adap->rc = NULL;
+			return res;
+		}
+	}
+#endif
+
+	res = cec_devnode_register(&adap->devnode, adap->owner);
+	if (res) {
+#if IS_REACHABLE(CONFIG_RC_CORE)
+		/* Note: rc_unregister also calls rc_free */
+		rc_unregister_device(adap->rc);
+		adap->rc = NULL;
+#endif
+		return res;
+	}
+
+	dev_set_drvdata(&adap->devnode.dev, adap);
+#ifdef CONFIG_MEDIA_CEC_DEBUG
+	if (!top_cec_dir)
+		return 0;
+
+	adap->cec_dir = debugfs_create_dir(dev_name(&adap->devnode.dev), top_cec_dir);
+	if (IS_ERR_OR_NULL(adap->cec_dir)) {
+		pr_warn("cec-%s: Failed to create debugfs dir\n", adap->name);
+		return 0;
+	}
+	adap->status_file = debugfs_create_devm_seqfile(&adap->devnode.dev,
+		"status", adap->cec_dir, cec_adap_status);
+	if (IS_ERR_OR_NULL(adap->status_file)) {
+		pr_warn("cec-%s: Failed to create status file\n", adap->name);
+		debugfs_remove_recursive(adap->cec_dir);
+		adap->cec_dir = NULL;
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cec_register_adapter);
+
+void cec_unregister_adapter(struct cec_adapter *adap)
+{
+	if (IS_ERR_OR_NULL(adap))
+		return;
+
+#if IS_REACHABLE(CONFIG_RC_CORE)
+	/* Note: rc_unregister also calls rc_free */
+	rc_unregister_device(adap->rc);
+	adap->rc = NULL;
+#endif
+	debugfs_remove_recursive(adap->cec_dir);
+	cec_devnode_unregister(&adap->devnode);
+}
+EXPORT_SYMBOL_GPL(cec_unregister_adapter);
+
+void cec_delete_adapter(struct cec_adapter *adap)
+{
+	if (IS_ERR_OR_NULL(adap))
+		return;
+	mutex_lock(&adap->lock);
+	__cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false);
+	mutex_unlock(&adap->lock);
+	kthread_stop(adap->kthread);
+	if (adap->kthread_config)
+		kthread_stop(adap->kthread_config);
+#if IS_REACHABLE(CONFIG_RC_CORE)
+	rc_free_device(adap->rc);
+#endif
+	kfree(adap);
+}
+EXPORT_SYMBOL_GPL(cec_delete_adapter);
+
+/*
+ *	Initialise cec for linux
+ */
+static int __init cec_devnode_init(void)
+{
+	int ret;
+
+	pr_info("Linux cec interface: v0.10\n");
+	ret = alloc_chrdev_region(&cec_dev_t, 0, CEC_NUM_DEVICES,
+				  CEC_NAME);
+	if (ret < 0) {
+		pr_warn("cec: unable to allocate major\n");
+		return ret;
+	}
+
+#ifdef CONFIG_MEDIA_CEC_DEBUG
+	top_cec_dir = debugfs_create_dir("cec", NULL);
+	if (IS_ERR_OR_NULL(top_cec_dir)) {
+		pr_warn("cec: Failed to create debugfs cec dir\n");
+		top_cec_dir = NULL;
+	}
+#endif
+
+	ret = bus_register(&cec_bus_type);
+	if (ret < 0) {
+		unregister_chrdev_region(cec_dev_t, CEC_NUM_DEVICES);
+		pr_warn("cec: bus_register failed\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void __exit cec_devnode_exit(void)
+{
+	debugfs_remove_recursive(top_cec_dir);
+	bus_unregister(&cec_bus_type);
+	unregister_chrdev_region(cec_dev_t, CEC_NUM_DEVICES);
+}
+
+subsys_initcall(cec_devnode_init);
+module_exit(cec_devnode_exit)
+
+MODULE_AUTHOR("Hans Verkuil <hans.verkuil@cisco.com>");
+MODULE_DESCRIPTION("Device node registration for cec drivers");
+MODULE_LICENSE("GPL");
diff --git a/drivers/media/cec/cec-priv.h b/drivers/media/cec/cec-priv.h
new file mode 100644
index 000000000000..70767a7900f2
--- /dev/null
+++ b/drivers/media/cec/cec-priv.h
@@ -0,0 +1,56 @@
+/*
+ * cec-priv.h - HDMI Consumer Electronics Control internal header
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CEC_PRIV_H
+#define _CEC_PRIV_H
+
+#include <linux/cec-funcs.h>
+#include <media/cec.h>
+
+#define dprintk(lvl, fmt, arg...)					\
+	do {								\
+		if (lvl <= cec_debug)					\
+			pr_info("cec-%s: " fmt, adap->name, ## arg);	\
+	} while (0)
+
+/* devnode to cec_adapter */
+#define to_cec_adapter(node) container_of(node, struct cec_adapter, devnode)
+
+/* cec-core.c */
+extern int cec_debug;
+int cec_get_device(struct cec_devnode *devnode);
+void cec_put_device(struct cec_devnode *devnode);
+
+/* cec-adap.c */
+int cec_monitor_all_cnt_inc(struct cec_adapter *adap);
+void cec_monitor_all_cnt_dec(struct cec_adapter *adap);
+int cec_adap_status(struct seq_file *file, void *priv);
+int cec_thread_func(void *_adap);
+void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block);
+int __cec_s_log_addrs(struct cec_adapter *adap,
+		      struct cec_log_addrs *log_addrs, bool block);
+int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
+			struct cec_fh *fh, bool block);
+void cec_queue_event_fh(struct cec_fh *fh,
+			const struct cec_event *new_ev, u64 ts);
+
+/* cec-api.c */
+extern const struct file_operations cec_devnode_fops;
+
+#endif
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index 2669b4bad910..b31fa6fae009 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -221,7 +221,7 @@ config VIDEO_ADV7604
 
 config VIDEO_ADV7604_CEC
 	bool "Enable Analog Devices ADV7604 CEC support"
-	depends on VIDEO_ADV7604 && MEDIA_CEC
+	depends on VIDEO_ADV7604 && MEDIA_CEC_SUPPORT
 	---help---
 	  When selected the adv7604 will support the optional
 	  HDMI CEC feature.
@@ -242,7 +242,7 @@ config VIDEO_ADV7842
 
 config VIDEO_ADV7842_CEC
 	bool "Enable Analog Devices ADV7842 CEC support"
-	depends on VIDEO_ADV7842 && MEDIA_CEC
+	depends on VIDEO_ADV7842 && MEDIA_CEC_SUPPORT
 	---help---
 	  When selected the adv7842 will support the optional
 	  HDMI CEC feature.
@@ -481,7 +481,7 @@ config VIDEO_ADV7511
 
 config VIDEO_ADV7511_CEC
 	bool "Enable Analog Devices ADV7511 CEC support"
-	depends on VIDEO_ADV7511 && MEDIA_CEC
+	depends on VIDEO_ADV7511 && MEDIA_CEC_SUPPORT
 	---help---
 	  When selected the adv7511 will support the optional
 	  HDMI CEC feature.
diff --git a/drivers/media/platform/vivid/Kconfig b/drivers/media/platform/vivid/Kconfig
index 8e6918c5c87c..db0dd19d227a 100644
--- a/drivers/media/platform/vivid/Kconfig
+++ b/drivers/media/platform/vivid/Kconfig
@@ -25,7 +25,7 @@ config VIDEO_VIVID
 
 config VIDEO_VIVID_CEC
 	bool "Enable CEC emulation support"
-	depends on VIDEO_VIVID && MEDIA_CEC
+	depends on VIDEO_VIVID && MEDIA_CEC_SUPPORT
 	---help---
 	  When selected the vivid module will emulate the optional
 	  HDMI CEC feature.
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index 6620d96ee44d..0abe5ffb4934 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -21,8 +21,6 @@ if STAGING_MEDIA && MEDIA_SUPPORT
 # Please keep them in alphabetic order
 source "drivers/staging/media/bcm2048/Kconfig"
 
-source "drivers/staging/media/cec/Kconfig"
-
 source "drivers/staging/media/cxd2099/Kconfig"
 
 source "drivers/staging/media/davinci_vpfe/Kconfig"
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index 906257e94dda..246299eff80d 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_I2C_BCM2048)	+= bcm2048/
-obj-$(CONFIG_MEDIA_CEC)		+= cec/
 obj-$(CONFIG_VIDEO_SAMSUNG_S5P_CEC) += s5p-cec/
 obj-$(CONFIG_DVB_CXD2099)	+= cxd2099/
 obj-$(CONFIG_LIRC_STAGING)	+= lirc/
diff --git a/drivers/staging/media/cec/Kconfig b/drivers/staging/media/cec/Kconfig
deleted file mode 100644
index 6e12d41b1f86..000000000000
--- a/drivers/staging/media/cec/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
-config MEDIA_CEC
-	bool "CEC API (EXPERIMENTAL)"
-	depends on MEDIA_SUPPORT
-	select MEDIA_CEC_EDID
-	---help---
-	  Enable the CEC API.
-
-config MEDIA_CEC_DEBUG
-	bool "CEC debugfs interface (EXPERIMENTAL)"
-	depends on MEDIA_CEC && DEBUG_FS
-	---help---
-	  Turns on the DebugFS interface for CEC devices.
diff --git a/drivers/staging/media/cec/Makefile b/drivers/staging/media/cec/Makefile
deleted file mode 100644
index bd7f3c593468..000000000000
--- a/drivers/staging/media/cec/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-cec-objs := cec-core.o cec-adap.o cec-api.o
-
-ifeq ($(CONFIG_MEDIA_CEC),y)
-  obj-$(CONFIG_MEDIA_SUPPORT) += cec.o
-endif
diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
deleted file mode 100644
index 504d35c7ae95..000000000000
--- a/drivers/staging/media/cec/TODO
+++ /dev/null
@@ -1,9 +0,0 @@
-TODOs:
-
-- Once this is out of staging this should no longer be a separate
-  config option, instead it should be selected by drivers that want it.
-- Revisit the IS_REACHABLE(RC_CORE): perhaps the RC_CORE support should
-  be enabled through a separate config option in drivers/media/Kconfig
-  or rc/Kconfig?
-
-Hans Verkuil <hans.verkuil@cisco.com>
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
deleted file mode 100644
index 054cd06e2247..000000000000
--- a/drivers/staging/media/cec/cec-adap.c
+++ /dev/null
@@ -1,1856 +0,0 @@
-/*
- * cec-adap.c - HDMI Consumer Electronics Control framework - CEC adapter
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/ktime.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include "cec-priv.h"
-
-static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx);
-static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx);
-
-/*
- * 400 ms is the time it takes for one 16 byte message to be
- * transferred and 5 is the maximum number of retries. Add
- * another 100 ms as a margin. So if the transmit doesn't
- * finish before that time something is really wrong and we
- * have to time out.
- *
- * This is a sign that something it really wrong and a warning
- * will be issued.
- */
-#define CEC_XFER_TIMEOUT_MS (5 * 400 + 100)
-
-#define call_op(adap, op, arg...) \
-	(adap->ops->op ? adap->ops->op(adap, ## arg) : 0)
-
-#define call_void_op(adap, op, arg...)			\
-	do {						\
-		if (adap->ops->op)			\
-			adap->ops->op(adap, ## arg);	\
-	} while (0)
-
-static int cec_log_addr2idx(const struct cec_adapter *adap, u8 log_addr)
-{
-	int i;
-
-	for (i = 0; i < adap->log_addrs.num_log_addrs; i++)
-		if (adap->log_addrs.log_addr[i] == log_addr)
-			return i;
-	return -1;
-}
-
-static unsigned int cec_log_addr2dev(const struct cec_adapter *adap, u8 log_addr)
-{
-	int i = cec_log_addr2idx(adap, log_addr);
-
-	return adap->log_addrs.primary_device_type[i < 0 ? 0 : i];
-}
-
-/*
- * Queue a new event for this filehandle. If ts == 0, then set it
- * to the current time.
- *
- * The two events that are currently defined do not need to keep track
- * of intermediate events, so no actual queue of events is needed,
- * instead just store the latest state and the total number of lost
- * messages.
- *
- * Should new events be added in the future that require intermediate
- * results to be queued as well, then a proper queue data structure is
- * required. But until then, just keep it simple.
- */
-void cec_queue_event_fh(struct cec_fh *fh,
-			const struct cec_event *new_ev, u64 ts)
-{
-	struct cec_event *ev = &fh->events[new_ev->event - 1];
-
-	if (ts == 0)
-		ts = ktime_get_ns();
-
-	mutex_lock(&fh->lock);
-	if (new_ev->event == CEC_EVENT_LOST_MSGS &&
-	    fh->pending_events & (1 << new_ev->event)) {
-		/*
-		 * If there is already a lost_msgs event, then just
-		 * update the lost_msgs count. This effectively
-		 * merges the old and new events into one.
-		 */
-		ev->lost_msgs.lost_msgs += new_ev->lost_msgs.lost_msgs;
-		goto unlock;
-	}
-
-	/*
-	 * Intermediate states are not interesting, so just
-	 * overwrite any older event.
-	 */
-	*ev = *new_ev;
-	ev->ts = ts;
-	fh->pending_events |= 1 << new_ev->event;
-
-unlock:
-	mutex_unlock(&fh->lock);
-	wake_up_interruptible(&fh->wait);
-}
-
-/* Queue a new event for all open filehandles. */
-static void cec_queue_event(struct cec_adapter *adap,
-			    const struct cec_event *ev)
-{
-	u64 ts = ktime_get_ns();
-	struct cec_fh *fh;
-
-	mutex_lock(&adap->devnode.lock);
-	list_for_each_entry(fh, &adap->devnode.fhs, list)
-		cec_queue_event_fh(fh, ev, ts);
-	mutex_unlock(&adap->devnode.lock);
-}
-
-/*
- * Queue a new message for this filehandle. If there is no more room
- * in the queue, then send the LOST_MSGS event instead.
- */
-static void cec_queue_msg_fh(struct cec_fh *fh, const struct cec_msg *msg)
-{
-	static const struct cec_event ev_lost_msg = {
-		.ts = 0,
-		.event = CEC_EVENT_LOST_MSGS,
-		.flags = 0,
-		{
-			.lost_msgs.lost_msgs = 1,
-		},
-	};
-	struct cec_msg_entry *entry;
-
-	mutex_lock(&fh->lock);
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		goto lost_msgs;
-
-	entry->msg = *msg;
-	/* Add new msg at the end of the queue */
-	list_add_tail(&entry->list, &fh->msgs);
-
-	/*
-	 * if the queue now has more than CEC_MAX_MSG_RX_QUEUE_SZ
-	 * messages, drop the oldest one and send a lost message event.
-	 */
-	if (fh->queued_msgs == CEC_MAX_MSG_RX_QUEUE_SZ) {
-		list_del(&entry->list);
-		goto lost_msgs;
-	}
-	fh->queued_msgs++;
-	mutex_unlock(&fh->lock);
-	wake_up_interruptible(&fh->wait);
-	return;
-
-lost_msgs:
-	mutex_unlock(&fh->lock);
-	cec_queue_event_fh(fh, &ev_lost_msg, 0);
-}
-
-/*
- * Queue the message for those filehandles that are in monitor mode.
- * If valid_la is true (this message is for us or was sent by us),
- * then pass it on to any monitoring filehandle. If this message
- * isn't for us or from us, then only give it to filehandles that
- * are in MONITOR_ALL mode.
- *
- * This can only happen if the CEC_CAP_MONITOR_ALL capability is
- * set and the CEC adapter was placed in 'monitor all' mode.
- */
-static void cec_queue_msg_monitor(struct cec_adapter *adap,
-				  const struct cec_msg *msg,
-				  bool valid_la)
-{
-	struct cec_fh *fh;
-	u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
-				      CEC_MODE_MONITOR_ALL;
-
-	mutex_lock(&adap->devnode.lock);
-	list_for_each_entry(fh, &adap->devnode.fhs, list) {
-		if (fh->mode_follower >= monitor_mode)
-			cec_queue_msg_fh(fh, msg);
-	}
-	mutex_unlock(&adap->devnode.lock);
-}
-
-/*
- * Queue the message for follower filehandles.
- */
-static void cec_queue_msg_followers(struct cec_adapter *adap,
-				    const struct cec_msg *msg)
-{
-	struct cec_fh *fh;
-
-	mutex_lock(&adap->devnode.lock);
-	list_for_each_entry(fh, &adap->devnode.fhs, list) {
-		if (fh->mode_follower == CEC_MODE_FOLLOWER)
-			cec_queue_msg_fh(fh, msg);
-	}
-	mutex_unlock(&adap->devnode.lock);
-}
-
-/* Notify userspace of an adapter state change. */
-static void cec_post_state_event(struct cec_adapter *adap)
-{
-	struct cec_event ev = {
-		.event = CEC_EVENT_STATE_CHANGE,
-	};
-
-	ev.state_change.phys_addr = adap->phys_addr;
-	ev.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
-	cec_queue_event(adap, &ev);
-}
-
-/*
- * A CEC transmit (and a possible wait for reply) completed.
- * If this was in blocking mode, then complete it, otherwise
- * queue the message for userspace to dequeue later.
- *
- * This function is called with adap->lock held.
- */
-static void cec_data_completed(struct cec_data *data)
-{
-	/*
-	 * Delete this transmit from the filehandle's xfer_list since
-	 * we're done with it.
-	 *
-	 * Note that if the filehandle is closed before this transmit
-	 * finished, then the release() function will set data->fh to NULL.
-	 * Without that we would be referring to a closed filehandle.
-	 */
-	if (data->fh)
-		list_del(&data->xfer_list);
-
-	if (data->blocking) {
-		/*
-		 * Someone is blocking so mark the message as completed
-		 * and call complete.
-		 */
-		data->completed = true;
-		complete(&data->c);
-	} else {
-		/*
-		 * No blocking, so just queue the message if needed and
-		 * free the memory.
-		 */
-		if (data->fh)
-			cec_queue_msg_fh(data->fh, &data->msg);
-		kfree(data);
-	}
-}
-
-/*
- * A pending CEC transmit needs to be cancelled, either because the CEC
- * adapter is disabled or the transmit takes an impossibly long time to
- * finish.
- *
- * This function is called with adap->lock held.
- */
-static void cec_data_cancel(struct cec_data *data)
-{
-	/*
-	 * It's either the current transmit, or it is a pending
-	 * transmit. Take the appropriate action to clear it.
-	 */
-	if (data->adap->transmitting == data) {
-		data->adap->transmitting = NULL;
-	} else {
-		list_del_init(&data->list);
-		if (!(data->msg.tx_status & CEC_TX_STATUS_OK))
-			data->adap->transmit_queue_sz--;
-	}
-
-	/* Mark it as an error */
-	data->msg.tx_ts = ktime_get_ns();
-	data->msg.tx_status = CEC_TX_STATUS_ERROR |
-			      CEC_TX_STATUS_MAX_RETRIES;
-	data->attempts = 0;
-	data->msg.tx_error_cnt = 1;
-	/* Queue transmitted message for monitoring purposes */
-	cec_queue_msg_monitor(data->adap, &data->msg, 1);
-
-	cec_data_completed(data);
-}
-
-/*
- * Main CEC state machine
- *
- * Wait until the thread should be stopped, or we are not transmitting and
- * a new transmit message is queued up, in which case we start transmitting
- * that message. When the adapter finished transmitting the message it will
- * call cec_transmit_done().
- *
- * If the adapter is disabled, then remove all queued messages instead.
- *
- * If the current transmit times out, then cancel that transmit.
- */
-int cec_thread_func(void *_adap)
-{
-	struct cec_adapter *adap = _adap;
-
-	for (;;) {
-		unsigned int signal_free_time;
-		struct cec_data *data;
-		bool timeout = false;
-		u8 attempts;
-
-		if (adap->transmitting) {
-			int err;
-
-			/*
-			 * We are transmitting a message, so add a timeout
-			 * to prevent the state machine to get stuck waiting
-			 * for this message to finalize and add a check to
-			 * see if the adapter is disabled in which case the
-			 * transmit should be canceled.
-			 */
-			err = wait_event_interruptible_timeout(adap->kthread_waitq,
-				kthread_should_stop() ||
-				(!adap->is_configured && !adap->is_configuring) ||
-				(!adap->transmitting &&
-				 !list_empty(&adap->transmit_queue)),
-				msecs_to_jiffies(CEC_XFER_TIMEOUT_MS));
-			timeout = err == 0;
-		} else {
-			/* Otherwise we just wait for something to happen. */
-			wait_event_interruptible(adap->kthread_waitq,
-				kthread_should_stop() ||
-				(!adap->transmitting &&
-				 !list_empty(&adap->transmit_queue)));
-		}
-
-		mutex_lock(&adap->lock);
-
-		if ((!adap->is_configured && !adap->is_configuring) ||
-		    kthread_should_stop()) {
-			/*
-			 * If the adapter is disabled, or we're asked to stop,
-			 * then cancel any pending transmits.
-			 */
-			while (!list_empty(&adap->transmit_queue)) {
-				data = list_first_entry(&adap->transmit_queue,
-							struct cec_data, list);
-				cec_data_cancel(data);
-			}
-			if (adap->transmitting)
-				cec_data_cancel(adap->transmitting);
-
-			/*
-			 * Cancel the pending timeout work. We have to unlock
-			 * the mutex when flushing the work since
-			 * cec_wait_timeout() will take it. This is OK since
-			 * no new entries can be added to wait_queue as long
-			 * as adap->transmitting is NULL, which it is due to
-			 * the cec_data_cancel() above.
-			 */
-			while (!list_empty(&adap->wait_queue)) {
-				data = list_first_entry(&adap->wait_queue,
-							struct cec_data, list);
-
-				if (!cancel_delayed_work(&data->work)) {
-					mutex_unlock(&adap->lock);
-					flush_scheduled_work();
-					mutex_lock(&adap->lock);
-				}
-				cec_data_cancel(data);
-			}
-			goto unlock;
-		}
-
-		if (adap->transmitting && timeout) {
-			/*
-			 * If we timeout, then log that. This really shouldn't
-			 * happen and is an indication of a faulty CEC adapter
-			 * driver, or the CEC bus is in some weird state.
-			 */
-			dprintk(0, "message %*ph timed out!\n",
-				adap->transmitting->msg.len,
-				adap->transmitting->msg.msg);
-			/* Just give up on this. */
-			cec_data_cancel(adap->transmitting);
-			goto unlock;
-		}
-
-		/*
-		 * If we are still transmitting, or there is nothing new to
-		 * transmit, then just continue waiting.
-		 */
-		if (adap->transmitting || list_empty(&adap->transmit_queue))
-			goto unlock;
-
-		/* Get a new message to transmit */
-		data = list_first_entry(&adap->transmit_queue,
-					struct cec_data, list);
-		list_del_init(&data->list);
-		adap->transmit_queue_sz--;
-		/* Make this the current transmitting message */
-		adap->transmitting = data;
-
-		/*
-		 * Suggested number of attempts as per the CEC 2.0 spec:
-		 * 4 attempts is the default, except for 'secondary poll
-		 * messages', i.e. poll messages not sent during the adapter
-		 * configuration phase when it allocates logical addresses.
-		 */
-		if (data->msg.len == 1 && adap->is_configured)
-			attempts = 2;
-		else
-			attempts = 4;
-
-		/* Set the suggested signal free time */
-		if (data->attempts) {
-			/* should be >= 3 data bit periods for a retry */
-			signal_free_time = CEC_SIGNAL_FREE_TIME_RETRY;
-		} else if (data->new_initiator) {
-			/* should be >= 5 data bit periods for new initiator */
-			signal_free_time = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR;
-		} else {
-			/*
-			 * should be >= 7 data bit periods for sending another
-			 * frame immediately after another.
-			 */
-			signal_free_time = CEC_SIGNAL_FREE_TIME_NEXT_XFER;
-		}
-		if (data->attempts == 0)
-			data->attempts = attempts;
-
-		/* Tell the adapter to transmit, cancel on error */
-		if (adap->ops->adap_transmit(adap, data->attempts,
-					     signal_free_time, &data->msg))
-			cec_data_cancel(data);
-
-unlock:
-		mutex_unlock(&adap->lock);
-
-		if (kthread_should_stop())
-			break;
-	}
-	return 0;
-}
-
-/*
- * Called by the CEC adapter if a transmit finished.
- */
-void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt,
-		       u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt)
-{
-	struct cec_data *data;
-	struct cec_msg *msg;
-	u64 ts = ktime_get_ns();
-
-	dprintk(2, "cec_transmit_done %02x\n", status);
-	mutex_lock(&adap->lock);
-	data = adap->transmitting;
-	if (!data) {
-		/*
-		 * This can happen if a transmit was issued and the cable is
-		 * unplugged while the transmit is ongoing. Ignore this
-		 * transmit in that case.
-		 */
-		dprintk(1, "cec_transmit_done without an ongoing transmit!\n");
-		goto unlock;
-	}
-
-	msg = &data->msg;
-
-	/* Drivers must fill in the status! */
-	WARN_ON(status == 0);
-	msg->tx_ts = ts;
-	msg->tx_status |= status;
-	msg->tx_arb_lost_cnt += arb_lost_cnt;
-	msg->tx_nack_cnt += nack_cnt;
-	msg->tx_low_drive_cnt += low_drive_cnt;
-	msg->tx_error_cnt += error_cnt;
-
-	/* Mark that we're done with this transmit */
-	adap->transmitting = NULL;
-
-	/*
-	 * If there are still retry attempts left and there was an error and
-	 * the hardware didn't signal that it retried itself (by setting
-	 * CEC_TX_STATUS_MAX_RETRIES), then we will retry ourselves.
-	 */
-	if (data->attempts > 1 &&
-	    !(status & (CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_OK))) {
-		/* Retry this message */
-		data->attempts--;
-		/* Add the message in front of the transmit queue */
-		list_add(&data->list, &adap->transmit_queue);
-		adap->transmit_queue_sz++;
-		goto wake_thread;
-	}
-
-	data->attempts = 0;
-
-	/* Always set CEC_TX_STATUS_MAX_RETRIES on error */
-	if (!(status & CEC_TX_STATUS_OK))
-		msg->tx_status |= CEC_TX_STATUS_MAX_RETRIES;
-
-	/* Queue transmitted message for monitoring purposes */
-	cec_queue_msg_monitor(adap, msg, 1);
-
-	if ((status & CEC_TX_STATUS_OK) && adap->is_configured &&
-	    msg->timeout) {
-		/*
-		 * Queue the message into the wait queue if we want to wait
-		 * for a reply.
-		 */
-		list_add_tail(&data->list, &adap->wait_queue);
-		schedule_delayed_work(&data->work,
-				      msecs_to_jiffies(msg->timeout));
-	} else {
-		/* Otherwise we're done */
-		cec_data_completed(data);
-	}
-
-wake_thread:
-	/*
-	 * Wake up the main thread to see if another message is ready
-	 * for transmitting or to retry the current message.
-	 */
-	wake_up_interruptible(&adap->kthread_waitq);
-unlock:
-	mutex_unlock(&adap->lock);
-}
-EXPORT_SYMBOL_GPL(cec_transmit_done);
-
-/*
- * Called when waiting for a reply times out.
- */
-static void cec_wait_timeout(struct work_struct *work)
-{
-	struct cec_data *data = container_of(work, struct cec_data, work.work);
-	struct cec_adapter *adap = data->adap;
-
-	mutex_lock(&adap->lock);
-	/*
-	 * Sanity check in case the timeout and the arrival of the message
-	 * happened at the same time.
-	 */
-	if (list_empty(&data->list))
-		goto unlock;
-
-	/* Mark the message as timed out */
-	list_del_init(&data->list);
-	data->msg.rx_ts = ktime_get_ns();
-	data->msg.rx_status = CEC_RX_STATUS_TIMEOUT;
-	cec_data_completed(data);
-unlock:
-	mutex_unlock(&adap->lock);
-}
-
-/*
- * Transmit a message. The fh argument may be NULL if the transmit is not
- * associated with a specific filehandle.
- *
- * This function is called with adap->lock held.
- */
-int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
-			struct cec_fh *fh, bool block)
-{
-	struct cec_data *data;
-	u8 last_initiator = 0xff;
-	unsigned int timeout;
-	int res = 0;
-
-	msg->rx_ts = 0;
-	msg->tx_ts = 0;
-	msg->rx_status = 0;
-	msg->tx_status = 0;
-	msg->tx_arb_lost_cnt = 0;
-	msg->tx_nack_cnt = 0;
-	msg->tx_low_drive_cnt = 0;
-	msg->tx_error_cnt = 0;
-	msg->sequence = ++adap->sequence;
-	if (!msg->sequence)
-		msg->sequence = ++adap->sequence;
-
-	if (msg->reply && msg->timeout == 0) {
-		/* Make sure the timeout isn't 0. */
-		msg->timeout = 1000;
-	}
-
-	/* Sanity checks */
-	if (msg->len == 0 || msg->len > CEC_MAX_MSG_SIZE) {
-		dprintk(1, "cec_transmit_msg: invalid length %d\n", msg->len);
-		return -EINVAL;
-	}
-	if (msg->timeout && msg->len == 1) {
-		dprintk(1, "cec_transmit_msg: can't reply for poll msg\n");
-		return -EINVAL;
-	}
-	memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len);
-	if (msg->len == 1) {
-		if (cec_msg_initiator(msg) != 0xf ||
-		    cec_msg_destination(msg) == 0xf) {
-			dprintk(1, "cec_transmit_msg: invalid poll message\n");
-			return -EINVAL;
-		}
-		if (cec_has_log_addr(adap, cec_msg_destination(msg))) {
-			/*
-			 * If the destination is a logical address our adapter
-			 * has already claimed, then just NACK this.
-			 * It depends on the hardware what it will do with a
-			 * POLL to itself (some OK this), so it is just as
-			 * easy to handle it here so the behavior will be
-			 * consistent.
-			 */
-			msg->tx_ts = ktime_get_ns();
-			msg->tx_status = CEC_TX_STATUS_NACK |
-					 CEC_TX_STATUS_MAX_RETRIES;
-			msg->tx_nack_cnt = 1;
-			return 0;
-		}
-	}
-	if (msg->len > 1 && !cec_msg_is_broadcast(msg) &&
-	    cec_has_log_addr(adap, cec_msg_destination(msg))) {
-		dprintk(1, "cec_transmit_msg: destination is the adapter itself\n");
-		return -EINVAL;
-	}
-	if (cec_msg_initiator(msg) != 0xf &&
-	    !cec_has_log_addr(adap, cec_msg_initiator(msg))) {
-		dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n",
-			cec_msg_initiator(msg));
-		return -EINVAL;
-	}
-	if (!adap->is_configured && !adap->is_configuring)
-		return -ENONET;
-
-	if (adap->transmit_queue_sz >= CEC_MAX_MSG_TX_QUEUE_SZ)
-		return -EBUSY;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	if (msg->len > 1 && msg->msg[1] == CEC_MSG_CDC_MESSAGE) {
-		msg->msg[2] = adap->phys_addr >> 8;
-		msg->msg[3] = adap->phys_addr & 0xff;
-	}
-
-	if (msg->timeout)
-		dprintk(2, "cec_transmit_msg: %*ph (wait for 0x%02x%s)\n",
-			msg->len, msg->msg, msg->reply, !block ? ", nb" : "");
-	else
-		dprintk(2, "cec_transmit_msg: %*ph%s\n",
-			msg->len, msg->msg, !block ? " (nb)" : "");
-
-	data->msg = *msg;
-	data->fh = fh;
-	data->adap = adap;
-	data->blocking = block;
-
-	/*
-	 * Determine if this message follows a message from the same
-	 * initiator. Needed to determine the free signal time later on.
-	 */
-	if (msg->len > 1) {
-		if (!(list_empty(&adap->transmit_queue))) {
-			const struct cec_data *last;
-
-			last = list_last_entry(&adap->transmit_queue,
-					       const struct cec_data, list);
-			last_initiator = cec_msg_initiator(&last->msg);
-		} else if (adap->transmitting) {
-			last_initiator =
-				cec_msg_initiator(&adap->transmitting->msg);
-		}
-	}
-	data->new_initiator = last_initiator != cec_msg_initiator(msg);
-	init_completion(&data->c);
-	INIT_DELAYED_WORK(&data->work, cec_wait_timeout);
-
-	if (fh)
-		list_add_tail(&data->xfer_list, &fh->xfer_list);
-	list_add_tail(&data->list, &adap->transmit_queue);
-	adap->transmit_queue_sz++;
-	if (!adap->transmitting)
-		wake_up_interruptible(&adap->kthread_waitq);
-
-	/* All done if we don't need to block waiting for completion */
-	if (!block)
-		return 0;
-
-	/*
-	 * If we don't get a completion before this time something is really
-	 * wrong and we time out.
-	 */
-	timeout = CEC_XFER_TIMEOUT_MS;
-	/* Add the requested timeout if we have to wait for a reply as well */
-	if (msg->timeout)
-		timeout += msg->timeout;
-
-	/*
-	 * Release the lock and wait, retake the lock afterwards.
-	 */
-	mutex_unlock(&adap->lock);
-	res = wait_for_completion_killable_timeout(&data->c,
-						   msecs_to_jiffies(timeout));
-	mutex_lock(&adap->lock);
-
-	if (data->completed) {
-		/* The transmit completed (possibly with an error) */
-		*msg = data->msg;
-		kfree(data);
-		return 0;
-	}
-	/*
-	 * The wait for completion timed out or was interrupted, so mark this
-	 * as non-blocking and disconnect from the filehandle since it is
-	 * still 'in flight'. When it finally completes it will just drop the
-	 * result silently.
-	 */
-	data->blocking = false;
-	if (data->fh)
-		list_del(&data->xfer_list);
-	data->fh = NULL;
-
-	if (res == 0) { /* timed out */
-		/* Check if the reply or the transmit failed */
-		if (msg->timeout && (msg->tx_status & CEC_TX_STATUS_OK))
-			msg->rx_status = CEC_RX_STATUS_TIMEOUT;
-		else
-			msg->tx_status = CEC_TX_STATUS_MAX_RETRIES;
-	}
-	return res > 0 ? 0 : res;
-}
-
-/* Helper function to be used by drivers and this framework. */
-int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg,
-		     bool block)
-{
-	int ret;
-
-	mutex_lock(&adap->lock);
-	ret = cec_transmit_msg_fh(adap, msg, NULL, block);
-	mutex_unlock(&adap->lock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(cec_transmit_msg);
-
-/*
- * I don't like forward references but without this the low-level
- * cec_received_msg() function would come after a bunch of high-level
- * CEC protocol handling functions. That was very confusing.
- */
-static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
-			      bool is_reply);
-
-#define DIRECTED	0x80
-#define BCAST1_4	0x40
-#define BCAST2_0	0x20	/* broadcast only allowed for >= 2.0 */
-#define BCAST		(BCAST1_4 | BCAST2_0)
-#define BOTH		(BCAST | DIRECTED)
-
-/*
- * Specify minimum length and whether the message is directed, broadcast
- * or both. Messages that do not match the criteria are ignored as per
- * the CEC specification.
- */
-static const u8 cec_msg_size[256] = {
-	[CEC_MSG_ACTIVE_SOURCE] = 4 | BCAST,
-	[CEC_MSG_IMAGE_VIEW_ON] = 2 | DIRECTED,
-	[CEC_MSG_TEXT_VIEW_ON] = 2 | DIRECTED,
-	[CEC_MSG_INACTIVE_SOURCE] = 4 | DIRECTED,
-	[CEC_MSG_REQUEST_ACTIVE_SOURCE] = 2 | BCAST,
-	[CEC_MSG_ROUTING_CHANGE] = 6 | BCAST,
-	[CEC_MSG_ROUTING_INFORMATION] = 4 | BCAST,
-	[CEC_MSG_SET_STREAM_PATH] = 4 | BCAST,
-	[CEC_MSG_STANDBY] = 2 | BOTH,
-	[CEC_MSG_RECORD_OFF] = 2 | DIRECTED,
-	[CEC_MSG_RECORD_ON] = 3 | DIRECTED,
-	[CEC_MSG_RECORD_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_RECORD_TV_SCREEN] = 2 | DIRECTED,
-	[CEC_MSG_CLEAR_ANALOGUE_TIMER] = 13 | DIRECTED,
-	[CEC_MSG_CLEAR_DIGITAL_TIMER] = 16 | DIRECTED,
-	[CEC_MSG_CLEAR_EXT_TIMER] = 13 | DIRECTED,
-	[CEC_MSG_SET_ANALOGUE_TIMER] = 13 | DIRECTED,
-	[CEC_MSG_SET_DIGITAL_TIMER] = 16 | DIRECTED,
-	[CEC_MSG_SET_EXT_TIMER] = 13 | DIRECTED,
-	[CEC_MSG_SET_TIMER_PROGRAM_TITLE] = 2 | DIRECTED,
-	[CEC_MSG_TIMER_CLEARED_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_TIMER_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_CEC_VERSION] = 3 | DIRECTED,
-	[CEC_MSG_GET_CEC_VERSION] = 2 | DIRECTED,
-	[CEC_MSG_GIVE_PHYSICAL_ADDR] = 2 | DIRECTED,
-	[CEC_MSG_GET_MENU_LANGUAGE] = 2 | DIRECTED,
-	[CEC_MSG_REPORT_PHYSICAL_ADDR] = 5 | BCAST,
-	[CEC_MSG_SET_MENU_LANGUAGE] = 5 | BCAST,
-	[CEC_MSG_REPORT_FEATURES] = 6 | BCAST,
-	[CEC_MSG_GIVE_FEATURES] = 2 | DIRECTED,
-	[CEC_MSG_DECK_CONTROL] = 3 | DIRECTED,
-	[CEC_MSG_DECK_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_GIVE_DECK_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_PLAY] = 3 | DIRECTED,
-	[CEC_MSG_GIVE_TUNER_DEVICE_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_SELECT_ANALOGUE_SERVICE] = 6 | DIRECTED,
-	[CEC_MSG_SELECT_DIGITAL_SERVICE] = 9 | DIRECTED,
-	[CEC_MSG_TUNER_DEVICE_STATUS] = 7 | DIRECTED,
-	[CEC_MSG_TUNER_STEP_DECREMENT] = 2 | DIRECTED,
-	[CEC_MSG_TUNER_STEP_INCREMENT] = 2 | DIRECTED,
-	[CEC_MSG_DEVICE_VENDOR_ID] = 5 | BCAST,
-	[CEC_MSG_GIVE_DEVICE_VENDOR_ID] = 2 | DIRECTED,
-	[CEC_MSG_VENDOR_COMMAND] = 2 | DIRECTED,
-	[CEC_MSG_VENDOR_COMMAND_WITH_ID] = 5 | BOTH,
-	[CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN] = 2 | BOTH,
-	[CEC_MSG_VENDOR_REMOTE_BUTTON_UP] = 2 | BOTH,
-	[CEC_MSG_SET_OSD_STRING] = 3 | DIRECTED,
-	[CEC_MSG_GIVE_OSD_NAME] = 2 | DIRECTED,
-	[CEC_MSG_SET_OSD_NAME] = 2 | DIRECTED,
-	[CEC_MSG_MENU_REQUEST] = 3 | DIRECTED,
-	[CEC_MSG_MENU_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_USER_CONTROL_PRESSED] = 3 | DIRECTED,
-	[CEC_MSG_USER_CONTROL_RELEASED] = 2 | DIRECTED,
-	[CEC_MSG_GIVE_DEVICE_POWER_STATUS] = 2 | DIRECTED,
-	[CEC_MSG_REPORT_POWER_STATUS] = 3 | DIRECTED | BCAST2_0,
-	[CEC_MSG_FEATURE_ABORT] = 4 | DIRECTED,
-	[CEC_MSG_ABORT] = 2 | DIRECTED,
-	[CEC_MSG_GIVE_AUDIO_STATUS] = 2 | DIRECTED,
-	[CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS] = 2 | DIRECTED,
-	[CEC_MSG_REPORT_AUDIO_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED,
-	[CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED,
-	[CEC_MSG_SET_SYSTEM_AUDIO_MODE] = 3 | BOTH,
-	[CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST] = 2 | DIRECTED,
-	[CEC_MSG_SYSTEM_AUDIO_MODE_STATUS] = 3 | DIRECTED,
-	[CEC_MSG_SET_AUDIO_RATE] = 3 | DIRECTED,
-	[CEC_MSG_INITIATE_ARC] = 2 | DIRECTED,
-	[CEC_MSG_REPORT_ARC_INITIATED] = 2 | DIRECTED,
-	[CEC_MSG_REPORT_ARC_TERMINATED] = 2 | DIRECTED,
-	[CEC_MSG_REQUEST_ARC_INITIATION] = 2 | DIRECTED,
-	[CEC_MSG_REQUEST_ARC_TERMINATION] = 2 | DIRECTED,
-	[CEC_MSG_TERMINATE_ARC] = 2 | DIRECTED,
-	[CEC_MSG_REQUEST_CURRENT_LATENCY] = 4 | BCAST,
-	[CEC_MSG_REPORT_CURRENT_LATENCY] = 7 | BCAST,
-	[CEC_MSG_CDC_MESSAGE] = 2 | BCAST,
-};
-
-/* Called by the CEC adapter if a message is received */
-void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg)
-{
-	struct cec_data *data;
-	u8 msg_init = cec_msg_initiator(msg);
-	u8 msg_dest = cec_msg_destination(msg);
-	u8 cmd = msg->msg[1];
-	bool is_reply = false;
-	bool valid_la = true;
-	u8 min_len = 0;
-
-	if (WARN_ON(!msg->len || msg->len > CEC_MAX_MSG_SIZE))
-		return;
-
-	msg->rx_ts = ktime_get_ns();
-	msg->rx_status = CEC_RX_STATUS_OK;
-	msg->sequence = msg->reply = msg->timeout = 0;
-	msg->tx_status = 0;
-	msg->tx_ts = 0;
-	msg->flags = 0;
-	memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len);
-
-	mutex_lock(&adap->lock);
-	dprintk(2, "cec_received_msg: %*ph\n", msg->len, msg->msg);
-
-	/* Check if this message was for us (directed or broadcast). */
-	if (!cec_msg_is_broadcast(msg))
-		valid_la = cec_has_log_addr(adap, msg_dest);
-
-	/*
-	 * Check if the length is not too short or if the message is a
-	 * broadcast message where a directed message was expected or
-	 * vice versa. If so, then the message has to be ignored (according
-	 * to section CEC 7.3 and CEC 12.2).
-	 */
-	if (valid_la && msg->len > 1 && cec_msg_size[cmd]) {
-		u8 dir_fl = cec_msg_size[cmd] & BOTH;
-
-		min_len = cec_msg_size[cmd] & 0x1f;
-		if (msg->len < min_len)
-			valid_la = false;
-		else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED))
-			valid_la = false;
-		else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4))
-			valid_la = false;
-		else if (cec_msg_is_broadcast(msg) &&
-			 adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 &&
-			 !(dir_fl & BCAST2_0))
-			valid_la = false;
-	}
-	if (valid_la && min_len) {
-		/* These messages have special length requirements */
-		switch (cmd) {
-		case CEC_MSG_TIMER_STATUS:
-			if (msg->msg[2] & 0x10) {
-				switch (msg->msg[2] & 0xf) {
-				case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE:
-				case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE:
-					if (msg->len < 5)
-						valid_la = false;
-					break;
-				}
-			} else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) {
-				if (msg->len < 5)
-					valid_la = false;
-			}
-			break;
-		case CEC_MSG_RECORD_ON:
-			switch (msg->msg[2]) {
-			case CEC_OP_RECORD_SRC_OWN:
-				break;
-			case CEC_OP_RECORD_SRC_DIGITAL:
-				if (msg->len < 10)
-					valid_la = false;
-				break;
-			case CEC_OP_RECORD_SRC_ANALOG:
-				if (msg->len < 7)
-					valid_la = false;
-				break;
-			case CEC_OP_RECORD_SRC_EXT_PLUG:
-				if (msg->len < 4)
-					valid_la = false;
-				break;
-			case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
-				if (msg->len < 5)
-					valid_la = false;
-				break;
-			}
-			break;
-		}
-	}
-
-	/* It's a valid message and not a poll or CDC message */
-	if (valid_la && msg->len > 1 && cmd != CEC_MSG_CDC_MESSAGE) {
-		bool abort = cmd == CEC_MSG_FEATURE_ABORT;
-
-		/* The aborted command is in msg[2] */
-		if (abort)
-			cmd = msg->msg[2];
-
-		/*
-		 * Walk over all transmitted messages that are waiting for a
-		 * reply.
-		 */
-		list_for_each_entry(data, &adap->wait_queue, list) {
-			struct cec_msg *dst = &data->msg;
-
-			/*
-			 * The *only* CEC message that has two possible replies
-			 * is CEC_MSG_INITIATE_ARC.
-			 * In this case allow either of the two replies.
-			 */
-			if (!abort && dst->msg[1] == CEC_MSG_INITIATE_ARC &&
-			    (cmd == CEC_MSG_REPORT_ARC_INITIATED ||
-			     cmd == CEC_MSG_REPORT_ARC_TERMINATED) &&
-			    (dst->reply == CEC_MSG_REPORT_ARC_INITIATED ||
-			     dst->reply == CEC_MSG_REPORT_ARC_TERMINATED))
-				dst->reply = cmd;
-
-			/* Does the command match? */
-			if ((abort && cmd != dst->msg[1]) ||
-			    (!abort && cmd != dst->reply))
-				continue;
-
-			/* Does the addressing match? */
-			if (msg_init != cec_msg_destination(dst) &&
-			    !cec_msg_is_broadcast(dst))
-				continue;
-
-			/* We got a reply */
-			memcpy(dst->msg, msg->msg, msg->len);
-			dst->len = msg->len;
-			dst->rx_ts = msg->rx_ts;
-			dst->rx_status = msg->rx_status;
-			if (abort)
-				dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
-			msg->flags = dst->flags;
-			/* Remove it from the wait_queue */
-			list_del_init(&data->list);
-
-			/* Cancel the pending timeout work */
-			if (!cancel_delayed_work(&data->work)) {
-				mutex_unlock(&adap->lock);
-				flush_scheduled_work();
-				mutex_lock(&adap->lock);
-			}
-			/*
-			 * Mark this as a reply, provided someone is still
-			 * waiting for the answer.
-			 */
-			if (data->fh)
-				is_reply = true;
-			cec_data_completed(data);
-			break;
-		}
-	}
-	mutex_unlock(&adap->lock);
-
-	/* Pass the message on to any monitoring filehandles */
-	cec_queue_msg_monitor(adap, msg, valid_la);
-
-	/* We're done if it is not for us or a poll message */
-	if (!valid_la || msg->len <= 1)
-		return;
-
-	if (adap->log_addrs.log_addr_mask == 0)
-		return;
-
-	/*
-	 * Process the message on the protocol level. If is_reply is true,
-	 * then cec_receive_notify() won't pass on the reply to the listener(s)
-	 * since that was already done by cec_data_completed() above.
-	 */
-	cec_receive_notify(adap, msg, is_reply);
-}
-EXPORT_SYMBOL_GPL(cec_received_msg);
-
-/* Logical Address Handling */
-
-/*
- * Attempt to claim a specific logical address.
- *
- * This function is called with adap->lock held.
- */
-static int cec_config_log_addr(struct cec_adapter *adap,
-			       unsigned int idx,
-			       unsigned int log_addr)
-{
-	struct cec_log_addrs *las = &adap->log_addrs;
-	struct cec_msg msg = { };
-	int err;
-
-	if (cec_has_log_addr(adap, log_addr))
-		return 0;
-
-	/* Send poll message */
-	msg.len = 1;
-	msg.msg[0] = 0xf0 | log_addr;
-	err = cec_transmit_msg_fh(adap, &msg, NULL, true);
-
-	/*
-	 * While trying to poll the physical address was reset
-	 * and the adapter was unconfigured, so bail out.
-	 */
-	if (!adap->is_configuring)
-		return -EINTR;
-
-	if (err)
-		return err;
-
-	if (msg.tx_status & CEC_TX_STATUS_OK)
-		return 0;
-
-	/*
-	 * Message not acknowledged, so this logical
-	 * address is free to use.
-	 */
-	err = adap->ops->adap_log_addr(adap, log_addr);
-	if (err)
-		return err;
-
-	las->log_addr[idx] = log_addr;
-	las->log_addr_mask |= 1 << log_addr;
-	adap->phys_addrs[log_addr] = adap->phys_addr;
-
-	dprintk(2, "claimed addr %d (%d)\n", log_addr,
-		las->primary_device_type[idx]);
-	return 1;
-}
-
-/*
- * Unconfigure the adapter: clear all logical addresses and send
- * the state changed event.
- *
- * This function is called with adap->lock held.
- */
-static void cec_adap_unconfigure(struct cec_adapter *adap)
-{
-	WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID));
-	adap->log_addrs.log_addr_mask = 0;
-	adap->is_configuring = false;
-	adap->is_configured = false;
-	memset(adap->phys_addrs, 0xff, sizeof(adap->phys_addrs));
-	wake_up_interruptible(&adap->kthread_waitq);
-	cec_post_state_event(adap);
-}
-
-/*
- * Attempt to claim the required logical addresses.
- */
-static int cec_config_thread_func(void *arg)
-{
-	/* The various LAs for each type of device */
-	static const u8 tv_log_addrs[] = {
-		CEC_LOG_ADDR_TV, CEC_LOG_ADDR_SPECIFIC,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 record_log_addrs[] = {
-		CEC_LOG_ADDR_RECORD_1, CEC_LOG_ADDR_RECORD_2,
-		CEC_LOG_ADDR_RECORD_3,
-		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 tuner_log_addrs[] = {
-		CEC_LOG_ADDR_TUNER_1, CEC_LOG_ADDR_TUNER_2,
-		CEC_LOG_ADDR_TUNER_3, CEC_LOG_ADDR_TUNER_4,
-		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 playback_log_addrs[] = {
-		CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_PLAYBACK_2,
-		CEC_LOG_ADDR_PLAYBACK_3,
-		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 audiosystem_log_addrs[] = {
-		CEC_LOG_ADDR_AUDIOSYSTEM,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 specific_use_log_addrs[] = {
-		CEC_LOG_ADDR_SPECIFIC,
-		CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2,
-		CEC_LOG_ADDR_INVALID
-	};
-	static const u8 *type2addrs[6] = {
-		[CEC_LOG_ADDR_TYPE_TV] = tv_log_addrs,
-		[CEC_LOG_ADDR_TYPE_RECORD] = record_log_addrs,
-		[CEC_LOG_ADDR_TYPE_TUNER] = tuner_log_addrs,
-		[CEC_LOG_ADDR_TYPE_PLAYBACK] = playback_log_addrs,
-		[CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = audiosystem_log_addrs,
-		[CEC_LOG_ADDR_TYPE_SPECIFIC] = specific_use_log_addrs,
-	};
-	static const u16 type2mask[] = {
-		[CEC_LOG_ADDR_TYPE_TV] = CEC_LOG_ADDR_MASK_TV,
-		[CEC_LOG_ADDR_TYPE_RECORD] = CEC_LOG_ADDR_MASK_RECORD,
-		[CEC_LOG_ADDR_TYPE_TUNER] = CEC_LOG_ADDR_MASK_TUNER,
-		[CEC_LOG_ADDR_TYPE_PLAYBACK] = CEC_LOG_ADDR_MASK_PLAYBACK,
-		[CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = CEC_LOG_ADDR_MASK_AUDIOSYSTEM,
-		[CEC_LOG_ADDR_TYPE_SPECIFIC] = CEC_LOG_ADDR_MASK_SPECIFIC,
-	};
-	struct cec_adapter *adap = arg;
-	struct cec_log_addrs *las = &adap->log_addrs;
-	int err;
-	int i, j;
-
-	mutex_lock(&adap->lock);
-	dprintk(1, "physical address: %x.%x.%x.%x, claim %d logical addresses\n",
-		cec_phys_addr_exp(adap->phys_addr), las->num_log_addrs);
-	las->log_addr_mask = 0;
-
-	if (las->log_addr_type[0] == CEC_LOG_ADDR_TYPE_UNREGISTERED)
-		goto configured;
-
-	for (i = 0; i < las->num_log_addrs; i++) {
-		unsigned int type = las->log_addr_type[i];
-		const u8 *la_list;
-		u8 last_la;
-
-		/*
-		 * The TV functionality can only map to physical address 0.
-		 * For any other address, try the Specific functionality
-		 * instead as per the spec.
-		 */
-		if (adap->phys_addr && type == CEC_LOG_ADDR_TYPE_TV)
-			type = CEC_LOG_ADDR_TYPE_SPECIFIC;
-
-		la_list = type2addrs[type];
-		last_la = las->log_addr[i];
-		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
-		if (last_la == CEC_LOG_ADDR_INVALID ||
-		    last_la == CEC_LOG_ADDR_UNREGISTERED ||
-		    !(last_la & type2mask[type]))
-			last_la = la_list[0];
-
-		err = cec_config_log_addr(adap, i, last_la);
-		if (err > 0) /* Reused last LA */
-			continue;
-
-		if (err < 0)
-			goto unconfigure;
-
-		for (j = 0; la_list[j] != CEC_LOG_ADDR_INVALID; j++) {
-			/* Tried this one already, skip it */
-			if (la_list[j] == last_la)
-				continue;
-			/* The backup addresses are CEC 2.0 specific */
-			if ((la_list[j] == CEC_LOG_ADDR_BACKUP_1 ||
-			     la_list[j] == CEC_LOG_ADDR_BACKUP_2) &&
-			    las->cec_version < CEC_OP_CEC_VERSION_2_0)
-				continue;
-
-			err = cec_config_log_addr(adap, i, la_list[j]);
-			if (err == 0) /* LA is in use */
-				continue;
-			if (err < 0)
-				goto unconfigure;
-			/* Done, claimed an LA */
-			break;
-		}
-
-		if (la_list[j] == CEC_LOG_ADDR_INVALID)
-			dprintk(1, "could not claim LA %d\n", i);
-	}
-
-	if (adap->log_addrs.log_addr_mask == 0 &&
-	    !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK))
-		goto unconfigure;
-
-configured:
-	if (adap->log_addrs.log_addr_mask == 0) {
-		/* Fall back to unregistered */
-		las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED;
-		las->log_addr_mask = 1 << las->log_addr[0];
-		for (i = 1; i < las->num_log_addrs; i++)
-			las->log_addr[i] = CEC_LOG_ADDR_INVALID;
-	}
-	adap->is_configured = true;
-	adap->is_configuring = false;
-	cec_post_state_event(adap);
-	mutex_unlock(&adap->lock);
-
-	for (i = 0; i < las->num_log_addrs; i++) {
-		if (las->log_addr[i] == CEC_LOG_ADDR_INVALID ||
-		    (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY))
-			continue;
-
-		/*
-		 * Report Features must come first according
-		 * to CEC 2.0
-		 */
-		if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED)
-			cec_report_features(adap, i);
-		cec_report_phys_addr(adap, i);
-	}
-	for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++)
-		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
-	mutex_lock(&adap->lock);
-	adap->kthread_config = NULL;
-	mutex_unlock(&adap->lock);
-	complete(&adap->config_completion);
-	return 0;
-
-unconfigure:
-	for (i = 0; i < las->num_log_addrs; i++)
-		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
-	cec_adap_unconfigure(adap);
-	adap->kthread_config = NULL;
-	mutex_unlock(&adap->lock);
-	complete(&adap->config_completion);
-	return 0;
-}
-
-/*
- * Called from either __cec_s_phys_addr or __cec_s_log_addrs to claim the
- * logical addresses.
- *
- * This function is called with adap->lock held.
- */
-static void cec_claim_log_addrs(struct cec_adapter *adap, bool block)
-{
-	if (WARN_ON(adap->is_configuring || adap->is_configured))
-		return;
-
-	init_completion(&adap->config_completion);
-
-	/* Ready to kick off the thread */
-	adap->is_configuring = true;
-	adap->kthread_config = kthread_run(cec_config_thread_func, adap,
-					   "ceccfg-%s", adap->name);
-	if (IS_ERR(adap->kthread_config)) {
-		adap->kthread_config = NULL;
-	} else if (block) {
-		mutex_unlock(&adap->lock);
-		wait_for_completion(&adap->config_completion);
-		mutex_lock(&adap->lock);
-	}
-}
-
-/* Set a new physical address and send an event notifying userspace of this.
- *
- * This function is called with adap->lock held.
- */
-void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
-{
-	if (phys_addr == adap->phys_addr || adap->devnode.unregistered)
-		return;
-
-	if (phys_addr == CEC_PHYS_ADDR_INVALID ||
-	    adap->phys_addr != CEC_PHYS_ADDR_INVALID) {
-		adap->phys_addr = CEC_PHYS_ADDR_INVALID;
-		cec_post_state_event(adap);
-		cec_adap_unconfigure(adap);
-		/* Disabling monitor all mode should always succeed */
-		if (adap->monitor_all_cnt)
-			WARN_ON(call_op(adap, adap_monitor_all_enable, false));
-		WARN_ON(adap->ops->adap_enable(adap, false));
-		if (phys_addr == CEC_PHYS_ADDR_INVALID)
-			return;
-	}
-
-	if (adap->ops->adap_enable(adap, true))
-		return;
-
-	if (adap->monitor_all_cnt &&
-	    call_op(adap, adap_monitor_all_enable, true)) {
-		WARN_ON(adap->ops->adap_enable(adap, false));
-		return;
-	}
-	adap->phys_addr = phys_addr;
-	cec_post_state_event(adap);
-	if (adap->log_addrs.num_log_addrs)
-		cec_claim_log_addrs(adap, block);
-}
-
-void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
-{
-	if (IS_ERR_OR_NULL(adap))
-		return;
-
-	mutex_lock(&adap->lock);
-	__cec_s_phys_addr(adap, phys_addr, block);
-	mutex_unlock(&adap->lock);
-}
-EXPORT_SYMBOL_GPL(cec_s_phys_addr);
-
-/*
- * Called from either the ioctl or a driver to set the logical addresses.
- *
- * This function is called with adap->lock held.
- */
-int __cec_s_log_addrs(struct cec_adapter *adap,
-		      struct cec_log_addrs *log_addrs, bool block)
-{
-	u16 type_mask = 0;
-	int i;
-
-	if (adap->devnode.unregistered)
-		return -ENODEV;
-
-	if (!log_addrs || log_addrs->num_log_addrs == 0) {
-		adap->log_addrs.num_log_addrs = 0;
-		cec_adap_unconfigure(adap);
-		return 0;
-	}
-
-	if (log_addrs->flags & CEC_LOG_ADDRS_FL_CDC_ONLY) {
-		/*
-		 * Sanitize log_addrs fields if a CDC-Only device is
-		 * requested.
-		 */
-		log_addrs->num_log_addrs = 1;
-		log_addrs->osd_name[0] = '\0';
-		log_addrs->vendor_id = CEC_VENDOR_ID_NONE;
-		log_addrs->log_addr_type[0] = CEC_LOG_ADDR_TYPE_UNREGISTERED;
-		/*
-		 * This is just an internal convention since a CDC-Only device
-		 * doesn't have to be a switch. But switches already use
-		 * unregistered, so it makes some kind of sense to pick this
-		 * as the primary device. Since a CDC-Only device never sends
-		 * any 'normal' CEC messages this primary device type is never
-		 * sent over the CEC bus.
-		 */
-		log_addrs->primary_device_type[0] = CEC_OP_PRIM_DEVTYPE_SWITCH;
-		log_addrs->all_device_types[0] = 0;
-		log_addrs->features[0][0] = 0;
-		log_addrs->features[0][1] = 0;
-	}
-
-	/* Ensure the osd name is 0-terminated */
-	log_addrs->osd_name[sizeof(log_addrs->osd_name) - 1] = '\0';
-
-	/* Sanity checks */
-	if (log_addrs->num_log_addrs > adap->available_log_addrs) {
-		dprintk(1, "num_log_addrs > %d\n", adap->available_log_addrs);
-		return -EINVAL;
-	}
-
-	/*
-	 * Vendor ID is a 24 bit number, so check if the value is
-	 * within the correct range.
-	 */
-	if (log_addrs->vendor_id != CEC_VENDOR_ID_NONE &&
-	    (log_addrs->vendor_id & 0xff000000) != 0)
-		return -EINVAL;
-
-	if (log_addrs->cec_version != CEC_OP_CEC_VERSION_1_4 &&
-	    log_addrs->cec_version != CEC_OP_CEC_VERSION_2_0)
-		return -EINVAL;
-
-	if (log_addrs->num_log_addrs > 1)
-		for (i = 0; i < log_addrs->num_log_addrs; i++)
-			if (log_addrs->log_addr_type[i] ==
-					CEC_LOG_ADDR_TYPE_UNREGISTERED) {
-				dprintk(1, "num_log_addrs > 1 can't be combined with unregistered LA\n");
-				return -EINVAL;
-			}
-
-	for (i = 0; i < log_addrs->num_log_addrs; i++) {
-		const u8 feature_sz = ARRAY_SIZE(log_addrs->features[0]);
-		u8 *features = log_addrs->features[i];
-		bool op_is_dev_features = false;
-
-		log_addrs->log_addr[i] = CEC_LOG_ADDR_INVALID;
-		if (type_mask & (1 << log_addrs->log_addr_type[i])) {
-			dprintk(1, "duplicate logical address type\n");
-			return -EINVAL;
-		}
-		type_mask |= 1 << log_addrs->log_addr_type[i];
-		if ((type_mask & (1 << CEC_LOG_ADDR_TYPE_RECORD)) &&
-		    (type_mask & (1 << CEC_LOG_ADDR_TYPE_PLAYBACK))) {
-			/* Record already contains the playback functionality */
-			dprintk(1, "invalid record + playback combination\n");
-			return -EINVAL;
-		}
-		if (log_addrs->primary_device_type[i] >
-					CEC_OP_PRIM_DEVTYPE_PROCESSOR) {
-			dprintk(1, "unknown primary device type\n");
-			return -EINVAL;
-		}
-		if (log_addrs->primary_device_type[i] == 2) {
-			dprintk(1, "invalid primary device type\n");
-			return -EINVAL;
-		}
-		if (log_addrs->log_addr_type[i] > CEC_LOG_ADDR_TYPE_UNREGISTERED) {
-			dprintk(1, "unknown logical address type\n");
-			return -EINVAL;
-		}
-		for (i = 0; i < feature_sz; i++) {
-			if ((features[i] & 0x80) == 0) {
-				if (op_is_dev_features)
-					break;
-				op_is_dev_features = true;
-			}
-		}
-		if (!op_is_dev_features || i == feature_sz) {
-			dprintk(1, "malformed features\n");
-			return -EINVAL;
-		}
-		/* Zero unused part of the feature array */
-		memset(features + i + 1, 0, feature_sz - i - 1);
-	}
-
-	if (log_addrs->cec_version >= CEC_OP_CEC_VERSION_2_0) {
-		if (log_addrs->num_log_addrs > 2) {
-			dprintk(1, "CEC 2.0 allows no more than 2 logical addresses\n");
-			return -EINVAL;
-		}
-		if (log_addrs->num_log_addrs == 2) {
-			if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_AUDIOSYSTEM) |
-					   (1 << CEC_LOG_ADDR_TYPE_TV)))) {
-				dprintk(1, "Two LAs is only allowed for audiosystem and TV\n");
-				return -EINVAL;
-			}
-			if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_PLAYBACK) |
-					   (1 << CEC_LOG_ADDR_TYPE_RECORD)))) {
-				dprintk(1, "An audiosystem/TV can only be combined with record or playback\n");
-				return -EINVAL;
-			}
-		}
-	}
-
-	/* Zero unused LAs */
-	for (i = log_addrs->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) {
-		log_addrs->primary_device_type[i] = 0;
-		log_addrs->log_addr_type[i] = 0;
-		log_addrs->all_device_types[i] = 0;
-		memset(log_addrs->features[i], 0,
-		       sizeof(log_addrs->features[i]));
-	}
-
-	log_addrs->log_addr_mask = adap->log_addrs.log_addr_mask;
-	adap->log_addrs = *log_addrs;
-	if (adap->phys_addr != CEC_PHYS_ADDR_INVALID)
-		cec_claim_log_addrs(adap, block);
-	return 0;
-}
-
-int cec_s_log_addrs(struct cec_adapter *adap,
-		    struct cec_log_addrs *log_addrs, bool block)
-{
-	int err;
-
-	mutex_lock(&adap->lock);
-	err = __cec_s_log_addrs(adap, log_addrs, block);
-	mutex_unlock(&adap->lock);
-	return err;
-}
-EXPORT_SYMBOL_GPL(cec_s_log_addrs);
-
-/* High-level core CEC message handling */
-
-/* Transmit the Report Features message */
-static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx)
-{
-	struct cec_msg msg = { };
-	const struct cec_log_addrs *las = &adap->log_addrs;
-	const u8 *features = las->features[la_idx];
-	bool op_is_dev_features = false;
-	unsigned int idx;
-
-	/* This is 2.0 and up only */
-	if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0)
-		return 0;
-
-	/* Report Features */
-	msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f;
-	msg.len = 4;
-	msg.msg[1] = CEC_MSG_REPORT_FEATURES;
-	msg.msg[2] = adap->log_addrs.cec_version;
-	msg.msg[3] = las->all_device_types[la_idx];
-
-	/* Write RC Profiles first, then Device Features */
-	for (idx = 0; idx < ARRAY_SIZE(las->features[0]); idx++) {
-		msg.msg[msg.len++] = features[idx];
-		if ((features[idx] & CEC_OP_FEAT_EXT) == 0) {
-			if (op_is_dev_features)
-				break;
-			op_is_dev_features = true;
-		}
-	}
-	return cec_transmit_msg(adap, &msg, false);
-}
-
-/* Transmit the Report Physical Address message */
-static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx)
-{
-	const struct cec_log_addrs *las = &adap->log_addrs;
-	struct cec_msg msg = { };
-
-	/* Report Physical Address */
-	msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f;
-	cec_msg_report_physical_addr(&msg, adap->phys_addr,
-				     las->primary_device_type[la_idx]);
-	dprintk(2, "config: la %d pa %x.%x.%x.%x\n",
-		las->log_addr[la_idx],
-			cec_phys_addr_exp(adap->phys_addr));
-	return cec_transmit_msg(adap, &msg, false);
-}
-
-/* Transmit the Feature Abort message */
-static int cec_feature_abort_reason(struct cec_adapter *adap,
-				    struct cec_msg *msg, u8 reason)
-{
-	struct cec_msg tx_msg = { };
-
-	/*
-	 * Don't reply with CEC_MSG_FEATURE_ABORT to a CEC_MSG_FEATURE_ABORT
-	 * message!
-	 */
-	if (msg->msg[1] == CEC_MSG_FEATURE_ABORT)
-		return 0;
-	cec_msg_set_reply_to(&tx_msg, msg);
-	cec_msg_feature_abort(&tx_msg, msg->msg[1], reason);
-	return cec_transmit_msg(adap, &tx_msg, false);
-}
-
-static int cec_feature_abort(struct cec_adapter *adap, struct cec_msg *msg)
-{
-	return cec_feature_abort_reason(adap, msg,
-					CEC_OP_ABORT_UNRECOGNIZED_OP);
-}
-
-static int cec_feature_refused(struct cec_adapter *adap, struct cec_msg *msg)
-{
-	return cec_feature_abort_reason(adap, msg,
-					CEC_OP_ABORT_REFUSED);
-}
-
-/*
- * Called when a CEC message is received. This function will do any
- * necessary core processing. The is_reply bool is true if this message
- * is a reply to an earlier transmit.
- *
- * The message is either a broadcast message or a valid directed message.
- */
-static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
-			      bool is_reply)
-{
-	bool is_broadcast = cec_msg_is_broadcast(msg);
-	u8 dest_laddr = cec_msg_destination(msg);
-	u8 init_laddr = cec_msg_initiator(msg);
-	u8 devtype = cec_log_addr2dev(adap, dest_laddr);
-	int la_idx = cec_log_addr2idx(adap, dest_laddr);
-	bool from_unregistered = init_laddr == 0xf;
-	struct cec_msg tx_cec_msg = { };
-
-	dprintk(1, "cec_receive_notify: %*ph\n", msg->len, msg->msg);
-
-	/* If this is a CDC-Only device, then ignore any non-CDC messages */
-	if (cec_is_cdc_only(&adap->log_addrs) &&
-	    msg->msg[1] != CEC_MSG_CDC_MESSAGE)
-		return 0;
-
-	if (adap->ops->received) {
-		/* Allow drivers to process the message first */
-		if (adap->ops->received(adap, msg) != -ENOMSG)
-			return 0;
-	}
-
-	/*
-	 * REPORT_PHYSICAL_ADDR, CEC_MSG_USER_CONTROL_PRESSED and
-	 * CEC_MSG_USER_CONTROL_RELEASED messages always have to be
-	 * handled by the CEC core, even if the passthrough mode is on.
-	 * The others are just ignored if passthrough mode is on.
-	 */
-	switch (msg->msg[1]) {
-	case CEC_MSG_GET_CEC_VERSION:
-	case CEC_MSG_GIVE_DEVICE_VENDOR_ID:
-	case CEC_MSG_ABORT:
-	case CEC_MSG_GIVE_DEVICE_POWER_STATUS:
-	case CEC_MSG_GIVE_PHYSICAL_ADDR:
-	case CEC_MSG_GIVE_OSD_NAME:
-	case CEC_MSG_GIVE_FEATURES:
-		/*
-		 * Skip processing these messages if the passthrough mode
-		 * is on.
-		 */
-		if (adap->passthrough)
-			goto skip_processing;
-		/* Ignore if addressing is wrong */
-		if (is_broadcast || from_unregistered)
-			return 0;
-		break;
-
-	case CEC_MSG_USER_CONTROL_PRESSED:
-	case CEC_MSG_USER_CONTROL_RELEASED:
-		/* Wrong addressing mode: don't process */
-		if (is_broadcast || from_unregistered)
-			goto skip_processing;
-		break;
-
-	case CEC_MSG_REPORT_PHYSICAL_ADDR:
-		/*
-		 * This message is always processed, regardless of the
-		 * passthrough setting.
-		 *
-		 * Exception: don't process if wrong addressing mode.
-		 */
-		if (!is_broadcast)
-			goto skip_processing;
-		break;
-
-	default:
-		break;
-	}
-
-	cec_msg_set_reply_to(&tx_cec_msg, msg);
-
-	switch (msg->msg[1]) {
-	/* The following messages are processed but still passed through */
-	case CEC_MSG_REPORT_PHYSICAL_ADDR: {
-		u16 pa = (msg->msg[2] << 8) | msg->msg[3];
-
-		if (!from_unregistered)
-			adap->phys_addrs[init_laddr] = pa;
-		dprintk(1, "Reported physical address %x.%x.%x.%x for logical address %d\n",
-			cec_phys_addr_exp(pa), init_laddr);
-		break;
-	}
-
-	case CEC_MSG_USER_CONTROL_PRESSED:
-		if (!(adap->capabilities & CEC_CAP_RC) ||
-		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
-			break;
-
-#if IS_REACHABLE(CONFIG_RC_CORE)
-		switch (msg->msg[2]) {
-		/*
-		 * Play function, this message can have variable length
-		 * depending on the specific play function that is used.
-		 */
-		case 0x60:
-			if (msg->len == 2)
-				rc_keydown(adap->rc, RC_TYPE_CEC,
-					   msg->msg[2], 0);
-			else
-				rc_keydown(adap->rc, RC_TYPE_CEC,
-					   msg->msg[2] << 8 | msg->msg[3], 0);
-			break;
-		/*
-		 * Other function messages that are not handled.
-		 * Currently the RC framework does not allow to supply an
-		 * additional parameter to a keypress. These "keys" contain
-		 * other information such as channel number, an input number
-		 * etc.
-		 * For the time being these messages are not processed by the
-		 * framework and are simply forwarded to the user space.
-		 */
-		case 0x56: case 0x57:
-		case 0x67: case 0x68: case 0x69: case 0x6a:
-			break;
-		default:
-			rc_keydown(adap->rc, RC_TYPE_CEC, msg->msg[2], 0);
-			break;
-		}
-#endif
-		break;
-
-	case CEC_MSG_USER_CONTROL_RELEASED:
-		if (!(adap->capabilities & CEC_CAP_RC) ||
-		    !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU))
-			break;
-#if IS_REACHABLE(CONFIG_RC_CORE)
-		rc_keyup(adap->rc);
-#endif
-		break;
-
-	/*
-	 * The remaining messages are only processed if the passthrough mode
-	 * is off.
-	 */
-	case CEC_MSG_GET_CEC_VERSION:
-		cec_msg_cec_version(&tx_cec_msg, adap->log_addrs.cec_version);
-		return cec_transmit_msg(adap, &tx_cec_msg, false);
-
-	case CEC_MSG_GIVE_PHYSICAL_ADDR:
-		/* Do nothing for CEC switches using addr 15 */
-		if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH && dest_laddr == 15)
-			return 0;
-		cec_msg_report_physical_addr(&tx_cec_msg, adap->phys_addr, devtype);
-		return cec_transmit_msg(adap, &tx_cec_msg, false);
-
-	case CEC_MSG_GIVE_DEVICE_VENDOR_ID:
-		if (adap->log_addrs.vendor_id == CEC_VENDOR_ID_NONE)
-			return cec_feature_abort(adap, msg);
-		cec_msg_device_vendor_id(&tx_cec_msg, adap->log_addrs.vendor_id);
-		return cec_transmit_msg(adap, &tx_cec_msg, false);
-
-	case CEC_MSG_ABORT:
-		/* Do nothing for CEC switches */
-		if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH)
-			return 0;
-		return cec_feature_refused(adap, msg);
-
-	case CEC_MSG_GIVE_OSD_NAME: {
-		if (adap->log_addrs.osd_name[0] == 0)
-			return cec_feature_abort(adap, msg);
-		cec_msg_set_osd_name(&tx_cec_msg, adap->log_addrs.osd_name);
-		return cec_transmit_msg(adap, &tx_cec_msg, false);
-	}
-
-	case CEC_MSG_GIVE_FEATURES:
-		if (adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0)
-			return cec_report_features(adap, la_idx);
-		return 0;
-
-	default:
-		/*
-		 * Unprocessed messages are aborted if userspace isn't doing
-		 * any processing either.
-		 */
-		if (!is_broadcast && !is_reply && !adap->follower_cnt &&
-		    !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT)
-			return cec_feature_abort(adap, msg);
-		break;
-	}
-
-skip_processing:
-	/* If this was a reply, then we're done, unless otherwise specified */
-	if (is_reply && !(msg->flags & CEC_MSG_FL_REPLY_TO_FOLLOWERS))
-		return 0;
-
-	/*
-	 * Send to the exclusive follower if there is one, otherwise send
-	 * to all followers.
-	 */
-	if (adap->cec_follower)
-		cec_queue_msg_fh(adap->cec_follower, msg);
-	else
-		cec_queue_msg_followers(adap, msg);
-	return 0;
-}
-
-/*
- * Helper functions to keep track of the 'monitor all' use count.
- *
- * These functions are called with adap->lock held.
- */
-int cec_monitor_all_cnt_inc(struct cec_adapter *adap)
-{
-	int ret = 0;
-
-	if (adap->monitor_all_cnt == 0)
-		ret = call_op(adap, adap_monitor_all_enable, 1);
-	if (ret == 0)
-		adap->monitor_all_cnt++;
-	return ret;
-}
-
-void cec_monitor_all_cnt_dec(struct cec_adapter *adap)
-{
-	adap->monitor_all_cnt--;
-	if (adap->monitor_all_cnt == 0)
-		WARN_ON(call_op(adap, adap_monitor_all_enable, 0));
-}
-
-#ifdef CONFIG_MEDIA_CEC_DEBUG
-/*
- * Log the current state of the CEC adapter.
- * Very useful for debugging.
- */
-int cec_adap_status(struct seq_file *file, void *priv)
-{
-	struct cec_adapter *adap = dev_get_drvdata(file->private);
-	struct cec_data *data;
-
-	mutex_lock(&adap->lock);
-	seq_printf(file, "configured: %d\n", adap->is_configured);
-	seq_printf(file, "configuring: %d\n", adap->is_configuring);
-	seq_printf(file, "phys_addr: %x.%x.%x.%x\n",
-		   cec_phys_addr_exp(adap->phys_addr));
-	seq_printf(file, "number of LAs: %d\n", adap->log_addrs.num_log_addrs);
-	seq_printf(file, "LA mask: 0x%04x\n", adap->log_addrs.log_addr_mask);
-	if (adap->cec_follower)
-		seq_printf(file, "has CEC follower%s\n",
-			   adap->passthrough ? " (in passthrough mode)" : "");
-	if (adap->cec_initiator)
-		seq_puts(file, "has CEC initiator\n");
-	if (adap->monitor_all_cnt)
-		seq_printf(file, "file handles in Monitor All mode: %u\n",
-			   adap->monitor_all_cnt);
-	data = adap->transmitting;
-	if (data)
-		seq_printf(file, "transmitting message: %*ph (reply: %02x, timeout: %ums)\n",
-			   data->msg.len, data->msg.msg, data->msg.reply,
-			   data->msg.timeout);
-	seq_printf(file, "pending transmits: %u\n", adap->transmit_queue_sz);
-	list_for_each_entry(data, &adap->transmit_queue, list) {
-		seq_printf(file, "queued tx message: %*ph (reply: %02x, timeout: %ums)\n",
-			   data->msg.len, data->msg.msg, data->msg.reply,
-			   data->msg.timeout);
-	}
-	list_for_each_entry(data, &adap->wait_queue, list) {
-		seq_printf(file, "message waiting for reply: %*ph (reply: %02x, timeout: %ums)\n",
-			   data->msg.len, data->msg.msg, data->msg.reply,
-			   data->msg.timeout);
-	}
-
-	call_void_op(adap, adap_status, file);
-	mutex_unlock(&adap->lock);
-	return 0;
-}
-#endif
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
deleted file mode 100644
index d4bc4ee2c6e5..000000000000
--- a/drivers/staging/media/cec/cec-api.c
+++ /dev/null
@@ -1,588 +0,0 @@
-/*
- * cec-api.c - HDMI Consumer Electronics Control framework - API
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/ktime.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/version.h>
-
-#include "cec-priv.h"
-
-static inline struct cec_devnode *cec_devnode_data(struct file *filp)
-{
-	struct cec_fh *fh = filp->private_data;
-
-	return &fh->adap->devnode;
-}
-
-/* CEC file operations */
-
-static unsigned int cec_poll(struct file *filp,
-			     struct poll_table_struct *poll)
-{
-	struct cec_devnode *devnode = cec_devnode_data(filp);
-	struct cec_fh *fh = filp->private_data;
-	struct cec_adapter *adap = fh->adap;
-	unsigned int res = 0;
-
-	if (!devnode->registered)
-		return POLLERR | POLLHUP;
-	mutex_lock(&adap->lock);
-	if (adap->is_configured &&
-	    adap->transmit_queue_sz < CEC_MAX_MSG_TX_QUEUE_SZ)
-		res |= POLLOUT | POLLWRNORM;
-	if (fh->queued_msgs)
-		res |= POLLIN | POLLRDNORM;
-	if (fh->pending_events)
-		res |= POLLPRI;
-	poll_wait(filp, &fh->wait, poll);
-	mutex_unlock(&adap->lock);
-	return res;
-}
-
-static bool cec_is_busy(const struct cec_adapter *adap,
-			const struct cec_fh *fh)
-{
-	bool valid_initiator = adap->cec_initiator && adap->cec_initiator == fh;
-	bool valid_follower = adap->cec_follower && adap->cec_follower == fh;
-
-	/*
-	 * Exclusive initiators and followers can always access the CEC adapter
-	 */
-	if (valid_initiator || valid_follower)
-		return false;
-	/*
-	 * All others can only access the CEC adapter if there is no
-	 * exclusive initiator and they are in INITIATOR mode.
-	 */
-	return adap->cec_initiator ||
-	       fh->mode_initiator == CEC_MODE_NO_INITIATOR;
-}
-
-static long cec_adap_g_caps(struct cec_adapter *adap,
-			    struct cec_caps __user *parg)
-{
-	struct cec_caps caps = {};
-
-	strlcpy(caps.driver, adap->devnode.parent->driver->name,
-		sizeof(caps.driver));
-	strlcpy(caps.name, adap->name, sizeof(caps.name));
-	caps.available_log_addrs = adap->available_log_addrs;
-	caps.capabilities = adap->capabilities;
-	caps.version = LINUX_VERSION_CODE;
-	if (copy_to_user(parg, &caps, sizeof(caps)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_adap_g_phys_addr(struct cec_adapter *adap,
-				 __u16 __user *parg)
-{
-	u16 phys_addr;
-
-	mutex_lock(&adap->lock);
-	phys_addr = adap->phys_addr;
-	mutex_unlock(&adap->lock);
-	if (copy_to_user(parg, &phys_addr, sizeof(phys_addr)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_adap_s_phys_addr(struct cec_adapter *adap, struct cec_fh *fh,
-				 bool block, __u16 __user *parg)
-{
-	u16 phys_addr;
-	long err;
-
-	if (!(adap->capabilities & CEC_CAP_PHYS_ADDR))
-		return -ENOTTY;
-	if (copy_from_user(&phys_addr, parg, sizeof(phys_addr)))
-		return -EFAULT;
-
-	err = cec_phys_addr_validate(phys_addr, NULL, NULL);
-	if (err)
-		return err;
-	mutex_lock(&adap->lock);
-	if (cec_is_busy(adap, fh))
-		err = -EBUSY;
-	else
-		__cec_s_phys_addr(adap, phys_addr, block);
-	mutex_unlock(&adap->lock);
-	return err;
-}
-
-static long cec_adap_g_log_addrs(struct cec_adapter *adap,
-				 struct cec_log_addrs __user *parg)
-{
-	struct cec_log_addrs log_addrs;
-
-	mutex_lock(&adap->lock);
-	log_addrs = adap->log_addrs;
-	if (!adap->is_configured)
-		memset(log_addrs.log_addr, CEC_LOG_ADDR_INVALID,
-		       sizeof(log_addrs.log_addr));
-	mutex_unlock(&adap->lock);
-
-	if (copy_to_user(parg, &log_addrs, sizeof(log_addrs)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh,
-				 bool block, struct cec_log_addrs __user *parg)
-{
-	struct cec_log_addrs log_addrs;
-	long err = -EBUSY;
-
-	if (!(adap->capabilities & CEC_CAP_LOG_ADDRS))
-		return -ENOTTY;
-	if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
-		return -EFAULT;
-	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK |
-			   CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU |
-			   CEC_LOG_ADDRS_FL_CDC_ONLY;
-	mutex_lock(&adap->lock);
-	if (!adap->is_configuring &&
-	    (!log_addrs.num_log_addrs || !adap->is_configured) &&
-	    !cec_is_busy(adap, fh)) {
-		err = __cec_s_log_addrs(adap, &log_addrs, block);
-		if (!err)
-			log_addrs = adap->log_addrs;
-	}
-	mutex_unlock(&adap->lock);
-	if (err)
-		return err;
-	if (copy_to_user(parg, &log_addrs, sizeof(log_addrs)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_transmit(struct cec_adapter *adap, struct cec_fh *fh,
-			 bool block, struct cec_msg __user *parg)
-{
-	struct cec_msg msg = {};
-	long err = 0;
-
-	if (!(adap->capabilities & CEC_CAP_TRANSMIT))
-		return -ENOTTY;
-	if (copy_from_user(&msg, parg, sizeof(msg)))
-		return -EFAULT;
-
-	/* A CDC-Only device can only send CDC messages */
-	if ((adap->log_addrs.flags & CEC_LOG_ADDRS_FL_CDC_ONLY) &&
-	    (msg.len == 1 || msg.msg[1] != CEC_MSG_CDC_MESSAGE))
-		return -EINVAL;
-
-	msg.flags &= CEC_MSG_FL_REPLY_TO_FOLLOWERS;
-	mutex_lock(&adap->lock);
-	if (!adap->is_configured)
-		err = -ENONET;
-	else if (cec_is_busy(adap, fh))
-		err = -EBUSY;
-	else
-		err = cec_transmit_msg_fh(adap, &msg, fh, block);
-	mutex_unlock(&adap->lock);
-	if (err)
-		return err;
-	if (copy_to_user(parg, &msg, sizeof(msg)))
-		return -EFAULT;
-	return 0;
-}
-
-/* Called by CEC_RECEIVE: wait for a message to arrive */
-static int cec_receive_msg(struct cec_fh *fh, struct cec_msg *msg, bool block)
-{
-	u32 timeout = msg->timeout;
-	int res;
-
-	do {
-		mutex_lock(&fh->lock);
-		/* Are there received messages queued up? */
-		if (fh->queued_msgs) {
-			/* Yes, return the first one */
-			struct cec_msg_entry *entry =
-				list_first_entry(&fh->msgs,
-						 struct cec_msg_entry, list);
-
-			list_del(&entry->list);
-			*msg = entry->msg;
-			kfree(entry);
-			fh->queued_msgs--;
-			mutex_unlock(&fh->lock);
-			/* restore original timeout value */
-			msg->timeout = timeout;
-			return 0;
-		}
-
-		/* No, return EAGAIN in non-blocking mode or wait */
-		mutex_unlock(&fh->lock);
-
-		/* Return when in non-blocking mode */
-		if (!block)
-			return -EAGAIN;
-
-		if (msg->timeout) {
-			/* The user specified a timeout */
-			res = wait_event_interruptible_timeout(fh->wait,
-							       fh->queued_msgs,
-				msecs_to_jiffies(msg->timeout));
-			if (res == 0)
-				res = -ETIMEDOUT;
-			else if (res > 0)
-				res = 0;
-		} else {
-			/* Wait indefinitely */
-			res = wait_event_interruptible(fh->wait,
-						       fh->queued_msgs);
-		}
-		/* Exit on error, otherwise loop to get the new message */
-	} while (!res);
-	return res;
-}
-
-static long cec_receive(struct cec_adapter *adap, struct cec_fh *fh,
-			bool block, struct cec_msg __user *parg)
-{
-	struct cec_msg msg = {};
-	long err = 0;
-
-	if (copy_from_user(&msg, parg, sizeof(msg)))
-		return -EFAULT;
-	mutex_lock(&adap->lock);
-	if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
-		err = -ENONET;
-	mutex_unlock(&adap->lock);
-	if (err)
-		return err;
-
-	err = cec_receive_msg(fh, &msg, block);
-	if (err)
-		return err;
-	if (copy_to_user(parg, &msg, sizeof(msg)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_dqevent(struct cec_adapter *adap, struct cec_fh *fh,
-			bool block, struct cec_event __user *parg)
-{
-	struct cec_event *ev = NULL;
-	u64 ts = ~0ULL;
-	unsigned int i;
-	long err = 0;
-
-	mutex_lock(&fh->lock);
-	while (!fh->pending_events && block) {
-		mutex_unlock(&fh->lock);
-		err = wait_event_interruptible(fh->wait, fh->pending_events);
-		if (err)
-			return err;
-		mutex_lock(&fh->lock);
-	}
-
-	/* Find the oldest event */
-	for (i = 0; i < CEC_NUM_EVENTS; i++) {
-		if (fh->pending_events & (1 << (i + 1)) &&
-		    fh->events[i].ts <= ts) {
-			ev = &fh->events[i];
-			ts = ev->ts;
-		}
-	}
-	if (!ev) {
-		err = -EAGAIN;
-		goto unlock;
-	}
-
-	if (copy_to_user(parg, ev, sizeof(*ev))) {
-		err = -EFAULT;
-		goto unlock;
-	}
-
-	fh->pending_events &= ~(1 << ev->event);
-
-unlock:
-	mutex_unlock(&fh->lock);
-	return err;
-}
-
-static long cec_g_mode(struct cec_adapter *adap, struct cec_fh *fh,
-		       u32 __user *parg)
-{
-	u32 mode = fh->mode_initiator | fh->mode_follower;
-
-	if (copy_to_user(parg, &mode, sizeof(mode)))
-		return -EFAULT;
-	return 0;
-}
-
-static long cec_s_mode(struct cec_adapter *adap, struct cec_fh *fh,
-		       u32 __user *parg)
-{
-	u32 mode;
-	u8 mode_initiator;
-	u8 mode_follower;
-	long err = 0;
-
-	if (copy_from_user(&mode, parg, sizeof(mode)))
-		return -EFAULT;
-	if (mode & ~(CEC_MODE_INITIATOR_MSK | CEC_MODE_FOLLOWER_MSK))
-		return -EINVAL;
-
-	mode_initiator = mode & CEC_MODE_INITIATOR_MSK;
-	mode_follower = mode & CEC_MODE_FOLLOWER_MSK;
-
-	if (mode_initiator > CEC_MODE_EXCL_INITIATOR ||
-	    mode_follower > CEC_MODE_MONITOR_ALL)
-		return -EINVAL;
-
-	if (mode_follower == CEC_MODE_MONITOR_ALL &&
-	    !(adap->capabilities & CEC_CAP_MONITOR_ALL))
-		return -EINVAL;
-
-	/* Follower modes should always be able to send CEC messages */
-	if ((mode_initiator == CEC_MODE_NO_INITIATOR ||
-	     !(adap->capabilities & CEC_CAP_TRANSMIT)) &&
-	    mode_follower >= CEC_MODE_FOLLOWER &&
-	    mode_follower <= CEC_MODE_EXCL_FOLLOWER_PASSTHRU)
-		return -EINVAL;
-
-	/* Monitor modes require CEC_MODE_NO_INITIATOR */
-	if (mode_initiator && mode_follower >= CEC_MODE_MONITOR)
-		return -EINVAL;
-
-	/* Monitor modes require CAP_NET_ADMIN */
-	if (mode_follower >= CEC_MODE_MONITOR && !capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	mutex_lock(&adap->lock);
-	/*
-	 * You can't become exclusive follower if someone else already
-	 * has that job.
-	 */
-	if ((mode_follower == CEC_MODE_EXCL_FOLLOWER ||
-	     mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU) &&
-	    adap->cec_follower && adap->cec_follower != fh)
-		err = -EBUSY;
-	/*
-	 * You can't become exclusive initiator if someone else already
-	 * has that job.
-	 */
-	if (mode_initiator == CEC_MODE_EXCL_INITIATOR &&
-	    adap->cec_initiator && adap->cec_initiator != fh)
-		err = -EBUSY;
-
-	if (!err) {
-		bool old_mon_all = fh->mode_follower == CEC_MODE_MONITOR_ALL;
-		bool new_mon_all = mode_follower == CEC_MODE_MONITOR_ALL;
-
-		if (old_mon_all != new_mon_all) {
-			if (new_mon_all)
-				err = cec_monitor_all_cnt_inc(adap);
-			else
-				cec_monitor_all_cnt_dec(adap);
-		}
-	}
-
-	if (err) {
-		mutex_unlock(&adap->lock);
-		return err;
-	}
-
-	if (fh->mode_follower == CEC_MODE_FOLLOWER)
-		adap->follower_cnt--;
-	if (mode_follower == CEC_MODE_FOLLOWER)
-		adap->follower_cnt++;
-	if (mode_follower == CEC_MODE_EXCL_FOLLOWER ||
-	    mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU) {
-		adap->passthrough =
-			mode_follower == CEC_MODE_EXCL_FOLLOWER_PASSTHRU;
-		adap->cec_follower = fh;
-	} else if (adap->cec_follower == fh) {
-		adap->passthrough = false;
-		adap->cec_follower = NULL;
-	}
-	if (mode_initiator == CEC_MODE_EXCL_INITIATOR)
-		adap->cec_initiator = fh;
-	else if (adap->cec_initiator == fh)
-		adap->cec_initiator = NULL;
-	fh->mode_initiator = mode_initiator;
-	fh->mode_follower = mode_follower;
-	mutex_unlock(&adap->lock);
-	return 0;
-}
-
-static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct cec_devnode *devnode = cec_devnode_data(filp);
-	struct cec_fh *fh = filp->private_data;
-	struct cec_adapter *adap = fh->adap;
-	bool block = !(filp->f_flags & O_NONBLOCK);
-	void __user *parg = (void __user *)arg;
-
-	if (!devnode->registered)
-		return -ENODEV;
-
-	switch (cmd) {
-	case CEC_ADAP_G_CAPS:
-		return cec_adap_g_caps(adap, parg);
-
-	case CEC_ADAP_G_PHYS_ADDR:
-		return cec_adap_g_phys_addr(adap, parg);
-
-	case CEC_ADAP_S_PHYS_ADDR:
-		return cec_adap_s_phys_addr(adap, fh, block, parg);
-
-	case CEC_ADAP_G_LOG_ADDRS:
-		return cec_adap_g_log_addrs(adap, parg);
-
-	case CEC_ADAP_S_LOG_ADDRS:
-		return cec_adap_s_log_addrs(adap, fh, block, parg);
-
-	case CEC_TRANSMIT:
-		return cec_transmit(adap, fh, block, parg);
-
-	case CEC_RECEIVE:
-		return cec_receive(adap, fh, block, parg);
-
-	case CEC_DQEVENT:
-		return cec_dqevent(adap, fh, block, parg);
-
-	case CEC_G_MODE:
-		return cec_g_mode(adap, fh, parg);
-
-	case CEC_S_MODE:
-		return cec_s_mode(adap, fh, parg);
-
-	default:
-		return -ENOTTY;
-	}
-}
-
-static int cec_open(struct inode *inode, struct file *filp)
-{
-	struct cec_devnode *devnode =
-		container_of(inode->i_cdev, struct cec_devnode, cdev);
-	struct cec_adapter *adap = to_cec_adapter(devnode);
-	struct cec_fh *fh = kzalloc(sizeof(*fh), GFP_KERNEL);
-	/*
-	 * Initial events that are automatically sent when the cec device is
-	 * opened.
-	 */
-	struct cec_event ev_state = {
-		.event = CEC_EVENT_STATE_CHANGE,
-		.flags = CEC_EVENT_FL_INITIAL_STATE,
-	};
-	int err;
-
-	if (!fh)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&fh->msgs);
-	INIT_LIST_HEAD(&fh->xfer_list);
-	mutex_init(&fh->lock);
-	init_waitqueue_head(&fh->wait);
-
-	fh->mode_initiator = CEC_MODE_INITIATOR;
-	fh->adap = adap;
-
-	err = cec_get_device(devnode);
-	if (err) {
-		kfree(fh);
-		return err;
-	}
-
-	filp->private_data = fh;
-
-	mutex_lock(&devnode->lock);
-	/* Queue up initial state events */
-	ev_state.state_change.phys_addr = adap->phys_addr;
-	ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
-	cec_queue_event_fh(fh, &ev_state, 0);
-
-	list_add(&fh->list, &devnode->fhs);
-	mutex_unlock(&devnode->lock);
-
-	return 0;
-}
-
-/* Override for the release function */
-static int cec_release(struct inode *inode, struct file *filp)
-{
-	struct cec_devnode *devnode = cec_devnode_data(filp);
-	struct cec_adapter *adap = to_cec_adapter(devnode);
-	struct cec_fh *fh = filp->private_data;
-
-	mutex_lock(&adap->lock);
-	if (adap->cec_initiator == fh)
-		adap->cec_initiator = NULL;
-	if (adap->cec_follower == fh) {
-		adap->cec_follower = NULL;
-		adap->passthrough = false;
-	}
-	if (fh->mode_follower == CEC_MODE_FOLLOWER)
-		adap->follower_cnt--;
-	if (fh->mode_follower == CEC_MODE_MONITOR_ALL)
-		cec_monitor_all_cnt_dec(adap);
-	mutex_unlock(&adap->lock);
-
-	mutex_lock(&devnode->lock);
-	list_del(&fh->list);
-	mutex_unlock(&devnode->lock);
-
-	/* Unhook pending transmits from this filehandle. */
-	mutex_lock(&adap->lock);
-	while (!list_empty(&fh->xfer_list)) {
-		struct cec_data *data =
-			list_first_entry(&fh->xfer_list, struct cec_data, xfer_list);
-
-		data->blocking = false;
-		data->fh = NULL;
-		list_del(&data->xfer_list);
-	}
-	mutex_unlock(&adap->lock);
-	while (!list_empty(&fh->msgs)) {
-		struct cec_msg_entry *entry =
-			list_first_entry(&fh->msgs, struct cec_msg_entry, list);
-
-		list_del(&entry->list);
-		kfree(entry);
-	}
-	kfree(fh);
-
-	cec_put_device(devnode);
-	filp->private_data = NULL;
-	return 0;
-}
-
-const struct file_operations cec_devnode_fops = {
-	.owner = THIS_MODULE,
-	.open = cec_open,
-	.unlocked_ioctl = cec_ioctl,
-	.release = cec_release,
-	.poll = cec_poll,
-	.llseek = no_llseek,
-};
diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c
deleted file mode 100644
index b0137e247dc9..000000000000
--- a/drivers/staging/media/cec/cec-core.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * cec-core.c - HDMI Consumer Electronics Control framework - Core
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include "cec-priv.h"
-
-#define CEC_NUM_DEVICES	256
-#define CEC_NAME	"cec"
-
-int cec_debug;
-module_param_named(debug, cec_debug, int, 0644);
-MODULE_PARM_DESC(debug, "debug level (0-2)");
-
-static dev_t cec_dev_t;
-
-/* Active devices */
-static DEFINE_MUTEX(cec_devnode_lock);
-static DECLARE_BITMAP(cec_devnode_nums, CEC_NUM_DEVICES);
-
-static struct dentry *top_cec_dir;
-
-/* dev to cec_devnode */
-#define to_cec_devnode(cd) container_of(cd, struct cec_devnode, dev)
-
-int cec_get_device(struct cec_devnode *devnode)
-{
-	/*
-	 * Check if the cec device is available. This needs to be done with
-	 * the devnode->lock held to prevent an open/unregister race:
-	 * without the lock, the device could be unregistered and freed between
-	 * the devnode->registered check and get_device() calls, leading to
-	 * a crash.
-	 */
-	mutex_lock(&devnode->lock);
-	/*
-	 * return ENXIO if the cec device has been removed
-	 * already or if it is not registered anymore.
-	 */
-	if (!devnode->registered) {
-		mutex_unlock(&devnode->lock);
-		return -ENXIO;
-	}
-	/* and increase the device refcount */
-	get_device(&devnode->dev);
-	mutex_unlock(&devnode->lock);
-	return 0;
-}
-
-void cec_put_device(struct cec_devnode *devnode)
-{
-	put_device(&devnode->dev);
-}
-
-/* Called when the last user of the cec device exits. */
-static void cec_devnode_release(struct device *cd)
-{
-	struct cec_devnode *devnode = to_cec_devnode(cd);
-
-	mutex_lock(&cec_devnode_lock);
-	/* Mark device node number as free */
-	clear_bit(devnode->minor, cec_devnode_nums);
-	mutex_unlock(&cec_devnode_lock);
-
-	cec_delete_adapter(to_cec_adapter(devnode));
-}
-
-static struct bus_type cec_bus_type = {
-	.name = CEC_NAME,
-};
-
-/*
- * Register a cec device node
- *
- * The registration code assigns minor numbers and registers the new device node
- * with the kernel. An error is returned if no free minor number can be found,
- * or if the registration of the device node fails.
- *
- * Zero is returned on success.
- *
- * Note that if the cec_devnode_register call fails, the release() callback of
- * the cec_devnode structure is *not* called, so the caller is responsible for
- * freeing any data.
- */
-static int __must_check cec_devnode_register(struct cec_devnode *devnode,
-					     struct module *owner)
-{
-	int minor;
-	int ret;
-
-	/* Initialization */
-	INIT_LIST_HEAD(&devnode->fhs);
-	mutex_init(&devnode->lock);
-
-	/* Part 1: Find a free minor number */
-	mutex_lock(&cec_devnode_lock);
-	minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0);
-	if (minor == CEC_NUM_DEVICES) {
-		mutex_unlock(&cec_devnode_lock);
-		pr_err("could not get a free minor\n");
-		return -ENFILE;
-	}
-
-	set_bit(minor, cec_devnode_nums);
-	mutex_unlock(&cec_devnode_lock);
-
-	devnode->minor = minor;
-	devnode->dev.bus = &cec_bus_type;
-	devnode->dev.devt = MKDEV(MAJOR(cec_dev_t), minor);
-	devnode->dev.release = cec_devnode_release;
-	devnode->dev.parent = devnode->parent;
-	dev_set_name(&devnode->dev, "cec%d", devnode->minor);
-	device_initialize(&devnode->dev);
-
-	/* Part 2: Initialize and register the character device */
-	cdev_init(&devnode->cdev, &cec_devnode_fops);
-	devnode->cdev.kobj.parent = &devnode->dev.kobj;
-	devnode->cdev.owner = owner;
-
-	ret = cdev_add(&devnode->cdev, devnode->dev.devt, 1);
-	if (ret < 0) {
-		pr_err("%s: cdev_add failed\n", __func__);
-		goto clr_bit;
-	}
-
-	ret = device_add(&devnode->dev);
-	if (ret)
-		goto cdev_del;
-
-	devnode->registered = true;
-	return 0;
-
-cdev_del:
-	cdev_del(&devnode->cdev);
-clr_bit:
-	mutex_lock(&cec_devnode_lock);
-	clear_bit(devnode->minor, cec_devnode_nums);
-	mutex_unlock(&cec_devnode_lock);
-	return ret;
-}
-
-/*
- * Unregister a cec device node
- *
- * This unregisters the passed device. Future open calls will be met with
- * errors.
- *
- * This function can safely be called if the device node has never been
- * registered or has already been unregistered.
- */
-static void cec_devnode_unregister(struct cec_devnode *devnode)
-{
-	struct cec_fh *fh;
-
-	mutex_lock(&devnode->lock);
-
-	/* Check if devnode was never registered or already unregistered */
-	if (!devnode->registered || devnode->unregistered) {
-		mutex_unlock(&devnode->lock);
-		return;
-	}
-
-	list_for_each_entry(fh, &devnode->fhs, list)
-		wake_up_interruptible(&fh->wait);
-
-	devnode->registered = false;
-	devnode->unregistered = true;
-	mutex_unlock(&devnode->lock);
-
-	device_del(&devnode->dev);
-	cdev_del(&devnode->cdev);
-	put_device(&devnode->dev);
-}
-
-struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
-					 void *priv, const char *name, u32 caps,
-					 u8 available_las, struct device *parent)
-{
-	struct cec_adapter *adap;
-	int res;
-
-	if (WARN_ON(!parent))
-		return ERR_PTR(-EINVAL);
-	if (WARN_ON(!caps))
-		return ERR_PTR(-EINVAL);
-	if (WARN_ON(!ops))
-		return ERR_PTR(-EINVAL);
-	if (WARN_ON(!available_las || available_las > CEC_MAX_LOG_ADDRS))
-		return ERR_PTR(-EINVAL);
-	adap = kzalloc(sizeof(*adap), GFP_KERNEL);
-	if (!adap)
-		return ERR_PTR(-ENOMEM);
-	adap->owner = parent->driver->owner;
-	adap->devnode.parent = parent;
-	strlcpy(adap->name, name, sizeof(adap->name));
-	adap->phys_addr = CEC_PHYS_ADDR_INVALID;
-	adap->log_addrs.cec_version = CEC_OP_CEC_VERSION_2_0;
-	adap->log_addrs.vendor_id = CEC_VENDOR_ID_NONE;
-	adap->capabilities = caps;
-	adap->available_log_addrs = available_las;
-	adap->sequence = 0;
-	adap->ops = ops;
-	adap->priv = priv;
-	memset(adap->phys_addrs, 0xff, sizeof(adap->phys_addrs));
-	mutex_init(&adap->lock);
-	INIT_LIST_HEAD(&adap->transmit_queue);
-	INIT_LIST_HEAD(&adap->wait_queue);
-	init_waitqueue_head(&adap->kthread_waitq);
-
-	adap->kthread = kthread_run(cec_thread_func, adap, "cec-%s", name);
-	if (IS_ERR(adap->kthread)) {
-		pr_err("cec-%s: kernel_thread() failed\n", name);
-		res = PTR_ERR(adap->kthread);
-		kfree(adap);
-		return ERR_PTR(res);
-	}
-
-	if (!(caps & CEC_CAP_RC))
-		return adap;
-
-#if IS_REACHABLE(CONFIG_RC_CORE)
-	/* Prepare the RC input device */
-	adap->rc = rc_allocate_device();
-	if (!adap->rc) {
-		pr_err("cec-%s: failed to allocate memory for rc_dev\n",
-		       name);
-		kthread_stop(adap->kthread);
-		kfree(adap);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	snprintf(adap->input_name, sizeof(adap->input_name),
-		 "RC for %s", name);
-	snprintf(adap->input_phys, sizeof(adap->input_phys),
-		 "%s/input0", name);
-
-	adap->rc->input_name = adap->input_name;
-	adap->rc->input_phys = adap->input_phys;
-	adap->rc->input_id.bustype = BUS_CEC;
-	adap->rc->input_id.vendor = 0;
-	adap->rc->input_id.product = 0;
-	adap->rc->input_id.version = 1;
-	adap->rc->dev.parent = parent;
-	adap->rc->driver_type = RC_DRIVER_SCANCODE;
-	adap->rc->driver_name = CEC_NAME;
-	adap->rc->allowed_protocols = RC_BIT_CEC;
-	adap->rc->priv = adap;
-	adap->rc->map_name = RC_MAP_CEC;
-	adap->rc->timeout = MS_TO_NS(100);
-#else
-	adap->capabilities &= ~CEC_CAP_RC;
-#endif
-	return adap;
-}
-EXPORT_SYMBOL_GPL(cec_allocate_adapter);
-
-int cec_register_adapter(struct cec_adapter *adap)
-{
-	int res;
-
-	if (IS_ERR_OR_NULL(adap))
-		return 0;
-
-#if IS_REACHABLE(CONFIG_RC_CORE)
-	if (adap->capabilities & CEC_CAP_RC) {
-		res = rc_register_device(adap->rc);
-
-		if (res) {
-			pr_err("cec-%s: failed to prepare input device\n",
-			       adap->name);
-			rc_free_device(adap->rc);
-			adap->rc = NULL;
-			return res;
-		}
-	}
-#endif
-
-	res = cec_devnode_register(&adap->devnode, adap->owner);
-	if (res) {
-#if IS_REACHABLE(CONFIG_RC_CORE)
-		/* Note: rc_unregister also calls rc_free */
-		rc_unregister_device(adap->rc);
-		adap->rc = NULL;
-#endif
-		return res;
-	}
-
-	dev_set_drvdata(&adap->devnode.dev, adap);
-#ifdef CONFIG_MEDIA_CEC_DEBUG
-	if (!top_cec_dir)
-		return 0;
-
-	adap->cec_dir = debugfs_create_dir(dev_name(&adap->devnode.dev), top_cec_dir);
-	if (IS_ERR_OR_NULL(adap->cec_dir)) {
-		pr_warn("cec-%s: Failed to create debugfs dir\n", adap->name);
-		return 0;
-	}
-	adap->status_file = debugfs_create_devm_seqfile(&adap->devnode.dev,
-		"status", adap->cec_dir, cec_adap_status);
-	if (IS_ERR_OR_NULL(adap->status_file)) {
-		pr_warn("cec-%s: Failed to create status file\n", adap->name);
-		debugfs_remove_recursive(adap->cec_dir);
-		adap->cec_dir = NULL;
-	}
-#endif
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cec_register_adapter);
-
-void cec_unregister_adapter(struct cec_adapter *adap)
-{
-	if (IS_ERR_OR_NULL(adap))
-		return;
-
-#if IS_REACHABLE(CONFIG_RC_CORE)
-	/* Note: rc_unregister also calls rc_free */
-	rc_unregister_device(adap->rc);
-	adap->rc = NULL;
-#endif
-	debugfs_remove_recursive(adap->cec_dir);
-	cec_devnode_unregister(&adap->devnode);
-}
-EXPORT_SYMBOL_GPL(cec_unregister_adapter);
-
-void cec_delete_adapter(struct cec_adapter *adap)
-{
-	if (IS_ERR_OR_NULL(adap))
-		return;
-	mutex_lock(&adap->lock);
-	__cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false);
-	mutex_unlock(&adap->lock);
-	kthread_stop(adap->kthread);
-	if (adap->kthread_config)
-		kthread_stop(adap->kthread_config);
-#if IS_REACHABLE(CONFIG_RC_CORE)
-	rc_free_device(adap->rc);
-#endif
-	kfree(adap);
-}
-EXPORT_SYMBOL_GPL(cec_delete_adapter);
-
-/*
- *	Initialise cec for linux
- */
-static int __init cec_devnode_init(void)
-{
-	int ret;
-
-	pr_info("Linux cec interface: v0.10\n");
-	ret = alloc_chrdev_region(&cec_dev_t, 0, CEC_NUM_DEVICES,
-				  CEC_NAME);
-	if (ret < 0) {
-		pr_warn("cec: unable to allocate major\n");
-		return ret;
-	}
-
-#ifdef CONFIG_MEDIA_CEC_DEBUG
-	top_cec_dir = debugfs_create_dir("cec", NULL);
-	if (IS_ERR_OR_NULL(top_cec_dir)) {
-		pr_warn("cec: Failed to create debugfs cec dir\n");
-		top_cec_dir = NULL;
-	}
-#endif
-
-	ret = bus_register(&cec_bus_type);
-	if (ret < 0) {
-		unregister_chrdev_region(cec_dev_t, CEC_NUM_DEVICES);
-		pr_warn("cec: bus_register failed\n");
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void __exit cec_devnode_exit(void)
-{
-	debugfs_remove_recursive(top_cec_dir);
-	bus_unregister(&cec_bus_type);
-	unregister_chrdev_region(cec_dev_t, CEC_NUM_DEVICES);
-}
-
-subsys_initcall(cec_devnode_init);
-module_exit(cec_devnode_exit)
-
-MODULE_AUTHOR("Hans Verkuil <hans.verkuil@cisco.com>");
-MODULE_DESCRIPTION("Device node registration for cec drivers");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/media/cec/cec-priv.h b/drivers/staging/media/cec/cec-priv.h
deleted file mode 100644
index 70767a7900f2..000000000000
--- a/drivers/staging/media/cec/cec-priv.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * cec-priv.h - HDMI Consumer Electronics Control internal header
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _CEC_PRIV_H
-#define _CEC_PRIV_H
-
-#include <linux/cec-funcs.h>
-#include <media/cec.h>
-
-#define dprintk(lvl, fmt, arg...)					\
-	do {								\
-		if (lvl <= cec_debug)					\
-			pr_info("cec-%s: " fmt, adap->name, ## arg);	\
-	} while (0)
-
-/* devnode to cec_adapter */
-#define to_cec_adapter(node) container_of(node, struct cec_adapter, devnode)
-
-/* cec-core.c */
-extern int cec_debug;
-int cec_get_device(struct cec_devnode *devnode);
-void cec_put_device(struct cec_devnode *devnode);
-
-/* cec-adap.c */
-int cec_monitor_all_cnt_inc(struct cec_adapter *adap);
-void cec_monitor_all_cnt_dec(struct cec_adapter *adap);
-int cec_adap_status(struct seq_file *file, void *priv);
-int cec_thread_func(void *_adap);
-void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block);
-int __cec_s_log_addrs(struct cec_adapter *adap,
-		      struct cec_log_addrs *log_addrs, bool block);
-int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
-			struct cec_fh *fh, bool block);
-void cec_queue_event_fh(struct cec_fh *fh,
-			const struct cec_event *new_ev, u64 ts);
-
-/* cec-api.c */
-extern const struct file_operations cec_devnode_fops;
-
-#endif
diff --git a/drivers/staging/media/pulse8-cec/Kconfig b/drivers/staging/media/pulse8-cec/Kconfig
index c6aa2d1c9df0..6ffc407de62f 100644
--- a/drivers/staging/media/pulse8-cec/Kconfig
+++ b/drivers/staging/media/pulse8-cec/Kconfig
@@ -1,6 +1,6 @@
 config USB_PULSE8_CEC
 	tristate "Pulse Eight HDMI CEC"
-	depends on USB_ACM && MEDIA_CEC
+	depends on USB_ACM && MEDIA_CEC_SUPPORT
 	select SERIO
 	select SERIO_SERPORT
 	---help---
diff --git a/drivers/staging/media/s5p-cec/Kconfig b/drivers/staging/media/s5p-cec/Kconfig
index 0315fd7ad0f1..ddfd955da0d4 100644
--- a/drivers/staging/media/s5p-cec/Kconfig
+++ b/drivers/staging/media/s5p-cec/Kconfig
@@ -1,6 +1,6 @@
 config VIDEO_SAMSUNG_S5P_CEC
        tristate "Samsung S5P CEC driver"
-       depends on VIDEO_DEV && MEDIA_CEC && (PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST)
+       depends on VIDEO_DEV && MEDIA_CEC_SUPPORT && (PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST)
        ---help---
          This is a driver for Samsung S5P HDMI CEC interface. It uses the
          generic CEC framework interface.
diff --git a/drivers/staging/media/st-cec/Kconfig b/drivers/staging/media/st-cec/Kconfig
index 784d2c600aca..c04283db58d6 100644
--- a/drivers/staging/media/st-cec/Kconfig
+++ b/drivers/staging/media/st-cec/Kconfig
@@ -1,6 +1,6 @@
 config VIDEO_STI_HDMI_CEC
        tristate "STMicroelectronics STiH4xx HDMI CEC driver"
-       depends on VIDEO_DEV && MEDIA_CEC && (ARCH_STI || COMPILE_TEST)
+       depends on VIDEO_DEV && MEDIA_CEC_SUPPORT && (ARCH_STI || COMPILE_TEST)
        ---help---
          This is a driver for STIH4xx HDMI CEC interface. It uses the
          generic CEC framework interface.
diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h
deleted file mode 100644
index 138bbf721e70..000000000000
--- a/include/linux/cec-funcs.h
+++ /dev/null
@@ -1,1971 +0,0 @@
-/*
- * cec - HDMI Consumer Electronics Control message functions
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * Alternatively you can redistribute this file under the terms of the
- * BSD license as stated below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. The names of its contributors may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * Note: this framework is still in staging and it is likely the API
- * will change before it goes out of staging.
- *
- * Once it is moved out of staging this header will move to uapi.
- */
-#ifndef _CEC_UAPI_FUNCS_H
-#define _CEC_UAPI_FUNCS_H
-
-#include <linux/cec.h>
-
-/* One Touch Play Feature */
-static inline void cec_msg_active_source(struct cec_msg *msg, __u16 phys_addr)
-{
-	msg->len = 4;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_ACTIVE_SOURCE;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-}
-
-static inline void cec_ops_active_source(const struct cec_msg *msg,
-					 __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_image_view_on(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_IMAGE_VIEW_ON;
-}
-
-static inline void cec_msg_text_view_on(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_TEXT_VIEW_ON;
-}
-
-
-/* Routing Control Feature */
-static inline void cec_msg_inactive_source(struct cec_msg *msg,
-					   __u16 phys_addr)
-{
-	msg->len = 4;
-	msg->msg[1] = CEC_MSG_INACTIVE_SOURCE;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-}
-
-static inline void cec_ops_inactive_source(const struct cec_msg *msg,
-					   __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_request_active_source(struct cec_msg *msg,
-						 bool reply)
-{
-	msg->len = 2;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_REQUEST_ACTIVE_SOURCE;
-	msg->reply = reply ? CEC_MSG_ACTIVE_SOURCE : 0;
-}
-
-static inline void cec_msg_routing_information(struct cec_msg *msg,
-					       __u16 phys_addr)
-{
-	msg->len = 4;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_ROUTING_INFORMATION;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-}
-
-static inline void cec_ops_routing_information(const struct cec_msg *msg,
-					       __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_routing_change(struct cec_msg *msg,
-					  bool reply,
-					  __u16 orig_phys_addr,
-					  __u16 new_phys_addr)
-{
-	msg->len = 6;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_ROUTING_CHANGE;
-	msg->msg[2] = orig_phys_addr >> 8;
-	msg->msg[3] = orig_phys_addr & 0xff;
-	msg->msg[4] = new_phys_addr >> 8;
-	msg->msg[5] = new_phys_addr & 0xff;
-	msg->reply = reply ? CEC_MSG_ROUTING_INFORMATION : 0;
-}
-
-static inline void cec_ops_routing_change(const struct cec_msg *msg,
-					  __u16 *orig_phys_addr,
-					  __u16 *new_phys_addr)
-{
-	*orig_phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*new_phys_addr = (msg->msg[4] << 8) | msg->msg[5];
-}
-
-static inline void cec_msg_set_stream_path(struct cec_msg *msg, __u16 phys_addr)
-{
-	msg->len = 4;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_SET_STREAM_PATH;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-}
-
-static inline void cec_ops_set_stream_path(const struct cec_msg *msg,
-					   __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-
-/* Standby Feature */
-static inline void cec_msg_standby(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_STANDBY;
-}
-
-
-/* One Touch Record Feature */
-static inline void cec_msg_record_off(struct cec_msg *msg, bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_RECORD_OFF;
-	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
-}
-
-struct cec_op_arib_data {
-	__u16 transport_id;
-	__u16 service_id;
-	__u16 orig_network_id;
-};
-
-struct cec_op_atsc_data {
-	__u16 transport_id;
-	__u16 program_number;
-};
-
-struct cec_op_dvb_data {
-	__u16 transport_id;
-	__u16 service_id;
-	__u16 orig_network_id;
-};
-
-struct cec_op_channel_data {
-	__u8 channel_number_fmt;
-	__u16 major;
-	__u16 minor;
-};
-
-struct cec_op_digital_service_id {
-	__u8 service_id_method;
-	__u8 dig_bcast_system;
-	union {
-		struct cec_op_arib_data arib;
-		struct cec_op_atsc_data atsc;
-		struct cec_op_dvb_data dvb;
-		struct cec_op_channel_data channel;
-	};
-};
-
-struct cec_op_record_src {
-	__u8 type;
-	union {
-		struct cec_op_digital_service_id digital;
-		struct {
-			__u8 ana_bcast_type;
-			__u16 ana_freq;
-			__u8 bcast_system;
-		} analog;
-		struct {
-			__u8 plug;
-		} ext_plug;
-		struct {
-			__u16 phys_addr;
-		} ext_phys_addr;
-	};
-};
-
-static inline void cec_set_digital_service_id(__u8 *msg,
-	      const struct cec_op_digital_service_id *digital)
-{
-	*msg++ = (digital->service_id_method << 7) | digital->dig_bcast_system;
-	if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
-		*msg++ = (digital->channel.channel_number_fmt << 2) |
-			 (digital->channel.major >> 8);
-		*msg++ = digital->channel.major & 0xff;
-		*msg++ = digital->channel.minor >> 8;
-		*msg++ = digital->channel.minor & 0xff;
-		*msg++ = 0;
-		*msg++ = 0;
-		return;
-	}
-	switch (digital->dig_bcast_system) {
-	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_GEN:
-	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_CABLE:
-	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_SAT:
-	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_T:
-		*msg++ = digital->atsc.transport_id >> 8;
-		*msg++ = digital->atsc.transport_id & 0xff;
-		*msg++ = digital->atsc.program_number >> 8;
-		*msg++ = digital->atsc.program_number & 0xff;
-		*msg++ = 0;
-		*msg++ = 0;
-		break;
-	default:
-		*msg++ = digital->dvb.transport_id >> 8;
-		*msg++ = digital->dvb.transport_id & 0xff;
-		*msg++ = digital->dvb.service_id >> 8;
-		*msg++ = digital->dvb.service_id & 0xff;
-		*msg++ = digital->dvb.orig_network_id >> 8;
-		*msg++ = digital->dvb.orig_network_id & 0xff;
-		break;
-	}
-}
-
-static inline void cec_get_digital_service_id(const __u8 *msg,
-	      struct cec_op_digital_service_id *digital)
-{
-	digital->service_id_method = msg[0] >> 7;
-	digital->dig_bcast_system = msg[0] & 0x7f;
-	if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
-		digital->channel.channel_number_fmt = msg[1] >> 2;
-		digital->channel.major = ((msg[1] & 3) << 6) | msg[2];
-		digital->channel.minor = (msg[3] << 8) | msg[4];
-		return;
-	}
-	digital->dvb.transport_id = (msg[1] << 8) | msg[2];
-	digital->dvb.service_id = (msg[3] << 8) | msg[4];
-	digital->dvb.orig_network_id = (msg[5] << 8) | msg[6];
-}
-
-static inline void cec_msg_record_on_own(struct cec_msg *msg)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_RECORD_ON;
-	msg->msg[2] = CEC_OP_RECORD_SRC_OWN;
-}
-
-static inline void cec_msg_record_on_digital(struct cec_msg *msg,
-			     const struct cec_op_digital_service_id *digital)
-{
-	msg->len = 10;
-	msg->msg[1] = CEC_MSG_RECORD_ON;
-	msg->msg[2] = CEC_OP_RECORD_SRC_DIGITAL;
-	cec_set_digital_service_id(msg->msg + 3, digital);
-}
-
-static inline void cec_msg_record_on_analog(struct cec_msg *msg,
-					    __u8 ana_bcast_type,
-					    __u16 ana_freq,
-					    __u8 bcast_system)
-{
-	msg->len = 7;
-	msg->msg[1] = CEC_MSG_RECORD_ON;
-	msg->msg[2] = CEC_OP_RECORD_SRC_ANALOG;
-	msg->msg[3] = ana_bcast_type;
-	msg->msg[4] = ana_freq >> 8;
-	msg->msg[5] = ana_freq & 0xff;
-	msg->msg[6] = bcast_system;
-}
-
-static inline void cec_msg_record_on_plug(struct cec_msg *msg,
-					  __u8 plug)
-{
-	msg->len = 4;
-	msg->msg[1] = CEC_MSG_RECORD_ON;
-	msg->msg[2] = CEC_OP_RECORD_SRC_EXT_PLUG;
-	msg->msg[3] = plug;
-}
-
-static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg,
-					       __u16 phys_addr)
-{
-	msg->len = 5;
-	msg->msg[1] = CEC_MSG_RECORD_ON;
-	msg->msg[2] = CEC_OP_RECORD_SRC_EXT_PHYS_ADDR;
-	msg->msg[3] = phys_addr >> 8;
-	msg->msg[4] = phys_addr & 0xff;
-}
-
-static inline void cec_msg_record_on(struct cec_msg *msg,
-				     bool reply,
-				     const struct cec_op_record_src *rec_src)
-{
-	switch (rec_src->type) {
-	case CEC_OP_RECORD_SRC_OWN:
-		cec_msg_record_on_own(msg);
-		break;
-	case CEC_OP_RECORD_SRC_DIGITAL:
-		cec_msg_record_on_digital(msg, &rec_src->digital);
-		break;
-	case CEC_OP_RECORD_SRC_ANALOG:
-		cec_msg_record_on_analog(msg,
-					 rec_src->analog.ana_bcast_type,
-					 rec_src->analog.ana_freq,
-					 rec_src->analog.bcast_system);
-		break;
-	case CEC_OP_RECORD_SRC_EXT_PLUG:
-		cec_msg_record_on_plug(msg, rec_src->ext_plug.plug);
-		break;
-	case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
-		cec_msg_record_on_phys_addr(msg,
-					    rec_src->ext_phys_addr.phys_addr);
-		break;
-	}
-	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
-}
-
-static inline void cec_ops_record_on(const struct cec_msg *msg,
-				     struct cec_op_record_src *rec_src)
-{
-	rec_src->type = msg->msg[2];
-	switch (rec_src->type) {
-	case CEC_OP_RECORD_SRC_OWN:
-		break;
-	case CEC_OP_RECORD_SRC_DIGITAL:
-		cec_get_digital_service_id(msg->msg + 3, &rec_src->digital);
-		break;
-	case CEC_OP_RECORD_SRC_ANALOG:
-		rec_src->analog.ana_bcast_type = msg->msg[3];
-		rec_src->analog.ana_freq =
-			(msg->msg[4] << 8) | msg->msg[5];
-		rec_src->analog.bcast_system = msg->msg[6];
-		break;
-	case CEC_OP_RECORD_SRC_EXT_PLUG:
-		rec_src->ext_plug.plug = msg->msg[3];
-		break;
-	case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
-		rec_src->ext_phys_addr.phys_addr =
-			(msg->msg[3] << 8) | msg->msg[4];
-		break;
-	}
-}
-
-static inline void cec_msg_record_status(struct cec_msg *msg, __u8 rec_status)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_RECORD_STATUS;
-	msg->msg[2] = rec_status;
-}
-
-static inline void cec_ops_record_status(const struct cec_msg *msg,
-					 __u8 *rec_status)
-{
-	*rec_status = msg->msg[2];
-}
-
-static inline void cec_msg_record_tv_screen(struct cec_msg *msg,
-					    bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_RECORD_TV_SCREEN;
-	msg->reply = reply ? CEC_MSG_RECORD_ON : 0;
-}
-
-
-/* Timer Programming Feature */
-static inline void cec_msg_timer_status(struct cec_msg *msg,
-					__u8 timer_overlap_warning,
-					__u8 media_info,
-					__u8 prog_info,
-					__u8 prog_error,
-					__u8 duration_hr,
-					__u8 duration_min)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_TIMER_STATUS;
-	msg->msg[2] = (timer_overlap_warning << 7) |
-		(media_info << 5) |
-		(prog_info ? 0x10 : 0) |
-		(prog_info ? prog_info : prog_error);
-	if (prog_info == CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE ||
-	    prog_info == CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE ||
-	    prog_error == CEC_OP_PROG_ERROR_DUPLICATE) {
-		msg->len += 2;
-		msg->msg[3] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-		msg->msg[4] = ((duration_min / 10) << 4) | (duration_min % 10);
-	}
-}
-
-static inline void cec_ops_timer_status(const struct cec_msg *msg,
-					__u8 *timer_overlap_warning,
-					__u8 *media_info,
-					__u8 *prog_info,
-					__u8 *prog_error,
-					__u8 *duration_hr,
-					__u8 *duration_min)
-{
-	*timer_overlap_warning = msg->msg[2] >> 7;
-	*media_info = (msg->msg[2] >> 5) & 3;
-	if (msg->msg[2] & 0x10) {
-		*prog_info = msg->msg[2] & 0xf;
-		*prog_error = 0;
-	} else {
-		*prog_info = 0;
-		*prog_error = msg->msg[2] & 0xf;
-	}
-	if (*prog_info == CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE ||
-	    *prog_info == CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE ||
-	    *prog_error == CEC_OP_PROG_ERROR_DUPLICATE) {
-		*duration_hr = (msg->msg[3] >> 4) * 10 + (msg->msg[3] & 0xf);
-		*duration_min = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	} else {
-		*duration_hr = *duration_min = 0;
-	}
-}
-
-static inline void cec_msg_timer_cleared_status(struct cec_msg *msg,
-						__u8 timer_cleared_status)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_TIMER_CLEARED_STATUS;
-	msg->msg[2] = timer_cleared_status;
-}
-
-static inline void cec_ops_timer_cleared_status(const struct cec_msg *msg,
-						__u8 *timer_cleared_status)
-{
-	*timer_cleared_status = msg->msg[2];
-}
-
-static inline void cec_msg_clear_analogue_timer(struct cec_msg *msg,
-						bool reply,
-						__u8 day,
-						__u8 month,
-						__u8 start_hr,
-						__u8 start_min,
-						__u8 duration_hr,
-						__u8 duration_min,
-						__u8 recording_seq,
-						__u8 ana_bcast_type,
-						__u16 ana_freq,
-						__u8 bcast_system)
-{
-	msg->len = 13;
-	msg->msg[1] = CEC_MSG_CLEAR_ANALOGUE_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	msg->msg[9] = ana_bcast_type;
-	msg->msg[10] = ana_freq >> 8;
-	msg->msg[11] = ana_freq & 0xff;
-	msg->msg[12] = bcast_system;
-	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
-}
-
-static inline void cec_ops_clear_analogue_timer(const struct cec_msg *msg,
-						__u8 *day,
-						__u8 *month,
-						__u8 *start_hr,
-						__u8 *start_min,
-						__u8 *duration_hr,
-						__u8 *duration_min,
-						__u8 *recording_seq,
-						__u8 *ana_bcast_type,
-						__u16 *ana_freq,
-						__u8 *bcast_system)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	*ana_bcast_type = msg->msg[9];
-	*ana_freq = (msg->msg[10] << 8) | msg->msg[11];
-	*bcast_system = msg->msg[12];
-}
-
-static inline void cec_msg_clear_digital_timer(struct cec_msg *msg,
-				bool reply,
-				__u8 day,
-				__u8 month,
-				__u8 start_hr,
-				__u8 start_min,
-				__u8 duration_hr,
-				__u8 duration_min,
-				__u8 recording_seq,
-				const struct cec_op_digital_service_id *digital)
-{
-	msg->len = 16;
-	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
-	msg->msg[1] = CEC_MSG_CLEAR_DIGITAL_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	cec_set_digital_service_id(msg->msg + 9, digital);
-}
-
-static inline void cec_ops_clear_digital_timer(const struct cec_msg *msg,
-				__u8 *day,
-				__u8 *month,
-				__u8 *start_hr,
-				__u8 *start_min,
-				__u8 *duration_hr,
-				__u8 *duration_min,
-				__u8 *recording_seq,
-				struct cec_op_digital_service_id *digital)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	cec_get_digital_service_id(msg->msg + 9, digital);
-}
-
-static inline void cec_msg_clear_ext_timer(struct cec_msg *msg,
-					   bool reply,
-					   __u8 day,
-					   __u8 month,
-					   __u8 start_hr,
-					   __u8 start_min,
-					   __u8 duration_hr,
-					   __u8 duration_min,
-					   __u8 recording_seq,
-					   __u8 ext_src_spec,
-					   __u8 plug,
-					   __u16 phys_addr)
-{
-	msg->len = 13;
-	msg->msg[1] = CEC_MSG_CLEAR_EXT_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	msg->msg[9] = ext_src_spec;
-	msg->msg[10] = plug;
-	msg->msg[11] = phys_addr >> 8;
-	msg->msg[12] = phys_addr & 0xff;
-	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
-}
-
-static inline void cec_ops_clear_ext_timer(const struct cec_msg *msg,
-					   __u8 *day,
-					   __u8 *month,
-					   __u8 *start_hr,
-					   __u8 *start_min,
-					   __u8 *duration_hr,
-					   __u8 *duration_min,
-					   __u8 *recording_seq,
-					   __u8 *ext_src_spec,
-					   __u8 *plug,
-					   __u16 *phys_addr)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	*ext_src_spec = msg->msg[9];
-	*plug = msg->msg[10];
-	*phys_addr = (msg->msg[11] << 8) | msg->msg[12];
-}
-
-static inline void cec_msg_set_analogue_timer(struct cec_msg *msg,
-					      bool reply,
-					      __u8 day,
-					      __u8 month,
-					      __u8 start_hr,
-					      __u8 start_min,
-					      __u8 duration_hr,
-					      __u8 duration_min,
-					      __u8 recording_seq,
-					      __u8 ana_bcast_type,
-					      __u16 ana_freq,
-					      __u8 bcast_system)
-{
-	msg->len = 13;
-	msg->msg[1] = CEC_MSG_SET_ANALOGUE_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	msg->msg[9] = ana_bcast_type;
-	msg->msg[10] = ana_freq >> 8;
-	msg->msg[11] = ana_freq & 0xff;
-	msg->msg[12] = bcast_system;
-	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
-}
-
-static inline void cec_ops_set_analogue_timer(const struct cec_msg *msg,
-					      __u8 *day,
-					      __u8 *month,
-					      __u8 *start_hr,
-					      __u8 *start_min,
-					      __u8 *duration_hr,
-					      __u8 *duration_min,
-					      __u8 *recording_seq,
-					      __u8 *ana_bcast_type,
-					      __u16 *ana_freq,
-					      __u8 *bcast_system)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	*ana_bcast_type = msg->msg[9];
-	*ana_freq = (msg->msg[10] << 8) | msg->msg[11];
-	*bcast_system = msg->msg[12];
-}
-
-static inline void cec_msg_set_digital_timer(struct cec_msg *msg,
-			bool reply,
-			__u8 day,
-			__u8 month,
-			__u8 start_hr,
-			__u8 start_min,
-			__u8 duration_hr,
-			__u8 duration_min,
-			__u8 recording_seq,
-			const struct cec_op_digital_service_id *digital)
-{
-	msg->len = 16;
-	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
-	msg->msg[1] = CEC_MSG_SET_DIGITAL_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	cec_set_digital_service_id(msg->msg + 9, digital);
-}
-
-static inline void cec_ops_set_digital_timer(const struct cec_msg *msg,
-			__u8 *day,
-			__u8 *month,
-			__u8 *start_hr,
-			__u8 *start_min,
-			__u8 *duration_hr,
-			__u8 *duration_min,
-			__u8 *recording_seq,
-			struct cec_op_digital_service_id *digital)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	cec_get_digital_service_id(msg->msg + 9, digital);
-}
-
-static inline void cec_msg_set_ext_timer(struct cec_msg *msg,
-					 bool reply,
-					 __u8 day,
-					 __u8 month,
-					 __u8 start_hr,
-					 __u8 start_min,
-					 __u8 duration_hr,
-					 __u8 duration_min,
-					 __u8 recording_seq,
-					 __u8 ext_src_spec,
-					 __u8 plug,
-					 __u16 phys_addr)
-{
-	msg->len = 13;
-	msg->msg[1] = CEC_MSG_SET_EXT_TIMER;
-	msg->msg[2] = day;
-	msg->msg[3] = month;
-	/* Hours and minutes are in BCD format */
-	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
-	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
-	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
-	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
-	msg->msg[8] = recording_seq;
-	msg->msg[9] = ext_src_spec;
-	msg->msg[10] = plug;
-	msg->msg[11] = phys_addr >> 8;
-	msg->msg[12] = phys_addr & 0xff;
-	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
-}
-
-static inline void cec_ops_set_ext_timer(const struct cec_msg *msg,
-					 __u8 *day,
-					 __u8 *month,
-					 __u8 *start_hr,
-					 __u8 *start_min,
-					 __u8 *duration_hr,
-					 __u8 *duration_min,
-					 __u8 *recording_seq,
-					 __u8 *ext_src_spec,
-					 __u8 *plug,
-					 __u16 *phys_addr)
-{
-	*day = msg->msg[2];
-	*month = msg->msg[3];
-	/* Hours and minutes are in BCD format */
-	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
-	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
-	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
-	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
-	*recording_seq = msg->msg[8];
-	*ext_src_spec = msg->msg[9];
-	*plug = msg->msg[10];
-	*phys_addr = (msg->msg[11] << 8) | msg->msg[12];
-}
-
-static inline void cec_msg_set_timer_program_title(struct cec_msg *msg,
-						   const char *prog_title)
-{
-	unsigned int len = strlen(prog_title);
-
-	if (len > 14)
-		len = 14;
-	msg->len = 2 + len;
-	msg->msg[1] = CEC_MSG_SET_TIMER_PROGRAM_TITLE;
-	memcpy(msg->msg + 2, prog_title, len);
-}
-
-static inline void cec_ops_set_timer_program_title(const struct cec_msg *msg,
-						   char *prog_title)
-{
-	unsigned int len = msg->len > 2 ? msg->len - 2 : 0;
-
-	if (len > 14)
-		len = 14;
-	memcpy(prog_title, msg->msg + 2, len);
-	prog_title[len] = '\0';
-}
-
-/* System Information Feature */
-static inline void cec_msg_cec_version(struct cec_msg *msg, __u8 cec_version)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_CEC_VERSION;
-	msg->msg[2] = cec_version;
-}
-
-static inline void cec_ops_cec_version(const struct cec_msg *msg,
-				       __u8 *cec_version)
-{
-	*cec_version = msg->msg[2];
-}
-
-static inline void cec_msg_get_cec_version(struct cec_msg *msg,
-					   bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GET_CEC_VERSION;
-	msg->reply = reply ? CEC_MSG_CEC_VERSION : 0;
-}
-
-static inline void cec_msg_report_physical_addr(struct cec_msg *msg,
-					__u16 phys_addr, __u8 prim_devtype)
-{
-	msg->len = 5;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_REPORT_PHYSICAL_ADDR;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-	msg->msg[4] = prim_devtype;
-}
-
-static inline void cec_ops_report_physical_addr(const struct cec_msg *msg,
-					__u16 *phys_addr, __u8 *prim_devtype)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*prim_devtype = msg->msg[4];
-}
-
-static inline void cec_msg_give_physical_addr(struct cec_msg *msg,
-					      bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_PHYSICAL_ADDR;
-	msg->reply = reply ? CEC_MSG_REPORT_PHYSICAL_ADDR : 0;
-}
-
-static inline void cec_msg_set_menu_language(struct cec_msg *msg,
-					     const char *language)
-{
-	msg->len = 5;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_SET_MENU_LANGUAGE;
-	memcpy(msg->msg + 2, language, 3);
-}
-
-static inline void cec_ops_set_menu_language(const struct cec_msg *msg,
-					     char *language)
-{
-	memcpy(language, msg->msg + 2, 3);
-	language[3] = '\0';
-}
-
-static inline void cec_msg_get_menu_language(struct cec_msg *msg,
-					     bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GET_MENU_LANGUAGE;
-	msg->reply = reply ? CEC_MSG_SET_MENU_LANGUAGE : 0;
-}
-
-/*
- * Assumes a single RC Profile byte and a single Device Features byte,
- * i.e. no extended features are supported by this helper function.
- *
- * As of CEC 2.0 no extended features are defined, should those be added
- * in the future, then this function needs to be adapted or a new function
- * should be added.
- */
-static inline void cec_msg_report_features(struct cec_msg *msg,
-				__u8 cec_version, __u8 all_device_types,
-				__u8 rc_profile, __u8 dev_features)
-{
-	msg->len = 6;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_REPORT_FEATURES;
-	msg->msg[2] = cec_version;
-	msg->msg[3] = all_device_types;
-	msg->msg[4] = rc_profile;
-	msg->msg[5] = dev_features;
-}
-
-static inline void cec_ops_report_features(const struct cec_msg *msg,
-			__u8 *cec_version, __u8 *all_device_types,
-			const __u8 **rc_profile, const __u8 **dev_features)
-{
-	const __u8 *p = &msg->msg[4];
-
-	*cec_version = msg->msg[2];
-	*all_device_types = msg->msg[3];
-	*rc_profile = p;
-	while (p < &msg->msg[14] && (*p & CEC_OP_FEAT_EXT))
-		p++;
-	if (!(*p & CEC_OP_FEAT_EXT)) {
-		*dev_features = p + 1;
-		while (p < &msg->msg[15] && (*p & CEC_OP_FEAT_EXT))
-			p++;
-	}
-	if (*p & CEC_OP_FEAT_EXT)
-		*rc_profile = *dev_features = NULL;
-}
-
-static inline void cec_msg_give_features(struct cec_msg *msg,
-					 bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_FEATURES;
-	msg->reply = reply ? CEC_MSG_REPORT_FEATURES : 0;
-}
-
-/* Deck Control Feature */
-static inline void cec_msg_deck_control(struct cec_msg *msg,
-					__u8 deck_control_mode)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_DECK_CONTROL;
-	msg->msg[2] = deck_control_mode;
-}
-
-static inline void cec_ops_deck_control(const struct cec_msg *msg,
-					__u8 *deck_control_mode)
-{
-	*deck_control_mode = msg->msg[2];
-}
-
-static inline void cec_msg_deck_status(struct cec_msg *msg,
-				       __u8 deck_info)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_DECK_STATUS;
-	msg->msg[2] = deck_info;
-}
-
-static inline void cec_ops_deck_status(const struct cec_msg *msg,
-				       __u8 *deck_info)
-{
-	*deck_info = msg->msg[2];
-}
-
-static inline void cec_msg_give_deck_status(struct cec_msg *msg,
-					    bool reply,
-					    __u8 status_req)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_GIVE_DECK_STATUS;
-	msg->msg[2] = status_req;
-	msg->reply = reply ? CEC_MSG_DECK_STATUS : 0;
-}
-
-static inline void cec_ops_give_deck_status(const struct cec_msg *msg,
-					    __u8 *status_req)
-{
-	*status_req = msg->msg[2];
-}
-
-static inline void cec_msg_play(struct cec_msg *msg,
-				__u8 play_mode)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_PLAY;
-	msg->msg[2] = play_mode;
-}
-
-static inline void cec_ops_play(const struct cec_msg *msg,
-				__u8 *play_mode)
-{
-	*play_mode = msg->msg[2];
-}
-
-
-/* Tuner Control Feature */
-struct cec_op_tuner_device_info {
-	__u8 rec_flag;
-	__u8 tuner_display_info;
-	bool is_analog;
-	union {
-		struct cec_op_digital_service_id digital;
-		struct {
-			__u8 ana_bcast_type;
-			__u16 ana_freq;
-			__u8 bcast_system;
-		} analog;
-	};
-};
-
-static inline void cec_msg_tuner_device_status_analog(struct cec_msg *msg,
-						      __u8 rec_flag,
-						      __u8 tuner_display_info,
-						      __u8 ana_bcast_type,
-						      __u16 ana_freq,
-						      __u8 bcast_system)
-{
-	msg->len = 7;
-	msg->msg[1] = CEC_MSG_TUNER_DEVICE_STATUS;
-	msg->msg[2] = (rec_flag << 7) | tuner_display_info;
-	msg->msg[3] = ana_bcast_type;
-	msg->msg[4] = ana_freq >> 8;
-	msg->msg[5] = ana_freq & 0xff;
-	msg->msg[6] = bcast_system;
-}
-
-static inline void cec_msg_tuner_device_status_digital(struct cec_msg *msg,
-		   __u8 rec_flag, __u8 tuner_display_info,
-		   const struct cec_op_digital_service_id *digital)
-{
-	msg->len = 10;
-	msg->msg[1] = CEC_MSG_TUNER_DEVICE_STATUS;
-	msg->msg[2] = (rec_flag << 7) | tuner_display_info;
-	cec_set_digital_service_id(msg->msg + 3, digital);
-}
-
-static inline void cec_msg_tuner_device_status(struct cec_msg *msg,
-			const struct cec_op_tuner_device_info *tuner_dev_info)
-{
-	if (tuner_dev_info->is_analog)
-		cec_msg_tuner_device_status_analog(msg,
-			tuner_dev_info->rec_flag,
-			tuner_dev_info->tuner_display_info,
-			tuner_dev_info->analog.ana_bcast_type,
-			tuner_dev_info->analog.ana_freq,
-			tuner_dev_info->analog.bcast_system);
-	else
-		cec_msg_tuner_device_status_digital(msg,
-			tuner_dev_info->rec_flag,
-			tuner_dev_info->tuner_display_info,
-			&tuner_dev_info->digital);
-}
-
-static inline void cec_ops_tuner_device_status(const struct cec_msg *msg,
-				struct cec_op_tuner_device_info *tuner_dev_info)
-{
-	tuner_dev_info->is_analog = msg->len < 10;
-	tuner_dev_info->rec_flag = msg->msg[2] >> 7;
-	tuner_dev_info->tuner_display_info = msg->msg[2] & 0x7f;
-	if (tuner_dev_info->is_analog) {
-		tuner_dev_info->analog.ana_bcast_type = msg->msg[3];
-		tuner_dev_info->analog.ana_freq = (msg->msg[4] << 8) | msg->msg[5];
-		tuner_dev_info->analog.bcast_system = msg->msg[6];
-		return;
-	}
-	cec_get_digital_service_id(msg->msg + 3, &tuner_dev_info->digital);
-}
-
-static inline void cec_msg_give_tuner_device_status(struct cec_msg *msg,
-						    bool reply,
-						    __u8 status_req)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_GIVE_TUNER_DEVICE_STATUS;
-	msg->msg[2] = status_req;
-	msg->reply = reply ? CEC_MSG_TUNER_DEVICE_STATUS : 0;
-}
-
-static inline void cec_ops_give_tuner_device_status(const struct cec_msg *msg,
-						    __u8 *status_req)
-{
-	*status_req = msg->msg[2];
-}
-
-static inline void cec_msg_select_analogue_service(struct cec_msg *msg,
-						   __u8 ana_bcast_type,
-						   __u16 ana_freq,
-						   __u8 bcast_system)
-{
-	msg->len = 6;
-	msg->msg[1] = CEC_MSG_SELECT_ANALOGUE_SERVICE;
-	msg->msg[2] = ana_bcast_type;
-	msg->msg[3] = ana_freq >> 8;
-	msg->msg[4] = ana_freq & 0xff;
-	msg->msg[5] = bcast_system;
-}
-
-static inline void cec_ops_select_analogue_service(const struct cec_msg *msg,
-						   __u8 *ana_bcast_type,
-						   __u16 *ana_freq,
-						   __u8 *bcast_system)
-{
-	*ana_bcast_type = msg->msg[2];
-	*ana_freq = (msg->msg[3] << 8) | msg->msg[4];
-	*bcast_system = msg->msg[5];
-}
-
-static inline void cec_msg_select_digital_service(struct cec_msg *msg,
-				const struct cec_op_digital_service_id *digital)
-{
-	msg->len = 9;
-	msg->msg[1] = CEC_MSG_SELECT_DIGITAL_SERVICE;
-	cec_set_digital_service_id(msg->msg + 2, digital);
-}
-
-static inline void cec_ops_select_digital_service(const struct cec_msg *msg,
-				struct cec_op_digital_service_id *digital)
-{
-	cec_get_digital_service_id(msg->msg + 2, digital);
-}
-
-static inline void cec_msg_tuner_step_decrement(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_TUNER_STEP_DECREMENT;
-}
-
-static inline void cec_msg_tuner_step_increment(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_TUNER_STEP_INCREMENT;
-}
-
-
-/* Vendor Specific Commands Feature */
-static inline void cec_msg_device_vendor_id(struct cec_msg *msg, __u32 vendor_id)
-{
-	msg->len = 5;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_DEVICE_VENDOR_ID;
-	msg->msg[2] = vendor_id >> 16;
-	msg->msg[3] = (vendor_id >> 8) & 0xff;
-	msg->msg[4] = vendor_id & 0xff;
-}
-
-static inline void cec_ops_device_vendor_id(const struct cec_msg *msg,
-					    __u32 *vendor_id)
-{
-	*vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
-}
-
-static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg,
-						 bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_DEVICE_VENDOR_ID;
-	msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0;
-}
-
-static inline void cec_msg_vendor_command(struct cec_msg *msg,
-					  __u8 size, const __u8 *vendor_cmd)
-{
-	if (size > 14)
-		size = 14;
-	msg->len = 2 + size;
-	msg->msg[1] = CEC_MSG_VENDOR_COMMAND;
-	memcpy(msg->msg + 2, vendor_cmd, size);
-}
-
-static inline void cec_ops_vendor_command(const struct cec_msg *msg,
-					  __u8 *size,
-					  const __u8 **vendor_cmd)
-{
-	*size = msg->len - 2;
-
-	if (*size > 14)
-		*size = 14;
-	*vendor_cmd = msg->msg + 2;
-}
-
-static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg,
-						  __u32 vendor_id, __u8 size,
-						  const __u8 *vendor_cmd)
-{
-	if (size > 11)
-		size = 11;
-	msg->len = 5 + size;
-	msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID;
-	msg->msg[2] = vendor_id >> 16;
-	msg->msg[3] = (vendor_id >> 8) & 0xff;
-	msg->msg[4] = vendor_id & 0xff;
-	memcpy(msg->msg + 5, vendor_cmd, size);
-}
-
-static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg,
-						  __u32 *vendor_id,  __u8 *size,
-						  const __u8 **vendor_cmd)
-{
-	*size = msg->len - 5;
-
-	if (*size > 11)
-		*size = 11;
-	*vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
-	*vendor_cmd = msg->msg + 5;
-}
-
-static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg,
-						     __u8 size,
-						     const __u8 *rc_code)
-{
-	if (size > 14)
-		size = 14;
-	msg->len = 2 + size;
-	msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN;
-	memcpy(msg->msg + 2, rc_code, size);
-}
-
-static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg,
-						     __u8 *size,
-						     const __u8 **rc_code)
-{
-	*size = msg->len - 2;
-
-	if (*size > 14)
-		*size = 14;
-	*rc_code = msg->msg + 2;
-}
-
-static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_UP;
-}
-
-
-/* OSD Display Feature */
-static inline void cec_msg_set_osd_string(struct cec_msg *msg,
-					  __u8 disp_ctl,
-					  const char *osd)
-{
-	unsigned int len = strlen(osd);
-
-	if (len > 13)
-		len = 13;
-	msg->len = 3 + len;
-	msg->msg[1] = CEC_MSG_SET_OSD_STRING;
-	msg->msg[2] = disp_ctl;
-	memcpy(msg->msg + 3, osd, len);
-}
-
-static inline void cec_ops_set_osd_string(const struct cec_msg *msg,
-					  __u8 *disp_ctl,
-					  char *osd)
-{
-	unsigned int len = msg->len > 3 ? msg->len - 3 : 0;
-
-	*disp_ctl = msg->msg[2];
-	if (len > 13)
-		len = 13;
-	memcpy(osd, msg->msg + 3, len);
-	osd[len] = '\0';
-}
-
-
-/* Device OSD Transfer Feature */
-static inline void cec_msg_set_osd_name(struct cec_msg *msg, const char *name)
-{
-	unsigned int len = strlen(name);
-
-	if (len > 14)
-		len = 14;
-	msg->len = 2 + len;
-	msg->msg[1] = CEC_MSG_SET_OSD_NAME;
-	memcpy(msg->msg + 2, name, len);
-}
-
-static inline void cec_ops_set_osd_name(const struct cec_msg *msg,
-					char *name)
-{
-	unsigned int len = msg->len > 2 ? msg->len - 2 : 0;
-
-	if (len > 14)
-		len = 14;
-	memcpy(name, msg->msg + 2, len);
-	name[len] = '\0';
-}
-
-static inline void cec_msg_give_osd_name(struct cec_msg *msg,
-					 bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_OSD_NAME;
-	msg->reply = reply ? CEC_MSG_SET_OSD_NAME : 0;
-}
-
-
-/* Device Menu Control Feature */
-static inline void cec_msg_menu_status(struct cec_msg *msg,
-				       __u8 menu_state)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_MENU_STATUS;
-	msg->msg[2] = menu_state;
-}
-
-static inline void cec_ops_menu_status(const struct cec_msg *msg,
-				       __u8 *menu_state)
-{
-	*menu_state = msg->msg[2];
-}
-
-static inline void cec_msg_menu_request(struct cec_msg *msg,
-					bool reply,
-					__u8 menu_req)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_MENU_REQUEST;
-	msg->msg[2] = menu_req;
-	msg->reply = reply ? CEC_MSG_MENU_STATUS : 0;
-}
-
-static inline void cec_ops_menu_request(const struct cec_msg *msg,
-					__u8 *menu_req)
-{
-	*menu_req = msg->msg[2];
-}
-
-struct cec_op_ui_command {
-	__u8 ui_cmd;
-	bool has_opt_arg;
-	union {
-		struct cec_op_channel_data channel_identifier;
-		__u8 ui_broadcast_type;
-		__u8 ui_sound_presentation_control;
-		__u8 play_mode;
-		__u8 ui_function_media;
-		__u8 ui_function_select_av_input;
-		__u8 ui_function_select_audio_input;
-	};
-};
-
-static inline void cec_msg_user_control_pressed(struct cec_msg *msg,
-					const struct cec_op_ui_command *ui_cmd)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_USER_CONTROL_PRESSED;
-	msg->msg[2] = ui_cmd->ui_cmd;
-	if (!ui_cmd->has_opt_arg)
-		return;
-	switch (ui_cmd->ui_cmd) {
-	case 0x56:
-	case 0x57:
-	case 0x60:
-	case 0x68:
-	case 0x69:
-	case 0x6a:
-		/* The optional operand is one byte for all these ui commands */
-		msg->len++;
-		msg->msg[3] = ui_cmd->play_mode;
-		break;
-	case 0x67:
-		msg->len += 4;
-		msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) |
-			      (ui_cmd->channel_identifier.major >> 8);
-		msg->msg[4] = ui_cmd->channel_identifier.major & 0xff;
-		msg->msg[5] = ui_cmd->channel_identifier.minor >> 8;
-		msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff;
-		break;
-	}
-}
-
-static inline void cec_ops_user_control_pressed(const struct cec_msg *msg,
-						struct cec_op_ui_command *ui_cmd)
-{
-	ui_cmd->ui_cmd = msg->msg[2];
-	ui_cmd->has_opt_arg = false;
-	if (msg->len == 3)
-		return;
-	switch (ui_cmd->ui_cmd) {
-	case 0x56:
-	case 0x57:
-	case 0x60:
-	case 0x68:
-	case 0x69:
-	case 0x6a:
-		/* The optional operand is one byte for all these ui commands */
-		ui_cmd->play_mode = msg->msg[3];
-		ui_cmd->has_opt_arg = true;
-		break;
-	case 0x67:
-		if (msg->len < 7)
-			break;
-		ui_cmd->has_opt_arg = true;
-		ui_cmd->channel_identifier.channel_number_fmt = msg->msg[3] >> 2;
-		ui_cmd->channel_identifier.major = ((msg->msg[3] & 3) << 6) | msg->msg[4];
-		ui_cmd->channel_identifier.minor = (msg->msg[5] << 8) | msg->msg[6];
-		break;
-	}
-}
-
-static inline void cec_msg_user_control_released(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_USER_CONTROL_RELEASED;
-}
-
-/* Remote Control Passthrough Feature */
-
-/* Power Status Feature */
-static inline void cec_msg_report_power_status(struct cec_msg *msg,
-					       __u8 pwr_state)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_REPORT_POWER_STATUS;
-	msg->msg[2] = pwr_state;
-}
-
-static inline void cec_ops_report_power_status(const struct cec_msg *msg,
-					       __u8 *pwr_state)
-{
-	*pwr_state = msg->msg[2];
-}
-
-static inline void cec_msg_give_device_power_status(struct cec_msg *msg,
-						    bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_DEVICE_POWER_STATUS;
-	msg->reply = reply ? CEC_MSG_REPORT_POWER_STATUS : 0;
-}
-
-/* General Protocol Messages */
-static inline void cec_msg_feature_abort(struct cec_msg *msg,
-					 __u8 abort_msg, __u8 reason)
-{
-	msg->len = 4;
-	msg->msg[1] = CEC_MSG_FEATURE_ABORT;
-	msg->msg[2] = abort_msg;
-	msg->msg[3] = reason;
-}
-
-static inline void cec_ops_feature_abort(const struct cec_msg *msg,
-					 __u8 *abort_msg, __u8 *reason)
-{
-	*abort_msg = msg->msg[2];
-	*reason = msg->msg[3];
-}
-
-/* This changes the current message into a feature abort message */
-static inline void cec_msg_reply_feature_abort(struct cec_msg *msg, __u8 reason)
-{
-	cec_msg_set_reply_to(msg, msg);
-	msg->len = 4;
-	msg->msg[2] = msg->msg[1];
-	msg->msg[3] = reason;
-	msg->msg[1] = CEC_MSG_FEATURE_ABORT;
-}
-
-static inline void cec_msg_abort(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_ABORT;
-}
-
-
-/* System Audio Control Feature */
-static inline void cec_msg_report_audio_status(struct cec_msg *msg,
-					       __u8 aud_mute_status,
-					       __u8 aud_vol_status)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_REPORT_AUDIO_STATUS;
-	msg->msg[2] = (aud_mute_status << 7) | (aud_vol_status & 0x7f);
-}
-
-static inline void cec_ops_report_audio_status(const struct cec_msg *msg,
-					       __u8 *aud_mute_status,
-					       __u8 *aud_vol_status)
-{
-	*aud_mute_status = msg->msg[2] >> 7;
-	*aud_vol_status = msg->msg[2] & 0x7f;
-}
-
-static inline void cec_msg_give_audio_status(struct cec_msg *msg,
-					     bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_AUDIO_STATUS;
-	msg->reply = reply ? CEC_MSG_REPORT_AUDIO_STATUS : 0;
-}
-
-static inline void cec_msg_set_system_audio_mode(struct cec_msg *msg,
-						 __u8 sys_aud_status)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_SET_SYSTEM_AUDIO_MODE;
-	msg->msg[2] = sys_aud_status;
-}
-
-static inline void cec_ops_set_system_audio_mode(const struct cec_msg *msg,
-						 __u8 *sys_aud_status)
-{
-	*sys_aud_status = msg->msg[2];
-}
-
-static inline void cec_msg_system_audio_mode_request(struct cec_msg *msg,
-						     bool reply,
-						     __u16 phys_addr)
-{
-	msg->len = phys_addr == 0xffff ? 2 : 4;
-	msg->msg[1] = CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-	msg->reply = reply ? CEC_MSG_SET_SYSTEM_AUDIO_MODE : 0;
-
-}
-
-static inline void cec_ops_system_audio_mode_request(const struct cec_msg *msg,
-						     __u16 *phys_addr)
-{
-	if (msg->len < 4)
-		*phys_addr = 0xffff;
-	else
-		*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_system_audio_mode_status(struct cec_msg *msg,
-						    __u8 sys_aud_status)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_SYSTEM_AUDIO_MODE_STATUS;
-	msg->msg[2] = sys_aud_status;
-}
-
-static inline void cec_ops_system_audio_mode_status(const struct cec_msg *msg,
-						    __u8 *sys_aud_status)
-{
-	*sys_aud_status = msg->msg[2];
-}
-
-static inline void cec_msg_give_system_audio_mode_status(struct cec_msg *msg,
-							 bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS;
-	msg->reply = reply ? CEC_MSG_SYSTEM_AUDIO_MODE_STATUS : 0;
-}
-
-static inline void cec_msg_report_short_audio_descriptor(struct cec_msg *msg,
-					__u8 num_descriptors,
-					const __u32 *descriptors)
-{
-	unsigned int i;
-
-	if (num_descriptors > 4)
-		num_descriptors = 4;
-	msg->len = 2 + num_descriptors * 3;
-	msg->msg[1] = CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR;
-	for (i = 0; i < num_descriptors; i++) {
-		msg->msg[2 + i * 3] = (descriptors[i] >> 16) & 0xff;
-		msg->msg[3 + i * 3] = (descriptors[i] >> 8) & 0xff;
-		msg->msg[4 + i * 3] = descriptors[i] & 0xff;
-	}
-}
-
-static inline void cec_ops_report_short_audio_descriptor(const struct cec_msg *msg,
-							 __u8 *num_descriptors,
-							 __u32 *descriptors)
-{
-	unsigned int i;
-
-	*num_descriptors = (msg->len - 2) / 3;
-	if (*num_descriptors > 4)
-		*num_descriptors = 4;
-	for (i = 0; i < *num_descriptors; i++)
-		descriptors[i] = (msg->msg[2 + i * 3] << 16) |
-			(msg->msg[3 + i * 3] << 8) |
-			msg->msg[4 + i * 3];
-}
-
-static inline void cec_msg_request_short_audio_descriptor(struct cec_msg *msg,
-					bool reply,
-					__u8 num_descriptors,
-					const __u8 *audio_format_id,
-					const __u8 *audio_format_code)
-{
-	unsigned int i;
-
-	if (num_descriptors > 4)
-		num_descriptors = 4;
-	msg->len = 2 + num_descriptors;
-	msg->msg[1] = CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR;
-	msg->reply = reply ? CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR : 0;
-	for (i = 0; i < num_descriptors; i++)
-		msg->msg[2 + i] = (audio_format_id[i] << 6) |
-				  (audio_format_code[i] & 0x3f);
-}
-
-static inline void cec_ops_request_short_audio_descriptor(const struct cec_msg *msg,
-					__u8 *num_descriptors,
-					__u8 *audio_format_id,
-					__u8 *audio_format_code)
-{
-	unsigned int i;
-
-	*num_descriptors = msg->len - 2;
-	if (*num_descriptors > 4)
-		*num_descriptors = 4;
-	for (i = 0; i < *num_descriptors; i++) {
-		audio_format_id[i] = msg->msg[2 + i] >> 6;
-		audio_format_code[i] = msg->msg[2 + i] & 0x3f;
-	}
-}
-
-
-/* Audio Rate Control Feature */
-static inline void cec_msg_set_audio_rate(struct cec_msg *msg,
-					  __u8 audio_rate)
-{
-	msg->len = 3;
-	msg->msg[1] = CEC_MSG_SET_AUDIO_RATE;
-	msg->msg[2] = audio_rate;
-}
-
-static inline void cec_ops_set_audio_rate(const struct cec_msg *msg,
-					  __u8 *audio_rate)
-{
-	*audio_rate = msg->msg[2];
-}
-
-
-/* Audio Return Channel Control Feature */
-static inline void cec_msg_report_arc_initiated(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_REPORT_ARC_INITIATED;
-}
-
-static inline void cec_msg_initiate_arc(struct cec_msg *msg,
-					bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_INITIATE_ARC;
-	msg->reply = reply ? CEC_MSG_REPORT_ARC_INITIATED : 0;
-}
-
-static inline void cec_msg_request_arc_initiation(struct cec_msg *msg,
-						  bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_REQUEST_ARC_INITIATION;
-	msg->reply = reply ? CEC_MSG_INITIATE_ARC : 0;
-}
-
-static inline void cec_msg_report_arc_terminated(struct cec_msg *msg)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_REPORT_ARC_TERMINATED;
-}
-
-static inline void cec_msg_terminate_arc(struct cec_msg *msg,
-					 bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_TERMINATE_ARC;
-	msg->reply = reply ? CEC_MSG_REPORT_ARC_TERMINATED : 0;
-}
-
-static inline void cec_msg_request_arc_termination(struct cec_msg *msg,
-						   bool reply)
-{
-	msg->len = 2;
-	msg->msg[1] = CEC_MSG_REQUEST_ARC_TERMINATION;
-	msg->reply = reply ? CEC_MSG_TERMINATE_ARC : 0;
-}
-
-
-/* Dynamic Audio Lipsync Feature */
-/* Only for CEC 2.0 and up */
-static inline void cec_msg_report_current_latency(struct cec_msg *msg,
-						  __u16 phys_addr,
-						  __u8 video_latency,
-						  __u8 low_latency_mode,
-						  __u8 audio_out_compensated,
-						  __u8 audio_out_delay)
-{
-	msg->len = 7;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_REPORT_CURRENT_LATENCY;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-	msg->msg[4] = video_latency;
-	msg->msg[5] = (low_latency_mode << 2) | audio_out_compensated;
-	msg->msg[6] = audio_out_delay;
-}
-
-static inline void cec_ops_report_current_latency(const struct cec_msg *msg,
-						  __u16 *phys_addr,
-						  __u8 *video_latency,
-						  __u8 *low_latency_mode,
-						  __u8 *audio_out_compensated,
-						  __u8 *audio_out_delay)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*video_latency = msg->msg[4];
-	*low_latency_mode = (msg->msg[5] >> 2) & 1;
-	*audio_out_compensated = msg->msg[5] & 3;
-	*audio_out_delay = msg->msg[6];
-}
-
-static inline void cec_msg_request_current_latency(struct cec_msg *msg,
-						   bool reply,
-						   __u16 phys_addr)
-{
-	msg->len = 4;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_REQUEST_CURRENT_LATENCY;
-	msg->msg[2] = phys_addr >> 8;
-	msg->msg[3] = phys_addr & 0xff;
-	msg->reply = reply ? CEC_MSG_REPORT_CURRENT_LATENCY : 0;
-}
-
-static inline void cec_ops_request_current_latency(const struct cec_msg *msg,
-						   __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-
-/* Capability Discovery and Control Feature */
-static inline void cec_msg_cdc_hec_inquire_state(struct cec_msg *msg,
-						 __u16 phys_addr1,
-						 __u16 phys_addr2)
-{
-	msg->len = 9;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE;
-	msg->msg[5] = phys_addr1 >> 8;
-	msg->msg[6] = phys_addr1 & 0xff;
-	msg->msg[7] = phys_addr2 >> 8;
-	msg->msg[8] = phys_addr2 & 0xff;
-}
-
-static inline void cec_ops_cdc_hec_inquire_state(const struct cec_msg *msg,
-						 __u16 *phys_addr,
-						 __u16 *phys_addr1,
-						 __u16 *phys_addr2)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
-	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
-}
-
-static inline void cec_msg_cdc_hec_report_state(struct cec_msg *msg,
-						__u16 target_phys_addr,
-						__u8 hec_func_state,
-						__u8 host_func_state,
-						__u8 enc_func_state,
-						__u8 cdc_errcode,
-						__u8 has_field,
-						__u16 hec_field)
-{
-	msg->len = has_field ? 10 : 8;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_REPORT_STATE;
-	msg->msg[5] = target_phys_addr >> 8;
-	msg->msg[6] = target_phys_addr & 0xff;
-	msg->msg[7] = (hec_func_state << 6) |
-		      (host_func_state << 4) |
-		      (enc_func_state << 2) |
-		      cdc_errcode;
-	if (has_field) {
-		msg->msg[8] = hec_field >> 8;
-		msg->msg[9] = hec_field & 0xff;
-	}
-}
-
-static inline void cec_ops_cdc_hec_report_state(const struct cec_msg *msg,
-						__u16 *phys_addr,
-						__u16 *target_phys_addr,
-						__u8 *hec_func_state,
-						__u8 *host_func_state,
-						__u8 *enc_func_state,
-						__u8 *cdc_errcode,
-						__u8 *has_field,
-						__u16 *hec_field)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*target_phys_addr = (msg->msg[5] << 8) | msg->msg[6];
-	*hec_func_state = msg->msg[7] >> 6;
-	*host_func_state = (msg->msg[7] >> 4) & 3;
-	*enc_func_state = (msg->msg[7] >> 4) & 3;
-	*cdc_errcode = msg->msg[7] & 3;
-	*has_field = msg->len >= 10;
-	*hec_field = *has_field ? ((msg->msg[8] << 8) | msg->msg[9]) : 0;
-}
-
-static inline void cec_msg_cdc_hec_set_state(struct cec_msg *msg,
-					     __u16 phys_addr1,
-					     __u16 phys_addr2,
-					     __u8 hec_set_state,
-					     __u16 phys_addr3,
-					     __u16 phys_addr4,
-					     __u16 phys_addr5)
-{
-	msg->len = 10;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE;
-	msg->msg[5] = phys_addr1 >> 8;
-	msg->msg[6] = phys_addr1 & 0xff;
-	msg->msg[7] = phys_addr2 >> 8;
-	msg->msg[8] = phys_addr2 & 0xff;
-	msg->msg[9] = hec_set_state;
-	if (phys_addr3 != CEC_PHYS_ADDR_INVALID) {
-		msg->msg[msg->len++] = phys_addr3 >> 8;
-		msg->msg[msg->len++] = phys_addr3 & 0xff;
-		if (phys_addr4 != CEC_PHYS_ADDR_INVALID) {
-			msg->msg[msg->len++] = phys_addr4 >> 8;
-			msg->msg[msg->len++] = phys_addr4 & 0xff;
-			if (phys_addr5 != CEC_PHYS_ADDR_INVALID) {
-				msg->msg[msg->len++] = phys_addr5 >> 8;
-				msg->msg[msg->len++] = phys_addr5 & 0xff;
-			}
-		}
-	}
-}
-
-static inline void cec_ops_cdc_hec_set_state(const struct cec_msg *msg,
-					     __u16 *phys_addr,
-					     __u16 *phys_addr1,
-					     __u16 *phys_addr2,
-					     __u8 *hec_set_state,
-					     __u16 *phys_addr3,
-					     __u16 *phys_addr4,
-					     __u16 *phys_addr5)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
-	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
-	*hec_set_state = msg->msg[9];
-	*phys_addr3 = *phys_addr4 = *phys_addr5 = CEC_PHYS_ADDR_INVALID;
-	if (msg->len >= 12)
-		*phys_addr3 = (msg->msg[10] << 8) | msg->msg[11];
-	if (msg->len >= 14)
-		*phys_addr4 = (msg->msg[12] << 8) | msg->msg[13];
-	if (msg->len >= 16)
-		*phys_addr5 = (msg->msg[14] << 8) | msg->msg[15];
-}
-
-static inline void cec_msg_cdc_hec_set_state_adjacent(struct cec_msg *msg,
-						      __u16 phys_addr1,
-						      __u8 hec_set_state)
-{
-	msg->len = 8;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_SET_STATE_ADJACENT;
-	msg->msg[5] = phys_addr1 >> 8;
-	msg->msg[6] = phys_addr1 & 0xff;
-	msg->msg[7] = hec_set_state;
-}
-
-static inline void cec_ops_cdc_hec_set_state_adjacent(const struct cec_msg *msg,
-						      __u16 *phys_addr,
-						      __u16 *phys_addr1,
-						      __u8 *hec_set_state)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
-	*hec_set_state = msg->msg[7];
-}
-
-static inline void cec_msg_cdc_hec_request_deactivation(struct cec_msg *msg,
-							__u16 phys_addr1,
-							__u16 phys_addr2,
-							__u16 phys_addr3)
-{
-	msg->len = 11;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_REQUEST_DEACTIVATION;
-	msg->msg[5] = phys_addr1 >> 8;
-	msg->msg[6] = phys_addr1 & 0xff;
-	msg->msg[7] = phys_addr2 >> 8;
-	msg->msg[8] = phys_addr2 & 0xff;
-	msg->msg[9] = phys_addr3 >> 8;
-	msg->msg[10] = phys_addr3 & 0xff;
-}
-
-static inline void cec_ops_cdc_hec_request_deactivation(const struct cec_msg *msg,
-							__u16 *phys_addr,
-							__u16 *phys_addr1,
-							__u16 *phys_addr2,
-							__u16 *phys_addr3)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
-	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
-	*phys_addr3 = (msg->msg[9] << 8) | msg->msg[10];
-}
-
-static inline void cec_msg_cdc_hec_notify_alive(struct cec_msg *msg)
-{
-	msg->len = 5;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_NOTIFY_ALIVE;
-}
-
-static inline void cec_ops_cdc_hec_notify_alive(const struct cec_msg *msg,
-						__u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_cdc_hec_discover(struct cec_msg *msg)
-{
-	msg->len = 5;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HEC_DISCOVER;
-}
-
-static inline void cec_ops_cdc_hec_discover(const struct cec_msg *msg,
-					    __u16 *phys_addr)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-}
-
-static inline void cec_msg_cdc_hpd_set_state(struct cec_msg *msg,
-					     __u8 input_port,
-					     __u8 hpd_state)
-{
-	msg->len = 6;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HPD_SET_STATE;
-	msg->msg[5] = (input_port << 4) | hpd_state;
-}
-
-static inline void cec_ops_cdc_hpd_set_state(const struct cec_msg *msg,
-					    __u16 *phys_addr,
-					    __u8 *input_port,
-					    __u8 *hpd_state)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*input_port = msg->msg[5] >> 4;
-	*hpd_state = msg->msg[5] & 0xf;
-}
-
-static inline void cec_msg_cdc_hpd_report_state(struct cec_msg *msg,
-						__u8 hpd_state,
-						__u8 hpd_error)
-{
-	msg->len = 6;
-	msg->msg[0] |= 0xf; /* broadcast */
-	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
-	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
-	msg->msg[4] = CEC_MSG_CDC_HPD_REPORT_STATE;
-	msg->msg[5] = (hpd_state << 4) | hpd_error;
-}
-
-static inline void cec_ops_cdc_hpd_report_state(const struct cec_msg *msg,
-						__u16 *phys_addr,
-						__u8 *hpd_state,
-						__u8 *hpd_error)
-{
-	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
-	*hpd_state = msg->msg[5] >> 4;
-	*hpd_error = msg->msg[5] & 0xf;
-}
-
-#endif
diff --git a/include/linux/cec.h b/include/linux/cec.h
deleted file mode 100644
index 9c87711c0e1c..000000000000
--- a/include/linux/cec.h
+++ /dev/null
@@ -1,1071 +0,0 @@
-/*
- * cec - HDMI Consumer Electronics Control public header
- *
- * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * Alternatively you can redistribute this file under the terms of the
- * BSD license as stated below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. The names of its contributors may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * Note: this framework is still in staging and it is likely the API
- * will change before it goes out of staging.
- *
- * Once it is moved out of staging this header will move to uapi.
- */
-#ifndef _CEC_UAPI_H
-#define _CEC_UAPI_H
-
-#include <linux/types.h>
-
-#define CEC_MAX_MSG_SIZE	16
-
-/**
- * struct cec_msg - CEC message structure.
- * @tx_ts:	Timestamp in nanoseconds using CLOCK_MONOTONIC. Set by the
- *		driver when the message transmission has finished.
- * @rx_ts:	Timestamp in nanoseconds using CLOCK_MONOTONIC. Set by the
- *		driver when the message was received.
- * @len:	Length in bytes of the message.
- * @timeout:	The timeout (in ms) that is used to timeout CEC_RECEIVE.
- *		Set to 0 if you want to wait forever. This timeout can also be
- *		used with CEC_TRANSMIT as the timeout for waiting for a reply.
- *		If 0, then it will use a 1 second timeout instead of waiting
- *		forever as is done with CEC_RECEIVE.
- * @sequence:	The framework assigns a sequence number to messages that are
- *		sent. This can be used to track replies to previously sent
- *		messages.
- * @flags:	Set to 0.
- * @msg:	The message payload.
- * @reply:	This field is ignored with CEC_RECEIVE and is only used by
- *		CEC_TRANSMIT. If non-zero, then wait for a reply with this
- *		opcode. Set to CEC_MSG_FEATURE_ABORT if you want to wait for
- *		a possible ABORT reply. If there was an error when sending the
- *		msg or FeatureAbort was returned, then reply is set to 0.
- *		If reply is non-zero upon return, then len/msg are set to
- *		the received message.
- *		If reply is zero upon return and status has the
- *		CEC_TX_STATUS_FEATURE_ABORT bit set, then len/msg are set to
- *		the received feature abort message.
- *		If reply is zero upon return and status has the
- *		CEC_TX_STATUS_MAX_RETRIES bit set, then no reply was seen at
- *		all. If reply is non-zero for CEC_TRANSMIT and the message is a
- *		broadcast, then -EINVAL is returned.
- *		if reply is non-zero, then timeout is set to 1000 (the required
- *		maximum response time).
- * @rx_status:	The message receive status bits. Set by the driver.
- * @tx_status:	The message transmit status bits. Set by the driver.
- * @tx_arb_lost_cnt: The number of 'Arbitration Lost' events. Set by the driver.
- * @tx_nack_cnt: The number of 'Not Acknowledged' events. Set by the driver.
- * @tx_low_drive_cnt: The number of 'Low Drive Detected' events. Set by the
- *		driver.
- * @tx_error_cnt: The number of 'Error' events. Set by the driver.
- */
-struct cec_msg {
-	__u64 tx_ts;
-	__u64 rx_ts;
-	__u32 len;
-	__u32 timeout;
-	__u32 sequence;
-	__u32 flags;
-	__u8 msg[CEC_MAX_MSG_SIZE];
-	__u8 reply;
-	__u8 rx_status;
-	__u8 tx_status;
-	__u8 tx_arb_lost_cnt;
-	__u8 tx_nack_cnt;
-	__u8 tx_low_drive_cnt;
-	__u8 tx_error_cnt;
-};
-
-/**
- * cec_msg_initiator - return the initiator's logical address.
- * @msg:	the message structure
- */
-static inline __u8 cec_msg_initiator(const struct cec_msg *msg)
-{
-	return msg->msg[0] >> 4;
-}
-
-/**
- * cec_msg_destination - return the destination's logical address.
- * @msg:	the message structure
- */
-static inline __u8 cec_msg_destination(const struct cec_msg *msg)
-{
-	return msg->msg[0] & 0xf;
-}
-
-/**
- * cec_msg_opcode - return the opcode of the message, -1 for poll
- * @msg:	the message structure
- */
-static inline int cec_msg_opcode(const struct cec_msg *msg)
-{
-	return msg->len > 1 ? msg->msg[1] : -1;
-}
-
-/**
- * cec_msg_is_broadcast - return true if this is a broadcast message.
- * @msg:	the message structure
- */
-static inline bool cec_msg_is_broadcast(const struct cec_msg *msg)
-{
-	return (msg->msg[0] & 0xf) == 0xf;
-}
-
-/**
- * cec_msg_init - initialize the message structure.
- * @msg:	the message structure
- * @initiator:	the logical address of the initiator
- * @destination:the logical address of the destination (0xf for broadcast)
- *
- * The whole structure is zeroed, the len field is set to 1 (i.e. a poll
- * message) and the initiator and destination are filled in.
- */
-static inline void cec_msg_init(struct cec_msg *msg,
-				__u8 initiator, __u8 destination)
-{
-	memset(msg, 0, sizeof(*msg));
-	msg->msg[0] = (initiator << 4) | destination;
-	msg->len = 1;
-}
-
-/**
- * cec_msg_set_reply_to - fill in destination/initiator in a reply message.
- * @msg:	the message structure for the reply
- * @orig:	the original message structure
- *
- * Set the msg destination to the orig initiator and the msg initiator to the
- * orig destination. Note that msg and orig may be the same pointer, in which
- * case the change is done in place.
- */
-static inline void cec_msg_set_reply_to(struct cec_msg *msg,
-					struct cec_msg *orig)
-{
-	/* The destination becomes the initiator and vice versa */
-	msg->msg[0] = (cec_msg_destination(orig) << 4) |
-		      cec_msg_initiator(orig);
-	msg->reply = msg->timeout = 0;
-}
-
-/* cec_msg flags field */
-#define CEC_MSG_FL_REPLY_TO_FOLLOWERS	(1 << 0)
-
-/* cec_msg tx/rx_status field */
-#define CEC_TX_STATUS_OK		(1 << 0)
-#define CEC_TX_STATUS_ARB_LOST		(1 << 1)
-#define CEC_TX_STATUS_NACK		(1 << 2)
-#define CEC_TX_STATUS_LOW_DRIVE		(1 << 3)
-#define CEC_TX_STATUS_ERROR		(1 << 4)
-#define CEC_TX_STATUS_MAX_RETRIES	(1 << 5)
-
-#define CEC_RX_STATUS_OK		(1 << 0)
-#define CEC_RX_STATUS_TIMEOUT		(1 << 1)
-#define CEC_RX_STATUS_FEATURE_ABORT	(1 << 2)
-
-static inline bool cec_msg_status_is_ok(const struct cec_msg *msg)
-{
-	if (msg->tx_status && !(msg->tx_status & CEC_TX_STATUS_OK))
-		return false;
-	if (msg->rx_status && !(msg->rx_status & CEC_RX_STATUS_OK))
-		return false;
-	if (!msg->tx_status && !msg->rx_status)
-		return false;
-	return !(msg->rx_status & CEC_RX_STATUS_FEATURE_ABORT);
-}
-
-#define CEC_LOG_ADDR_INVALID		0xff
-#define CEC_PHYS_ADDR_INVALID		0xffff
-
-/*
- * The maximum number of logical addresses one device can be assigned to.
- * The CEC 2.0 spec allows for only 2 logical addresses at the moment. The
- * Analog Devices CEC hardware supports 3. So let's go wild and go for 4.
- */
-#define CEC_MAX_LOG_ADDRS 4
-
-/* The logical addresses defined by CEC 2.0 */
-#define CEC_LOG_ADDR_TV			0
-#define CEC_LOG_ADDR_RECORD_1		1
-#define CEC_LOG_ADDR_RECORD_2		2
-#define CEC_LOG_ADDR_TUNER_1		3
-#define CEC_LOG_ADDR_PLAYBACK_1		4
-#define CEC_LOG_ADDR_AUDIOSYSTEM	5
-#define CEC_LOG_ADDR_TUNER_2		6
-#define CEC_LOG_ADDR_TUNER_3		7
-#define CEC_LOG_ADDR_PLAYBACK_2		8
-#define CEC_LOG_ADDR_RECORD_3		9
-#define CEC_LOG_ADDR_TUNER_4		10
-#define CEC_LOG_ADDR_PLAYBACK_3		11
-#define CEC_LOG_ADDR_BACKUP_1		12
-#define CEC_LOG_ADDR_BACKUP_2		13
-#define CEC_LOG_ADDR_SPECIFIC		14
-#define CEC_LOG_ADDR_UNREGISTERED	15 /* as initiator address */
-#define CEC_LOG_ADDR_BROADCAST		15 /* ad destination address */
-
-/* The logical address types that the CEC device wants to claim */
-#define CEC_LOG_ADDR_TYPE_TV		0
-#define CEC_LOG_ADDR_TYPE_RECORD	1
-#define CEC_LOG_ADDR_TYPE_TUNER		2
-#define CEC_LOG_ADDR_TYPE_PLAYBACK	3
-#define CEC_LOG_ADDR_TYPE_AUDIOSYSTEM	4
-#define CEC_LOG_ADDR_TYPE_SPECIFIC	5
-#define CEC_LOG_ADDR_TYPE_UNREGISTERED	6
-/*
- * Switches should use UNREGISTERED.
- * Processors should use SPECIFIC.
- */
-
-#define CEC_LOG_ADDR_MASK_TV		(1 << CEC_LOG_ADDR_TV)
-#define CEC_LOG_ADDR_MASK_RECORD	((1 << CEC_LOG_ADDR_RECORD_1) | \
-					 (1 << CEC_LOG_ADDR_RECORD_2) | \
-					 (1 << CEC_LOG_ADDR_RECORD_3))
-#define CEC_LOG_ADDR_MASK_TUNER		((1 << CEC_LOG_ADDR_TUNER_1) | \
-					 (1 << CEC_LOG_ADDR_TUNER_2) | \
-					 (1 << CEC_LOG_ADDR_TUNER_3) | \
-					 (1 << CEC_LOG_ADDR_TUNER_4))
-#define CEC_LOG_ADDR_MASK_PLAYBACK	((1 << CEC_LOG_ADDR_PLAYBACK_1) | \
-					 (1 << CEC_LOG_ADDR_PLAYBACK_2) | \
-					 (1 << CEC_LOG_ADDR_PLAYBACK_3))
-#define CEC_LOG_ADDR_MASK_AUDIOSYSTEM	(1 << CEC_LOG_ADDR_AUDIOSYSTEM)
-#define CEC_LOG_ADDR_MASK_BACKUP	((1 << CEC_LOG_ADDR_BACKUP_1) | \
-					 (1 << CEC_LOG_ADDR_BACKUP_2))
-#define CEC_LOG_ADDR_MASK_SPECIFIC	(1 << CEC_LOG_ADDR_SPECIFIC)
-#define CEC_LOG_ADDR_MASK_UNREGISTERED	(1 << CEC_LOG_ADDR_UNREGISTERED)
-
-static inline bool cec_has_tv(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_TV;
-}
-
-static inline bool cec_has_record(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_RECORD;
-}
-
-static inline bool cec_has_tuner(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_TUNER;
-}
-
-static inline bool cec_has_playback(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_PLAYBACK;
-}
-
-static inline bool cec_has_audiosystem(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_AUDIOSYSTEM;
-}
-
-static inline bool cec_has_backup(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_BACKUP;
-}
-
-static inline bool cec_has_specific(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_SPECIFIC;
-}
-
-static inline bool cec_is_unregistered(__u16 log_addr_mask)
-{
-	return log_addr_mask & CEC_LOG_ADDR_MASK_UNREGISTERED;
-}
-
-static inline bool cec_is_unconfigured(__u16 log_addr_mask)
-{
-	return log_addr_mask == 0;
-}
-
-/*
- * Use this if there is no vendor ID (CEC_G_VENDOR_ID) or if the vendor ID
- * should be disabled (CEC_S_VENDOR_ID)
- */
-#define CEC_VENDOR_ID_NONE		0xffffffff
-
-/* The message handling modes */
-/* Modes for initiator */
-#define CEC_MODE_NO_INITIATOR		(0x0 << 0)
-#define CEC_MODE_INITIATOR		(0x1 << 0)
-#define CEC_MODE_EXCL_INITIATOR		(0x2 << 0)
-#define CEC_MODE_INITIATOR_MSK		0x0f
-
-/* Modes for follower */
-#define CEC_MODE_NO_FOLLOWER		(0x0 << 4)
-#define CEC_MODE_FOLLOWER		(0x1 << 4)
-#define CEC_MODE_EXCL_FOLLOWER		(0x2 << 4)
-#define CEC_MODE_EXCL_FOLLOWER_PASSTHRU	(0x3 << 4)
-#define CEC_MODE_MONITOR		(0xe << 4)
-#define CEC_MODE_MONITOR_ALL		(0xf << 4)
-#define CEC_MODE_FOLLOWER_MSK		0xf0
-
-/* Userspace has to configure the physical address */
-#define CEC_CAP_PHYS_ADDR	(1 << 0)
-/* Userspace has to configure the logical addresses */
-#define CEC_CAP_LOG_ADDRS	(1 << 1)
-/* Userspace can transmit messages (and thus become follower as well) */
-#define CEC_CAP_TRANSMIT	(1 << 2)
-/*
- * Passthrough all messages instead of processing them.
- */
-#define CEC_CAP_PASSTHROUGH	(1 << 3)
-/* Supports remote control */
-#define CEC_CAP_RC		(1 << 4)
-/* Hardware can monitor all messages, not just directed and broadcast. */
-#define CEC_CAP_MONITOR_ALL	(1 << 5)
-
-/**
- * struct cec_caps - CEC capabilities structure.
- * @driver: name of the CEC device driver.
- * @name: name of the CEC device. @driver + @name must be unique.
- * @available_log_addrs: number of available logical addresses.
- * @capabilities: capabilities of the CEC adapter.
- * @version: version of the CEC adapter framework.
- */
-struct cec_caps {
-	char driver[32];
-	char name[32];
-	__u32 available_log_addrs;
-	__u32 capabilities;
-	__u32 version;
-};
-
-/**
- * struct cec_log_addrs - CEC logical addresses structure.
- * @log_addr: the claimed logical addresses. Set by the driver.
- * @log_addr_mask: current logical address mask. Set by the driver.
- * @cec_version: the CEC version that the adapter should implement. Set by the
- *	caller.
- * @num_log_addrs: how many logical addresses should be claimed. Set by the
- *	caller.
- * @vendor_id: the vendor ID of the device. Set by the caller.
- * @flags: flags.
- * @osd_name: the OSD name of the device. Set by the caller.
- * @primary_device_type: the primary device type for each logical address.
- *	Set by the caller.
- * @log_addr_type: the logical address types. Set by the caller.
- * @all_device_types: CEC 2.0: all device types represented by the logical
- *	address. Set by the caller.
- * @features:	CEC 2.0: The logical address features. Set by the caller.
- */
-struct cec_log_addrs {
-	__u8 log_addr[CEC_MAX_LOG_ADDRS];
-	__u16 log_addr_mask;
-	__u8 cec_version;
-	__u8 num_log_addrs;
-	__u32 vendor_id;
-	__u32 flags;
-	char osd_name[15];
-	__u8 primary_device_type[CEC_MAX_LOG_ADDRS];
-	__u8 log_addr_type[CEC_MAX_LOG_ADDRS];
-
-	/* CEC 2.0 */
-	__u8 all_device_types[CEC_MAX_LOG_ADDRS];
-	__u8 features[CEC_MAX_LOG_ADDRS][12];
-};
-
-/* Allow a fallback to unregistered */
-#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK	(1 << 0)
-/* Passthrough RC messages to the input subsystem */
-#define CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU	(1 << 1)
-/* CDC-Only device: supports only CDC messages */
-#define CEC_LOG_ADDRS_FL_CDC_ONLY		(1 << 2)
-
-/* Events */
-
-/* Event that occurs when the adapter state changes */
-#define CEC_EVENT_STATE_CHANGE		1
-/*
- * This event is sent when messages are lost because the application
- * didn't empty the message queue in time
- */
-#define CEC_EVENT_LOST_MSGS		2
-
-#define CEC_EVENT_FL_INITIAL_STATE	(1 << 0)
-
-/**
- * struct cec_event_state_change - used when the CEC adapter changes state.
- * @phys_addr: the current physical address
- * @log_addr_mask: the current logical address mask
- */
-struct cec_event_state_change {
-	__u16 phys_addr;
-	__u16 log_addr_mask;
-};
-
-/**
- * struct cec_event_lost_msgs - tells you how many messages were lost due.
- * @lost_msgs: how many messages were lost.
- */
-struct cec_event_lost_msgs {
-	__u32 lost_msgs;
-};
-
-/**
- * struct cec_event - CEC event structure
- * @ts: the timestamp of when the event was sent.
- * @event: the event.
- * array.
- * @state_change: the event payload for CEC_EVENT_STATE_CHANGE.
- * @lost_msgs: the event payload for CEC_EVENT_LOST_MSGS.
- * @raw: array to pad the union.
- */
-struct cec_event {
-	__u64 ts;
-	__u32 event;
-	__u32 flags;
-	union {
-		struct cec_event_state_change state_change;
-		struct cec_event_lost_msgs lost_msgs;
-		__u32 raw[16];
-	};
-};
-
-/* ioctls */
-
-/* Adapter capabilities */
-#define CEC_ADAP_G_CAPS		_IOWR('a',  0, struct cec_caps)
-
-/*
- * phys_addr is either 0 (if this is the CEC root device)
- * or a valid physical address obtained from the sink's EDID
- * as read by this CEC device (if this is a source device)
- * or a physical address obtained and modified from a sink
- * EDID and used for a sink CEC device.
- * If nothing is connected, then phys_addr is 0xffff.
- * See HDMI 1.4b, section 8.7 (Physical Address).
- *
- * The CEC_ADAP_S_PHYS_ADDR ioctl may not be available if that is handled
- * internally.
- */
-#define CEC_ADAP_G_PHYS_ADDR	_IOR('a',  1, __u16)
-#define CEC_ADAP_S_PHYS_ADDR	_IOW('a',  2, __u16)
-
-/*
- * Configure the CEC adapter. It sets the device type and which
- * logical types it will try to claim. It will return which
- * logical addresses it could actually claim.
- * An error is returned if the adapter is disabled or if there
- * is no physical address assigned.
- */
-
-#define CEC_ADAP_G_LOG_ADDRS	_IOR('a',  3, struct cec_log_addrs)
-#define CEC_ADAP_S_LOG_ADDRS	_IOWR('a',  4, struct cec_log_addrs)
-
-/* Transmit/receive a CEC command */
-#define CEC_TRANSMIT		_IOWR('a',  5, struct cec_msg)
-#define CEC_RECEIVE		_IOWR('a',  6, struct cec_msg)
-
-/* Dequeue CEC events */
-#define CEC_DQEVENT		_IOWR('a',  7, struct cec_event)
-
-/*
- * Get and set the message handling mode for this filehandle.
- */
-#define CEC_G_MODE		_IOR('a',  8, __u32)
-#define CEC_S_MODE		_IOW('a',  9, __u32)
-
-/*
- * The remainder of this header defines all CEC messages and operands.
- * The format matters since it the cec-ctl utility parses it to generate
- * code for implementing all these messages.
- *
- * Comments ending with 'Feature' group messages for each feature.
- * If messages are part of multiple features, then the "Has also"
- * comment is used to list the previously defined messages that are
- * supported by the feature.
- *
- * Before operands are defined a comment is added that gives the
- * name of the operand and in brackets the variable name of the
- * corresponding argument in the cec-funcs.h function.
- */
-
-/* Messages */
-
-/* One Touch Play Feature */
-#define CEC_MSG_ACTIVE_SOURCE				0x82
-#define CEC_MSG_IMAGE_VIEW_ON				0x04
-#define CEC_MSG_TEXT_VIEW_ON				0x0d
-
-
-/* Routing Control Feature */
-
-/*
- * Has also:
- *	CEC_MSG_ACTIVE_SOURCE
- */
-
-#define CEC_MSG_INACTIVE_SOURCE				0x9d
-#define CEC_MSG_REQUEST_ACTIVE_SOURCE			0x85
-#define CEC_MSG_ROUTING_CHANGE				0x80
-#define CEC_MSG_ROUTING_INFORMATION			0x81
-#define CEC_MSG_SET_STREAM_PATH				0x86
-
-
-/* Standby Feature */
-#define CEC_MSG_STANDBY					0x36
-
-
-/* One Touch Record Feature */
-#define CEC_MSG_RECORD_OFF				0x0b
-#define CEC_MSG_RECORD_ON				0x09
-/* Record Source Type Operand (rec_src_type) */
-#define CEC_OP_RECORD_SRC_OWN				1
-#define CEC_OP_RECORD_SRC_DIGITAL			2
-#define CEC_OP_RECORD_SRC_ANALOG			3
-#define CEC_OP_RECORD_SRC_EXT_PLUG			4
-#define CEC_OP_RECORD_SRC_EXT_PHYS_ADDR			5
-/* Service Identification Method Operand (service_id_method) */
-#define CEC_OP_SERVICE_ID_METHOD_BY_DIG_ID		0
-#define CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL		1
-/* Digital Service Broadcast System Operand (dig_bcast_system) */
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_GEN	0x00
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_GEN	0x01
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_GEN		0x02
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_BS		0x08
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_CS		0x09
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_T		0x0a
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_CABLE	0x10
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_SAT	0x11
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_T		0x12
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_C		0x18
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_S		0x19
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_S2		0x1a
-#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_T		0x1b
-/* Analogue Broadcast Type Operand (ana_bcast_type) */
-#define CEC_OP_ANA_BCAST_TYPE_CABLE			0
-#define CEC_OP_ANA_BCAST_TYPE_SATELLITE			1
-#define CEC_OP_ANA_BCAST_TYPE_TERRESTRIAL		2
-/* Broadcast System Operand (bcast_system) */
-#define CEC_OP_BCAST_SYSTEM_PAL_BG			0x00
-#define CEC_OP_BCAST_SYSTEM_SECAM_LQ			0x01 /* SECAM L' */
-#define CEC_OP_BCAST_SYSTEM_PAL_M			0x02
-#define CEC_OP_BCAST_SYSTEM_NTSC_M			0x03
-#define CEC_OP_BCAST_SYSTEM_PAL_I			0x04
-#define CEC_OP_BCAST_SYSTEM_SECAM_DK			0x05
-#define CEC_OP_BCAST_SYSTEM_SECAM_BG			0x06
-#define CEC_OP_BCAST_SYSTEM_SECAM_L			0x07
-#define CEC_OP_BCAST_SYSTEM_PAL_DK			0x08
-#define CEC_OP_BCAST_SYSTEM_OTHER			0x1f
-/* Channel Number Format Operand (channel_number_fmt) */
-#define CEC_OP_CHANNEL_NUMBER_FMT_1_PART		0x01
-#define CEC_OP_CHANNEL_NUMBER_FMT_2_PART		0x02
-
-#define CEC_MSG_RECORD_STATUS				0x0a
-/* Record Status Operand (rec_status) */
-#define CEC_OP_RECORD_STATUS_CUR_SRC			0x01
-#define CEC_OP_RECORD_STATUS_DIG_SERVICE		0x02
-#define CEC_OP_RECORD_STATUS_ANA_SERVICE		0x03
-#define CEC_OP_RECORD_STATUS_EXT_INPUT			0x04
-#define CEC_OP_RECORD_STATUS_NO_DIG_SERVICE		0x05
-#define CEC_OP_RECORD_STATUS_NO_ANA_SERVICE		0x06
-#define CEC_OP_RECORD_STATUS_NO_SERVICE			0x07
-#define CEC_OP_RECORD_STATUS_INVALID_EXT_PLUG		0x09
-#define CEC_OP_RECORD_STATUS_INVALID_EXT_PHYS_ADDR	0x0a
-#define CEC_OP_RECORD_STATUS_UNSUP_CA			0x0b
-#define CEC_OP_RECORD_STATUS_NO_CA_ENTITLEMENTS		0x0c
-#define CEC_OP_RECORD_STATUS_CANT_COPY_SRC		0x0d
-#define CEC_OP_RECORD_STATUS_NO_MORE_COPIES		0x0e
-#define CEC_OP_RECORD_STATUS_NO_MEDIA			0x10
-#define CEC_OP_RECORD_STATUS_PLAYING			0x11
-#define CEC_OP_RECORD_STATUS_ALREADY_RECORDING		0x12
-#define CEC_OP_RECORD_STATUS_MEDIA_PROT			0x13
-#define CEC_OP_RECORD_STATUS_NO_SIGNAL			0x14
-#define CEC_OP_RECORD_STATUS_MEDIA_PROBLEM		0x15
-#define CEC_OP_RECORD_STATUS_NO_SPACE			0x16
-#define CEC_OP_RECORD_STATUS_PARENTAL_LOCK		0x17
-#define CEC_OP_RECORD_STATUS_TERMINATED_OK		0x1a
-#define CEC_OP_RECORD_STATUS_ALREADY_TERM		0x1b
-#define CEC_OP_RECORD_STATUS_OTHER			0x1f
-
-#define CEC_MSG_RECORD_TV_SCREEN			0x0f
-
-
-/* Timer Programming Feature */
-#define CEC_MSG_CLEAR_ANALOGUE_TIMER			0x33
-/* Recording Sequence Operand (recording_seq) */
-#define CEC_OP_REC_SEQ_SUNDAY				0x01
-#define CEC_OP_REC_SEQ_MONDAY				0x02
-#define CEC_OP_REC_SEQ_TUESDAY				0x04
-#define CEC_OP_REC_SEQ_WEDNESDAY			0x08
-#define CEC_OP_REC_SEQ_THURSDAY				0x10
-#define CEC_OP_REC_SEQ_FRIDAY				0x20
-#define CEC_OP_REC_SEQ_SATERDAY				0x40
-#define CEC_OP_REC_SEQ_ONCE_ONLY			0x00
-
-#define CEC_MSG_CLEAR_DIGITAL_TIMER			0x99
-
-#define CEC_MSG_CLEAR_EXT_TIMER				0xa1
-/* External Source Specifier Operand (ext_src_spec) */
-#define CEC_OP_EXT_SRC_PLUG				0x04
-#define CEC_OP_EXT_SRC_PHYS_ADDR			0x05
-
-#define CEC_MSG_SET_ANALOGUE_TIMER			0x34
-#define CEC_MSG_SET_DIGITAL_TIMER			0x97
-#define CEC_MSG_SET_EXT_TIMER				0xa2
-
-#define CEC_MSG_SET_TIMER_PROGRAM_TITLE			0x67
-#define CEC_MSG_TIMER_CLEARED_STATUS			0x43
-/* Timer Cleared Status Data Operand (timer_cleared_status) */
-#define CEC_OP_TIMER_CLR_STAT_RECORDING			0x00
-#define CEC_OP_TIMER_CLR_STAT_NO_MATCHING		0x01
-#define CEC_OP_TIMER_CLR_STAT_NO_INFO			0x02
-#define CEC_OP_TIMER_CLR_STAT_CLEARED			0x80
-
-#define CEC_MSG_TIMER_STATUS				0x35
-/* Timer Overlap Warning Operand (timer_overlap_warning) */
-#define CEC_OP_TIMER_OVERLAP_WARNING_NO_OVERLAP		0
-#define CEC_OP_TIMER_OVERLAP_WARNING_OVERLAP		1
-/* Media Info Operand (media_info) */
-#define CEC_OP_MEDIA_INFO_UNPROT_MEDIA			0
-#define CEC_OP_MEDIA_INFO_PROT_MEDIA			1
-#define CEC_OP_MEDIA_INFO_NO_MEDIA			2
-/* Programmed Indicator Operand (prog_indicator) */
-#define CEC_OP_PROG_IND_NOT_PROGRAMMED			0
-#define CEC_OP_PROG_IND_PROGRAMMED			1
-/* Programmed Info Operand (prog_info) */
-#define CEC_OP_PROG_INFO_ENOUGH_SPACE			0x08
-#define CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE		0x09
-#define CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE	0x0b
-#define CEC_OP_PROG_INFO_NONE_AVAILABLE			0x0a
-/* Not Programmed Error Info Operand (prog_error) */
-#define CEC_OP_PROG_ERROR_NO_FREE_TIMER			0x01
-#define CEC_OP_PROG_ERROR_DATE_OUT_OF_RANGE		0x02
-#define CEC_OP_PROG_ERROR_REC_SEQ_ERROR			0x03
-#define CEC_OP_PROG_ERROR_INV_EXT_PLUG			0x04
-#define CEC_OP_PROG_ERROR_INV_EXT_PHYS_ADDR		0x05
-#define CEC_OP_PROG_ERROR_CA_UNSUPP			0x06
-#define CEC_OP_PROG_ERROR_INSUF_CA_ENTITLEMENTS		0x07
-#define CEC_OP_PROG_ERROR_RESOLUTION_UNSUPP		0x08
-#define CEC_OP_PROG_ERROR_PARENTAL_LOCK			0x09
-#define CEC_OP_PROG_ERROR_CLOCK_FAILURE			0x0a
-#define CEC_OP_PROG_ERROR_DUPLICATE			0x0e
-
-
-/* System Information Feature */
-#define CEC_MSG_CEC_VERSION				0x9e
-/* CEC Version Operand (cec_version) */
-#define CEC_OP_CEC_VERSION_1_3A				4
-#define CEC_OP_CEC_VERSION_1_4				5
-#define CEC_OP_CEC_VERSION_2_0				6
-
-#define CEC_MSG_GET_CEC_VERSION				0x9f
-#define CEC_MSG_GIVE_PHYSICAL_ADDR			0x83
-#define CEC_MSG_GET_MENU_LANGUAGE			0x91
-#define CEC_MSG_REPORT_PHYSICAL_ADDR			0x84
-/* Primary Device Type Operand (prim_devtype) */
-#define CEC_OP_PRIM_DEVTYPE_TV				0
-#define CEC_OP_PRIM_DEVTYPE_RECORD			1
-#define CEC_OP_PRIM_DEVTYPE_TUNER			3
-#define CEC_OP_PRIM_DEVTYPE_PLAYBACK			4
-#define CEC_OP_PRIM_DEVTYPE_AUDIOSYSTEM			5
-#define CEC_OP_PRIM_DEVTYPE_SWITCH			6
-#define CEC_OP_PRIM_DEVTYPE_PROCESSOR			7
-
-#define CEC_MSG_SET_MENU_LANGUAGE			0x32
-#define CEC_MSG_REPORT_FEATURES				0xa6	/* HDMI 2.0 */
-/* All Device Types Operand (all_device_types) */
-#define CEC_OP_ALL_DEVTYPE_TV				0x80
-#define CEC_OP_ALL_DEVTYPE_RECORD			0x40
-#define CEC_OP_ALL_DEVTYPE_TUNER			0x20
-#define CEC_OP_ALL_DEVTYPE_PLAYBACK			0x10
-#define CEC_OP_ALL_DEVTYPE_AUDIOSYSTEM			0x08
-#define CEC_OP_ALL_DEVTYPE_SWITCH			0x04
-/*
- * And if you wondering what happened to PROCESSOR devices: those should
- * be mapped to a SWITCH.
- */
-
-/* Valid for RC Profile and Device Feature operands */
-#define CEC_OP_FEAT_EXT					0x80	/* Extension bit */
-/* RC Profile Operand (rc_profile) */
-#define CEC_OP_FEAT_RC_TV_PROFILE_NONE			0x00
-#define CEC_OP_FEAT_RC_TV_PROFILE_1			0x02
-#define CEC_OP_FEAT_RC_TV_PROFILE_2			0x06
-#define CEC_OP_FEAT_RC_TV_PROFILE_3			0x0a
-#define CEC_OP_FEAT_RC_TV_PROFILE_4			0x0e
-#define CEC_OP_FEAT_RC_SRC_HAS_DEV_ROOT_MENU		0x50
-#define CEC_OP_FEAT_RC_SRC_HAS_DEV_SETUP_MENU		0x48
-#define CEC_OP_FEAT_RC_SRC_HAS_CONTENTS_MENU		0x44
-#define CEC_OP_FEAT_RC_SRC_HAS_MEDIA_TOP_MENU		0x42
-#define CEC_OP_FEAT_RC_SRC_HAS_MEDIA_CONTEXT_MENU	0x41
-/* Device Feature Operand (dev_features) */
-#define CEC_OP_FEAT_DEV_HAS_RECORD_TV_SCREEN		0x40
-#define CEC_OP_FEAT_DEV_HAS_SET_OSD_STRING		0x20
-#define CEC_OP_FEAT_DEV_HAS_DECK_CONTROL		0x10
-#define CEC_OP_FEAT_DEV_HAS_SET_AUDIO_RATE		0x08
-#define CEC_OP_FEAT_DEV_SINK_HAS_ARC_TX			0x04
-#define CEC_OP_FEAT_DEV_SOURCE_HAS_ARC_RX		0x02
-
-#define CEC_MSG_GIVE_FEATURES				0xa5	/* HDMI 2.0 */
-
-
-/* Deck Control Feature */
-#define CEC_MSG_DECK_CONTROL				0x42
-/* Deck Control Mode Operand (deck_control_mode) */
-#define CEC_OP_DECK_CTL_MODE_SKIP_FWD			1
-#define CEC_OP_DECK_CTL_MODE_SKIP_REV			2
-#define CEC_OP_DECK_CTL_MODE_STOP			3
-#define CEC_OP_DECK_CTL_MODE_EJECT			4
-
-#define CEC_MSG_DECK_STATUS				0x1b
-/* Deck Info Operand (deck_info) */
-#define CEC_OP_DECK_INFO_PLAY				0x11
-#define CEC_OP_DECK_INFO_RECORD				0x12
-#define CEC_OP_DECK_INFO_PLAY_REV			0x13
-#define CEC_OP_DECK_INFO_STILL				0x14
-#define CEC_OP_DECK_INFO_SLOW				0x15
-#define CEC_OP_DECK_INFO_SLOW_REV			0x16
-#define CEC_OP_DECK_INFO_FAST_FWD			0x17
-#define CEC_OP_DECK_INFO_FAST_REV			0x18
-#define CEC_OP_DECK_INFO_NO_MEDIA			0x19
-#define CEC_OP_DECK_INFO_STOP				0x1a
-#define CEC_OP_DECK_INFO_SKIP_FWD			0x1b
-#define CEC_OP_DECK_INFO_SKIP_REV			0x1c
-#define CEC_OP_DECK_INFO_INDEX_SEARCH_FWD		0x1d
-#define CEC_OP_DECK_INFO_INDEX_SEARCH_REV		0x1e
-#define CEC_OP_DECK_INFO_OTHER				0x1f
-
-#define CEC_MSG_GIVE_DECK_STATUS			0x1a
-/* Status Request Operand (status_req) */
-#define CEC_OP_STATUS_REQ_ON				1
-#define CEC_OP_STATUS_REQ_OFF				2
-#define CEC_OP_STATUS_REQ_ONCE				3
-
-#define CEC_MSG_PLAY					0x41
-/* Play Mode Operand (play_mode) */
-#define CEC_OP_PLAY_MODE_PLAY_FWD			0x24
-#define CEC_OP_PLAY_MODE_PLAY_REV			0x20
-#define CEC_OP_PLAY_MODE_PLAY_STILL			0x25
-#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MIN		0x05
-#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MED		0x06
-#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MAX		0x07
-#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MIN		0x09
-#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MED		0x0a
-#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MAX		0x0b
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MIN		0x15
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MED		0x16
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MAX		0x17
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MIN		0x19
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MED		0x1a
-#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MAX		0x1b
-
-
-/* Tuner Control Feature */
-#define CEC_MSG_GIVE_TUNER_DEVICE_STATUS		0x08
-#define CEC_MSG_SELECT_ANALOGUE_SERVICE			0x92
-#define CEC_MSG_SELECT_DIGITAL_SERVICE			0x93
-#define CEC_MSG_TUNER_DEVICE_STATUS			0x07
-/* Recording Flag Operand (rec_flag) */
-#define CEC_OP_REC_FLAG_USED				0
-#define CEC_OP_REC_FLAG_NOT_USED			1
-/* Tuner Display Info Operand (tuner_display_info) */
-#define CEC_OP_TUNER_DISPLAY_INFO_DIGITAL		0
-#define CEC_OP_TUNER_DISPLAY_INFO_NONE			1
-#define CEC_OP_TUNER_DISPLAY_INFO_ANALOGUE		2
-
-#define CEC_MSG_TUNER_STEP_DECREMENT			0x06
-#define CEC_MSG_TUNER_STEP_INCREMENT			0x05
-
-
-/* Vendor Specific Commands Feature */
-
-/*
- * Has also:
- *	CEC_MSG_CEC_VERSION
- *	CEC_MSG_GET_CEC_VERSION
- */
-#define CEC_MSG_DEVICE_VENDOR_ID			0x87
-#define CEC_MSG_GIVE_DEVICE_VENDOR_ID			0x8c
-#define CEC_MSG_VENDOR_COMMAND				0x89
-#define CEC_MSG_VENDOR_COMMAND_WITH_ID			0xa0
-#define CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN		0x8a
-#define CEC_MSG_VENDOR_REMOTE_BUTTON_UP			0x8b
-
-
-/* OSD Display Feature */
-#define CEC_MSG_SET_OSD_STRING				0x64
-/* Display Control Operand (disp_ctl) */
-#define CEC_OP_DISP_CTL_DEFAULT				0x00
-#define CEC_OP_DISP_CTL_UNTIL_CLEARED			0x40
-#define CEC_OP_DISP_CTL_CLEAR				0x80
-
-
-/* Device OSD Transfer Feature */
-#define CEC_MSG_GIVE_OSD_NAME				0x46
-#define CEC_MSG_SET_OSD_NAME				0x47
-
-
-/* Device Menu Control Feature */
-#define CEC_MSG_MENU_REQUEST				0x8d
-/* Menu Request Type Operand (menu_req) */
-#define CEC_OP_MENU_REQUEST_ACTIVATE			0x00
-#define CEC_OP_MENU_REQUEST_DEACTIVATE			0x01
-#define CEC_OP_MENU_REQUEST_QUERY			0x02
-
-#define CEC_MSG_MENU_STATUS				0x8e
-/* Menu State Operand (menu_state) */
-#define CEC_OP_MENU_STATE_ACTIVATED			0x00
-#define CEC_OP_MENU_STATE_DEACTIVATED			0x01
-
-#define CEC_MSG_USER_CONTROL_PRESSED			0x44
-/* UI Broadcast Type Operand (ui_bcast_type) */
-#define CEC_OP_UI_BCAST_TYPE_TOGGLE_ALL			0x00
-#define CEC_OP_UI_BCAST_TYPE_TOGGLE_DIG_ANA		0x01
-#define CEC_OP_UI_BCAST_TYPE_ANALOGUE			0x10
-#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_T			0x20
-#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_CABLE		0x30
-#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_SAT		0x40
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL			0x50
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL_T			0x60
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL_CABLE		0x70
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL_SAT		0x80
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL_COM_SAT		0x90
-#define CEC_OP_UI_BCAST_TYPE_DIGITAL_COM_SAT2		0x91
-#define CEC_OP_UI_BCAST_TYPE_IP				0xa0
-/* UI Sound Presentation Control Operand (ui_snd_pres_ctl) */
-#define CEC_OP_UI_SND_PRES_CTL_DUAL_MONO		0x10
-#define CEC_OP_UI_SND_PRES_CTL_KARAOKE			0x20
-#define CEC_OP_UI_SND_PRES_CTL_DOWNMIX			0x80
-#define CEC_OP_UI_SND_PRES_CTL_REVERB			0x90
-#define CEC_OP_UI_SND_PRES_CTL_EQUALIZER		0xa0
-#define CEC_OP_UI_SND_PRES_CTL_BASS_UP			0xb1
-#define CEC_OP_UI_SND_PRES_CTL_BASS_NEUTRAL		0xb2
-#define CEC_OP_UI_SND_PRES_CTL_BASS_DOWN		0xb3
-#define CEC_OP_UI_SND_PRES_CTL_TREBLE_UP		0xc1
-#define CEC_OP_UI_SND_PRES_CTL_TREBLE_NEUTRAL		0xc2
-#define CEC_OP_UI_SND_PRES_CTL_TREBLE_DOWN		0xc3
-
-#define CEC_MSG_USER_CONTROL_RELEASED			0x45
-
-
-/* Remote Control Passthrough Feature */
-
-/*
- * Has also:
- *	CEC_MSG_USER_CONTROL_PRESSED
- *	CEC_MSG_USER_CONTROL_RELEASED
- */
-
-
-/* Power Status Feature */
-#define CEC_MSG_GIVE_DEVICE_POWER_STATUS		0x8f
-#define CEC_MSG_REPORT_POWER_STATUS			0x90
-/* Power Status Operand (pwr_state) */
-#define CEC_OP_POWER_STATUS_ON				0
-#define CEC_OP_POWER_STATUS_STANDBY			1
-#define CEC_OP_POWER_STATUS_TO_ON			2
-#define CEC_OP_POWER_STATUS_TO_STANDBY			3
-
-
-/* General Protocol Messages */
-#define CEC_MSG_FEATURE_ABORT				0x00
-/* Abort Reason Operand (reason) */
-#define CEC_OP_ABORT_UNRECOGNIZED_OP			0
-#define CEC_OP_ABORT_INCORRECT_MODE			1
-#define CEC_OP_ABORT_NO_SOURCE				2
-#define CEC_OP_ABORT_INVALID_OP				3
-#define CEC_OP_ABORT_REFUSED				4
-#define CEC_OP_ABORT_UNDETERMINED			5
-
-#define CEC_MSG_ABORT					0xff
-
-
-/* System Audio Control Feature */
-
-/*
- * Has also:
- *	CEC_MSG_USER_CONTROL_PRESSED
- *	CEC_MSG_USER_CONTROL_RELEASED
- */
-#define CEC_MSG_GIVE_AUDIO_STATUS			0x71
-#define CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS		0x7d
-#define CEC_MSG_REPORT_AUDIO_STATUS			0x7a
-/* Audio Mute Status Operand (aud_mute_status) */
-#define CEC_OP_AUD_MUTE_STATUS_OFF			0
-#define CEC_OP_AUD_MUTE_STATUS_ON			1
-
-#define CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR		0xa3
-#define CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR		0xa4
-#define CEC_MSG_SET_SYSTEM_AUDIO_MODE			0x72
-/* System Audio Status Operand (sys_aud_status) */
-#define CEC_OP_SYS_AUD_STATUS_OFF			0
-#define CEC_OP_SYS_AUD_STATUS_ON			1
-
-#define CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST		0x70
-#define CEC_MSG_SYSTEM_AUDIO_MODE_STATUS		0x7e
-/* Audio Format ID Operand (audio_format_id) */
-#define CEC_OP_AUD_FMT_ID_CEA861			0
-#define CEC_OP_AUD_FMT_ID_CEA861_CXT			1
-
-
-/* Audio Rate Control Feature */
-#define CEC_MSG_SET_AUDIO_RATE				0x9a
-/* Audio Rate Operand (audio_rate) */
-#define CEC_OP_AUD_RATE_OFF				0
-#define CEC_OP_AUD_RATE_WIDE_STD			1
-#define CEC_OP_AUD_RATE_WIDE_FAST			2
-#define CEC_OP_AUD_RATE_WIDE_SLOW			3
-#define CEC_OP_AUD_RATE_NARROW_STD			4
-#define CEC_OP_AUD_RATE_NARROW_FAST			5
-#define CEC_OP_AUD_RATE_NARROW_SLOW			6
-
-
-/* Audio Return Channel Control Feature */
-#define CEC_MSG_INITIATE_ARC				0xc0
-#define CEC_MSG_REPORT_ARC_INITIATED			0xc1
-#define CEC_MSG_REPORT_ARC_TERMINATED			0xc2
-#define CEC_MSG_REQUEST_ARC_INITIATION			0xc3
-#define CEC_MSG_REQUEST_ARC_TERMINATION			0xc4
-#define CEC_MSG_TERMINATE_ARC				0xc5
-
-
-/* Dynamic Audio Lipsync Feature */
-/* Only for CEC 2.0 and up */
-#define CEC_MSG_REQUEST_CURRENT_LATENCY			0xa7
-#define CEC_MSG_REPORT_CURRENT_LATENCY			0xa8
-/* Low Latency Mode Operand (low_latency_mode) */
-#define CEC_OP_LOW_LATENCY_MODE_OFF			0
-#define CEC_OP_LOW_LATENCY_MODE_ON			1
-/* Audio Output Compensated Operand (audio_out_compensated) */
-#define CEC_OP_AUD_OUT_COMPENSATED_NA			0
-#define CEC_OP_AUD_OUT_COMPENSATED_DELAY		1
-#define CEC_OP_AUD_OUT_COMPENSATED_NO_DELAY		2
-#define CEC_OP_AUD_OUT_COMPENSATED_PARTIAL_DELAY	3
-
-
-/* Capability Discovery and Control Feature */
-#define CEC_MSG_CDC_MESSAGE				0xf8
-/* Ethernet-over-HDMI: nobody ever does this... */
-#define CEC_MSG_CDC_HEC_INQUIRE_STATE			0x00
-#define CEC_MSG_CDC_HEC_REPORT_STATE			0x01
-/* HEC Functionality State Operand (hec_func_state) */
-#define CEC_OP_HEC_FUNC_STATE_NOT_SUPPORTED		0
-#define CEC_OP_HEC_FUNC_STATE_INACTIVE			1
-#define CEC_OP_HEC_FUNC_STATE_ACTIVE			2
-#define CEC_OP_HEC_FUNC_STATE_ACTIVATION_FIELD		3
-/* Host Functionality State Operand (host_func_state) */
-#define CEC_OP_HOST_FUNC_STATE_NOT_SUPPORTED		0
-#define CEC_OP_HOST_FUNC_STATE_INACTIVE			1
-#define CEC_OP_HOST_FUNC_STATE_ACTIVE			2
-/* ENC Functionality State Operand (enc_func_state) */
-#define CEC_OP_ENC_FUNC_STATE_EXT_CON_NOT_SUPPORTED	0
-#define CEC_OP_ENC_FUNC_STATE_EXT_CON_INACTIVE		1
-#define CEC_OP_ENC_FUNC_STATE_EXT_CON_ACTIVE		2
-/* CDC Error Code Operand (cdc_errcode) */
-#define CEC_OP_CDC_ERROR_CODE_NONE			0
-#define CEC_OP_CDC_ERROR_CODE_CAP_UNSUPPORTED		1
-#define CEC_OP_CDC_ERROR_CODE_WRONG_STATE		2
-#define CEC_OP_CDC_ERROR_CODE_OTHER			3
-/* HEC Support Operand (hec_support) */
-#define CEC_OP_HEC_SUPPORT_NO				0
-#define CEC_OP_HEC_SUPPORT_YES				1
-/* HEC Activation Operand (hec_activation) */
-#define CEC_OP_HEC_ACTIVATION_ON			0
-#define CEC_OP_HEC_ACTIVATION_OFF			1
-
-#define CEC_MSG_CDC_HEC_SET_STATE_ADJACENT		0x02
-#define CEC_MSG_CDC_HEC_SET_STATE			0x03
-/* HEC Set State Operand (hec_set_state) */
-#define CEC_OP_HEC_SET_STATE_DEACTIVATE			0
-#define CEC_OP_HEC_SET_STATE_ACTIVATE			1
-
-#define CEC_MSG_CDC_HEC_REQUEST_DEACTIVATION		0x04
-#define CEC_MSG_CDC_HEC_NOTIFY_ALIVE			0x05
-#define CEC_MSG_CDC_HEC_DISCOVER			0x06
-/* Hotplug Detect messages */
-#define CEC_MSG_CDC_HPD_SET_STATE			0x10
-/* HPD State Operand (hpd_state) */
-#define CEC_OP_HPD_STATE_CP_EDID_DISABLE		0
-#define CEC_OP_HPD_STATE_CP_EDID_ENABLE			1
-#define CEC_OP_HPD_STATE_CP_EDID_DISABLE_ENABLE		2
-#define CEC_OP_HPD_STATE_EDID_DISABLE			3
-#define CEC_OP_HPD_STATE_EDID_ENABLE			4
-#define CEC_OP_HPD_STATE_EDID_DISABLE_ENABLE		5
-#define CEC_MSG_CDC_HPD_REPORT_STATE			0x11
-/* HPD Error Code Operand (hpd_error) */
-#define CEC_OP_HPD_ERROR_NONE				0
-#define CEC_OP_HPD_ERROR_INITIATOR_NOT_CAPABLE		1
-#define CEC_OP_HPD_ERROR_INITIATOR_WRONG_STATE		2
-#define CEC_OP_HPD_ERROR_OTHER				3
-#define CEC_OP_HPD_ERROR_NONE_NO_VIDEO			4
-
-/* End of Messages */
-
-/* Helper functions to identify the 'special' CEC devices */
-
-static inline bool cec_is_2nd_tv(const struct cec_log_addrs *las)
-{
-	/*
-	 * It is a second TV if the logical address is 14 or 15 and the
-	 * primary device type is a TV.
-	 */
-	return las->num_log_addrs &&
-	       las->log_addr[0] >= CEC_LOG_ADDR_SPECIFIC &&
-	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_TV;
-}
-
-static inline bool cec_is_processor(const struct cec_log_addrs *las)
-{
-	/*
-	 * It is a processor if the logical address is 12-15 and the
-	 * primary device type is a Processor.
-	 */
-	return las->num_log_addrs &&
-	       las->log_addr[0] >= CEC_LOG_ADDR_BACKUP_1 &&
-	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_PROCESSOR;
-}
-
-static inline bool cec_is_switch(const struct cec_log_addrs *las)
-{
-	/*
-	 * It is a switch if the logical address is 15 and the
-	 * primary device type is a Switch and the CDC-Only flag is not set.
-	 */
-	return las->num_log_addrs == 1 &&
-	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
-	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
-	       !(las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
-}
-
-static inline bool cec_is_cdc_only(const struct cec_log_addrs *las)
-{
-	/*
-	 * It is a CDC-only device if the logical address is 15 and the
-	 * primary device type is a Switch and the CDC-Only flag is set.
-	 */
-	return las->num_log_addrs == 1 &&
-	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
-	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
-	       (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
-}
-
-#endif
diff --git a/include/media/cec.h b/include/media/cec.h
index fdb5d600e4bb..717eaf552f3d 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -196,7 +196,7 @@ static inline bool cec_is_sink(const struct cec_adapter *adap)
 	return adap->phys_addr == 0;
 }
 
-#if IS_ENABLED(CONFIG_MEDIA_CEC)
+#if IS_ENABLED(CONFIG_MEDIA_CEC_SUPPORT)
 struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
 		void *priv, const char *name, u32 caps, u8 available_las,
 		struct device *parent);
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6965d0909554..c49c448cff92 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -82,6 +82,8 @@ header-y += capi.h
 header-y += cciss_defs.h
 header-y += cciss_ioctl.h
 header-y += cdrom.h
+header-y += cec.h
+header-y += cec-funcs.h
 header-y += cgroupstats.h
 header-y += chio.h
 header-y += cm4000_cs.h
diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h
new file mode 100644
index 000000000000..1a1de2169f48
--- /dev/null
+++ b/include/uapi/linux/cec-funcs.h
@@ -0,0 +1,1965 @@
+/*
+ * cec - HDMI Consumer Electronics Control message functions
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Alternatively you can redistribute this file under the terms of the
+ * BSD license as stated below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. The names of its contributors may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CEC_UAPI_FUNCS_H
+#define _CEC_UAPI_FUNCS_H
+
+#include <linux/cec.h>
+
+/* One Touch Play Feature */
+static inline void cec_msg_active_source(struct cec_msg *msg, __u16 phys_addr)
+{
+	msg->len = 4;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_ACTIVE_SOURCE;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+}
+
+static inline void cec_ops_active_source(const struct cec_msg *msg,
+					 __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_image_view_on(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_IMAGE_VIEW_ON;
+}
+
+static inline void cec_msg_text_view_on(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_TEXT_VIEW_ON;
+}
+
+
+/* Routing Control Feature */
+static inline void cec_msg_inactive_source(struct cec_msg *msg,
+					   __u16 phys_addr)
+{
+	msg->len = 4;
+	msg->msg[1] = CEC_MSG_INACTIVE_SOURCE;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+}
+
+static inline void cec_ops_inactive_source(const struct cec_msg *msg,
+					   __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_request_active_source(struct cec_msg *msg,
+						 bool reply)
+{
+	msg->len = 2;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_REQUEST_ACTIVE_SOURCE;
+	msg->reply = reply ? CEC_MSG_ACTIVE_SOURCE : 0;
+}
+
+static inline void cec_msg_routing_information(struct cec_msg *msg,
+					       __u16 phys_addr)
+{
+	msg->len = 4;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_ROUTING_INFORMATION;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+}
+
+static inline void cec_ops_routing_information(const struct cec_msg *msg,
+					       __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_routing_change(struct cec_msg *msg,
+					  bool reply,
+					  __u16 orig_phys_addr,
+					  __u16 new_phys_addr)
+{
+	msg->len = 6;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_ROUTING_CHANGE;
+	msg->msg[2] = orig_phys_addr >> 8;
+	msg->msg[3] = orig_phys_addr & 0xff;
+	msg->msg[4] = new_phys_addr >> 8;
+	msg->msg[5] = new_phys_addr & 0xff;
+	msg->reply = reply ? CEC_MSG_ROUTING_INFORMATION : 0;
+}
+
+static inline void cec_ops_routing_change(const struct cec_msg *msg,
+					  __u16 *orig_phys_addr,
+					  __u16 *new_phys_addr)
+{
+	*orig_phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*new_phys_addr = (msg->msg[4] << 8) | msg->msg[5];
+}
+
+static inline void cec_msg_set_stream_path(struct cec_msg *msg, __u16 phys_addr)
+{
+	msg->len = 4;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_SET_STREAM_PATH;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+}
+
+static inline void cec_ops_set_stream_path(const struct cec_msg *msg,
+					   __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+
+/* Standby Feature */
+static inline void cec_msg_standby(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_STANDBY;
+}
+
+
+/* One Touch Record Feature */
+static inline void cec_msg_record_off(struct cec_msg *msg, bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_RECORD_OFF;
+	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
+}
+
+struct cec_op_arib_data {
+	__u16 transport_id;
+	__u16 service_id;
+	__u16 orig_network_id;
+};
+
+struct cec_op_atsc_data {
+	__u16 transport_id;
+	__u16 program_number;
+};
+
+struct cec_op_dvb_data {
+	__u16 transport_id;
+	__u16 service_id;
+	__u16 orig_network_id;
+};
+
+struct cec_op_channel_data {
+	__u8 channel_number_fmt;
+	__u16 major;
+	__u16 minor;
+};
+
+struct cec_op_digital_service_id {
+	__u8 service_id_method;
+	__u8 dig_bcast_system;
+	union {
+		struct cec_op_arib_data arib;
+		struct cec_op_atsc_data atsc;
+		struct cec_op_dvb_data dvb;
+		struct cec_op_channel_data channel;
+	};
+};
+
+struct cec_op_record_src {
+	__u8 type;
+	union {
+		struct cec_op_digital_service_id digital;
+		struct {
+			__u8 ana_bcast_type;
+			__u16 ana_freq;
+			__u8 bcast_system;
+		} analog;
+		struct {
+			__u8 plug;
+		} ext_plug;
+		struct {
+			__u16 phys_addr;
+		} ext_phys_addr;
+	};
+};
+
+static inline void cec_set_digital_service_id(__u8 *msg,
+	      const struct cec_op_digital_service_id *digital)
+{
+	*msg++ = (digital->service_id_method << 7) | digital->dig_bcast_system;
+	if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
+		*msg++ = (digital->channel.channel_number_fmt << 2) |
+			 (digital->channel.major >> 8);
+		*msg++ = digital->channel.major & 0xff;
+		*msg++ = digital->channel.minor >> 8;
+		*msg++ = digital->channel.minor & 0xff;
+		*msg++ = 0;
+		*msg++ = 0;
+		return;
+	}
+	switch (digital->dig_bcast_system) {
+	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_GEN:
+	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_CABLE:
+	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_SAT:
+	case CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_T:
+		*msg++ = digital->atsc.transport_id >> 8;
+		*msg++ = digital->atsc.transport_id & 0xff;
+		*msg++ = digital->atsc.program_number >> 8;
+		*msg++ = digital->atsc.program_number & 0xff;
+		*msg++ = 0;
+		*msg++ = 0;
+		break;
+	default:
+		*msg++ = digital->dvb.transport_id >> 8;
+		*msg++ = digital->dvb.transport_id & 0xff;
+		*msg++ = digital->dvb.service_id >> 8;
+		*msg++ = digital->dvb.service_id & 0xff;
+		*msg++ = digital->dvb.orig_network_id >> 8;
+		*msg++ = digital->dvb.orig_network_id & 0xff;
+		break;
+	}
+}
+
+static inline void cec_get_digital_service_id(const __u8 *msg,
+	      struct cec_op_digital_service_id *digital)
+{
+	digital->service_id_method = msg[0] >> 7;
+	digital->dig_bcast_system = msg[0] & 0x7f;
+	if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
+		digital->channel.channel_number_fmt = msg[1] >> 2;
+		digital->channel.major = ((msg[1] & 3) << 6) | msg[2];
+		digital->channel.minor = (msg[3] << 8) | msg[4];
+		return;
+	}
+	digital->dvb.transport_id = (msg[1] << 8) | msg[2];
+	digital->dvb.service_id = (msg[3] << 8) | msg[4];
+	digital->dvb.orig_network_id = (msg[5] << 8) | msg[6];
+}
+
+static inline void cec_msg_record_on_own(struct cec_msg *msg)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_RECORD_ON;
+	msg->msg[2] = CEC_OP_RECORD_SRC_OWN;
+}
+
+static inline void cec_msg_record_on_digital(struct cec_msg *msg,
+			     const struct cec_op_digital_service_id *digital)
+{
+	msg->len = 10;
+	msg->msg[1] = CEC_MSG_RECORD_ON;
+	msg->msg[2] = CEC_OP_RECORD_SRC_DIGITAL;
+	cec_set_digital_service_id(msg->msg + 3, digital);
+}
+
+static inline void cec_msg_record_on_analog(struct cec_msg *msg,
+					    __u8 ana_bcast_type,
+					    __u16 ana_freq,
+					    __u8 bcast_system)
+{
+	msg->len = 7;
+	msg->msg[1] = CEC_MSG_RECORD_ON;
+	msg->msg[2] = CEC_OP_RECORD_SRC_ANALOG;
+	msg->msg[3] = ana_bcast_type;
+	msg->msg[4] = ana_freq >> 8;
+	msg->msg[5] = ana_freq & 0xff;
+	msg->msg[6] = bcast_system;
+}
+
+static inline void cec_msg_record_on_plug(struct cec_msg *msg,
+					  __u8 plug)
+{
+	msg->len = 4;
+	msg->msg[1] = CEC_MSG_RECORD_ON;
+	msg->msg[2] = CEC_OP_RECORD_SRC_EXT_PLUG;
+	msg->msg[3] = plug;
+}
+
+static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg,
+					       __u16 phys_addr)
+{
+	msg->len = 5;
+	msg->msg[1] = CEC_MSG_RECORD_ON;
+	msg->msg[2] = CEC_OP_RECORD_SRC_EXT_PHYS_ADDR;
+	msg->msg[3] = phys_addr >> 8;
+	msg->msg[4] = phys_addr & 0xff;
+}
+
+static inline void cec_msg_record_on(struct cec_msg *msg,
+				     bool reply,
+				     const struct cec_op_record_src *rec_src)
+{
+	switch (rec_src->type) {
+	case CEC_OP_RECORD_SRC_OWN:
+		cec_msg_record_on_own(msg);
+		break;
+	case CEC_OP_RECORD_SRC_DIGITAL:
+		cec_msg_record_on_digital(msg, &rec_src->digital);
+		break;
+	case CEC_OP_RECORD_SRC_ANALOG:
+		cec_msg_record_on_analog(msg,
+					 rec_src->analog.ana_bcast_type,
+					 rec_src->analog.ana_freq,
+					 rec_src->analog.bcast_system);
+		break;
+	case CEC_OP_RECORD_SRC_EXT_PLUG:
+		cec_msg_record_on_plug(msg, rec_src->ext_plug.plug);
+		break;
+	case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
+		cec_msg_record_on_phys_addr(msg,
+					    rec_src->ext_phys_addr.phys_addr);
+		break;
+	}
+	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
+}
+
+static inline void cec_ops_record_on(const struct cec_msg *msg,
+				     struct cec_op_record_src *rec_src)
+{
+	rec_src->type = msg->msg[2];
+	switch (rec_src->type) {
+	case CEC_OP_RECORD_SRC_OWN:
+		break;
+	case CEC_OP_RECORD_SRC_DIGITAL:
+		cec_get_digital_service_id(msg->msg + 3, &rec_src->digital);
+		break;
+	case CEC_OP_RECORD_SRC_ANALOG:
+		rec_src->analog.ana_bcast_type = msg->msg[3];
+		rec_src->analog.ana_freq =
+			(msg->msg[4] << 8) | msg->msg[5];
+		rec_src->analog.bcast_system = msg->msg[6];
+		break;
+	case CEC_OP_RECORD_SRC_EXT_PLUG:
+		rec_src->ext_plug.plug = msg->msg[3];
+		break;
+	case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR:
+		rec_src->ext_phys_addr.phys_addr =
+			(msg->msg[3] << 8) | msg->msg[4];
+		break;
+	}
+}
+
+static inline void cec_msg_record_status(struct cec_msg *msg, __u8 rec_status)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_RECORD_STATUS;
+	msg->msg[2] = rec_status;
+}
+
+static inline void cec_ops_record_status(const struct cec_msg *msg,
+					 __u8 *rec_status)
+{
+	*rec_status = msg->msg[2];
+}
+
+static inline void cec_msg_record_tv_screen(struct cec_msg *msg,
+					    bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_RECORD_TV_SCREEN;
+	msg->reply = reply ? CEC_MSG_RECORD_ON : 0;
+}
+
+
+/* Timer Programming Feature */
+static inline void cec_msg_timer_status(struct cec_msg *msg,
+					__u8 timer_overlap_warning,
+					__u8 media_info,
+					__u8 prog_info,
+					__u8 prog_error,
+					__u8 duration_hr,
+					__u8 duration_min)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_TIMER_STATUS;
+	msg->msg[2] = (timer_overlap_warning << 7) |
+		(media_info << 5) |
+		(prog_info ? 0x10 : 0) |
+		(prog_info ? prog_info : prog_error);
+	if (prog_info == CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE ||
+	    prog_info == CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE ||
+	    prog_error == CEC_OP_PROG_ERROR_DUPLICATE) {
+		msg->len += 2;
+		msg->msg[3] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+		msg->msg[4] = ((duration_min / 10) << 4) | (duration_min % 10);
+	}
+}
+
+static inline void cec_ops_timer_status(const struct cec_msg *msg,
+					__u8 *timer_overlap_warning,
+					__u8 *media_info,
+					__u8 *prog_info,
+					__u8 *prog_error,
+					__u8 *duration_hr,
+					__u8 *duration_min)
+{
+	*timer_overlap_warning = msg->msg[2] >> 7;
+	*media_info = (msg->msg[2] >> 5) & 3;
+	if (msg->msg[2] & 0x10) {
+		*prog_info = msg->msg[2] & 0xf;
+		*prog_error = 0;
+	} else {
+		*prog_info = 0;
+		*prog_error = msg->msg[2] & 0xf;
+	}
+	if (*prog_info == CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE ||
+	    *prog_info == CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE ||
+	    *prog_error == CEC_OP_PROG_ERROR_DUPLICATE) {
+		*duration_hr = (msg->msg[3] >> 4) * 10 + (msg->msg[3] & 0xf);
+		*duration_min = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	} else {
+		*duration_hr = *duration_min = 0;
+	}
+}
+
+static inline void cec_msg_timer_cleared_status(struct cec_msg *msg,
+						__u8 timer_cleared_status)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_TIMER_CLEARED_STATUS;
+	msg->msg[2] = timer_cleared_status;
+}
+
+static inline void cec_ops_timer_cleared_status(const struct cec_msg *msg,
+						__u8 *timer_cleared_status)
+{
+	*timer_cleared_status = msg->msg[2];
+}
+
+static inline void cec_msg_clear_analogue_timer(struct cec_msg *msg,
+						bool reply,
+						__u8 day,
+						__u8 month,
+						__u8 start_hr,
+						__u8 start_min,
+						__u8 duration_hr,
+						__u8 duration_min,
+						__u8 recording_seq,
+						__u8 ana_bcast_type,
+						__u16 ana_freq,
+						__u8 bcast_system)
+{
+	msg->len = 13;
+	msg->msg[1] = CEC_MSG_CLEAR_ANALOGUE_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	msg->msg[9] = ana_bcast_type;
+	msg->msg[10] = ana_freq >> 8;
+	msg->msg[11] = ana_freq & 0xff;
+	msg->msg[12] = bcast_system;
+	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
+}
+
+static inline void cec_ops_clear_analogue_timer(const struct cec_msg *msg,
+						__u8 *day,
+						__u8 *month,
+						__u8 *start_hr,
+						__u8 *start_min,
+						__u8 *duration_hr,
+						__u8 *duration_min,
+						__u8 *recording_seq,
+						__u8 *ana_bcast_type,
+						__u16 *ana_freq,
+						__u8 *bcast_system)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	*ana_bcast_type = msg->msg[9];
+	*ana_freq = (msg->msg[10] << 8) | msg->msg[11];
+	*bcast_system = msg->msg[12];
+}
+
+static inline void cec_msg_clear_digital_timer(struct cec_msg *msg,
+				bool reply,
+				__u8 day,
+				__u8 month,
+				__u8 start_hr,
+				__u8 start_min,
+				__u8 duration_hr,
+				__u8 duration_min,
+				__u8 recording_seq,
+				const struct cec_op_digital_service_id *digital)
+{
+	msg->len = 16;
+	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
+	msg->msg[1] = CEC_MSG_CLEAR_DIGITAL_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	cec_set_digital_service_id(msg->msg + 9, digital);
+}
+
+static inline void cec_ops_clear_digital_timer(const struct cec_msg *msg,
+				__u8 *day,
+				__u8 *month,
+				__u8 *start_hr,
+				__u8 *start_min,
+				__u8 *duration_hr,
+				__u8 *duration_min,
+				__u8 *recording_seq,
+				struct cec_op_digital_service_id *digital)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	cec_get_digital_service_id(msg->msg + 9, digital);
+}
+
+static inline void cec_msg_clear_ext_timer(struct cec_msg *msg,
+					   bool reply,
+					   __u8 day,
+					   __u8 month,
+					   __u8 start_hr,
+					   __u8 start_min,
+					   __u8 duration_hr,
+					   __u8 duration_min,
+					   __u8 recording_seq,
+					   __u8 ext_src_spec,
+					   __u8 plug,
+					   __u16 phys_addr)
+{
+	msg->len = 13;
+	msg->msg[1] = CEC_MSG_CLEAR_EXT_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	msg->msg[9] = ext_src_spec;
+	msg->msg[10] = plug;
+	msg->msg[11] = phys_addr >> 8;
+	msg->msg[12] = phys_addr & 0xff;
+	msg->reply = reply ? CEC_MSG_TIMER_CLEARED_STATUS : 0;
+}
+
+static inline void cec_ops_clear_ext_timer(const struct cec_msg *msg,
+					   __u8 *day,
+					   __u8 *month,
+					   __u8 *start_hr,
+					   __u8 *start_min,
+					   __u8 *duration_hr,
+					   __u8 *duration_min,
+					   __u8 *recording_seq,
+					   __u8 *ext_src_spec,
+					   __u8 *plug,
+					   __u16 *phys_addr)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	*ext_src_spec = msg->msg[9];
+	*plug = msg->msg[10];
+	*phys_addr = (msg->msg[11] << 8) | msg->msg[12];
+}
+
+static inline void cec_msg_set_analogue_timer(struct cec_msg *msg,
+					      bool reply,
+					      __u8 day,
+					      __u8 month,
+					      __u8 start_hr,
+					      __u8 start_min,
+					      __u8 duration_hr,
+					      __u8 duration_min,
+					      __u8 recording_seq,
+					      __u8 ana_bcast_type,
+					      __u16 ana_freq,
+					      __u8 bcast_system)
+{
+	msg->len = 13;
+	msg->msg[1] = CEC_MSG_SET_ANALOGUE_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	msg->msg[9] = ana_bcast_type;
+	msg->msg[10] = ana_freq >> 8;
+	msg->msg[11] = ana_freq & 0xff;
+	msg->msg[12] = bcast_system;
+	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
+}
+
+static inline void cec_ops_set_analogue_timer(const struct cec_msg *msg,
+					      __u8 *day,
+					      __u8 *month,
+					      __u8 *start_hr,
+					      __u8 *start_min,
+					      __u8 *duration_hr,
+					      __u8 *duration_min,
+					      __u8 *recording_seq,
+					      __u8 *ana_bcast_type,
+					      __u16 *ana_freq,
+					      __u8 *bcast_system)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	*ana_bcast_type = msg->msg[9];
+	*ana_freq = (msg->msg[10] << 8) | msg->msg[11];
+	*bcast_system = msg->msg[12];
+}
+
+static inline void cec_msg_set_digital_timer(struct cec_msg *msg,
+			bool reply,
+			__u8 day,
+			__u8 month,
+			__u8 start_hr,
+			__u8 start_min,
+			__u8 duration_hr,
+			__u8 duration_min,
+			__u8 recording_seq,
+			const struct cec_op_digital_service_id *digital)
+{
+	msg->len = 16;
+	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
+	msg->msg[1] = CEC_MSG_SET_DIGITAL_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	cec_set_digital_service_id(msg->msg + 9, digital);
+}
+
+static inline void cec_ops_set_digital_timer(const struct cec_msg *msg,
+			__u8 *day,
+			__u8 *month,
+			__u8 *start_hr,
+			__u8 *start_min,
+			__u8 *duration_hr,
+			__u8 *duration_min,
+			__u8 *recording_seq,
+			struct cec_op_digital_service_id *digital)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	cec_get_digital_service_id(msg->msg + 9, digital);
+}
+
+static inline void cec_msg_set_ext_timer(struct cec_msg *msg,
+					 bool reply,
+					 __u8 day,
+					 __u8 month,
+					 __u8 start_hr,
+					 __u8 start_min,
+					 __u8 duration_hr,
+					 __u8 duration_min,
+					 __u8 recording_seq,
+					 __u8 ext_src_spec,
+					 __u8 plug,
+					 __u16 phys_addr)
+{
+	msg->len = 13;
+	msg->msg[1] = CEC_MSG_SET_EXT_TIMER;
+	msg->msg[2] = day;
+	msg->msg[3] = month;
+	/* Hours and minutes are in BCD format */
+	msg->msg[4] = ((start_hr / 10) << 4) | (start_hr % 10);
+	msg->msg[5] = ((start_min / 10) << 4) | (start_min % 10);
+	msg->msg[6] = ((duration_hr / 10) << 4) | (duration_hr % 10);
+	msg->msg[7] = ((duration_min / 10) << 4) | (duration_min % 10);
+	msg->msg[8] = recording_seq;
+	msg->msg[9] = ext_src_spec;
+	msg->msg[10] = plug;
+	msg->msg[11] = phys_addr >> 8;
+	msg->msg[12] = phys_addr & 0xff;
+	msg->reply = reply ? CEC_MSG_TIMER_STATUS : 0;
+}
+
+static inline void cec_ops_set_ext_timer(const struct cec_msg *msg,
+					 __u8 *day,
+					 __u8 *month,
+					 __u8 *start_hr,
+					 __u8 *start_min,
+					 __u8 *duration_hr,
+					 __u8 *duration_min,
+					 __u8 *recording_seq,
+					 __u8 *ext_src_spec,
+					 __u8 *plug,
+					 __u16 *phys_addr)
+{
+	*day = msg->msg[2];
+	*month = msg->msg[3];
+	/* Hours and minutes are in BCD format */
+	*start_hr = (msg->msg[4] >> 4) * 10 + (msg->msg[4] & 0xf);
+	*start_min = (msg->msg[5] >> 4) * 10 + (msg->msg[5] & 0xf);
+	*duration_hr = (msg->msg[6] >> 4) * 10 + (msg->msg[6] & 0xf);
+	*duration_min = (msg->msg[7] >> 4) * 10 + (msg->msg[7] & 0xf);
+	*recording_seq = msg->msg[8];
+	*ext_src_spec = msg->msg[9];
+	*plug = msg->msg[10];
+	*phys_addr = (msg->msg[11] << 8) | msg->msg[12];
+}
+
+static inline void cec_msg_set_timer_program_title(struct cec_msg *msg,
+						   const char *prog_title)
+{
+	unsigned int len = strlen(prog_title);
+
+	if (len > 14)
+		len = 14;
+	msg->len = 2 + len;
+	msg->msg[1] = CEC_MSG_SET_TIMER_PROGRAM_TITLE;
+	memcpy(msg->msg + 2, prog_title, len);
+}
+
+static inline void cec_ops_set_timer_program_title(const struct cec_msg *msg,
+						   char *prog_title)
+{
+	unsigned int len = msg->len > 2 ? msg->len - 2 : 0;
+
+	if (len > 14)
+		len = 14;
+	memcpy(prog_title, msg->msg + 2, len);
+	prog_title[len] = '\0';
+}
+
+/* System Information Feature */
+static inline void cec_msg_cec_version(struct cec_msg *msg, __u8 cec_version)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_CEC_VERSION;
+	msg->msg[2] = cec_version;
+}
+
+static inline void cec_ops_cec_version(const struct cec_msg *msg,
+				       __u8 *cec_version)
+{
+	*cec_version = msg->msg[2];
+}
+
+static inline void cec_msg_get_cec_version(struct cec_msg *msg,
+					   bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GET_CEC_VERSION;
+	msg->reply = reply ? CEC_MSG_CEC_VERSION : 0;
+}
+
+static inline void cec_msg_report_physical_addr(struct cec_msg *msg,
+					__u16 phys_addr, __u8 prim_devtype)
+{
+	msg->len = 5;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_REPORT_PHYSICAL_ADDR;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+	msg->msg[4] = prim_devtype;
+}
+
+static inline void cec_ops_report_physical_addr(const struct cec_msg *msg,
+					__u16 *phys_addr, __u8 *prim_devtype)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*prim_devtype = msg->msg[4];
+}
+
+static inline void cec_msg_give_physical_addr(struct cec_msg *msg,
+					      bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_PHYSICAL_ADDR;
+	msg->reply = reply ? CEC_MSG_REPORT_PHYSICAL_ADDR : 0;
+}
+
+static inline void cec_msg_set_menu_language(struct cec_msg *msg,
+					     const char *language)
+{
+	msg->len = 5;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_SET_MENU_LANGUAGE;
+	memcpy(msg->msg + 2, language, 3);
+}
+
+static inline void cec_ops_set_menu_language(const struct cec_msg *msg,
+					     char *language)
+{
+	memcpy(language, msg->msg + 2, 3);
+	language[3] = '\0';
+}
+
+static inline void cec_msg_get_menu_language(struct cec_msg *msg,
+					     bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GET_MENU_LANGUAGE;
+	msg->reply = reply ? CEC_MSG_SET_MENU_LANGUAGE : 0;
+}
+
+/*
+ * Assumes a single RC Profile byte and a single Device Features byte,
+ * i.e. no extended features are supported by this helper function.
+ *
+ * As of CEC 2.0 no extended features are defined, should those be added
+ * in the future, then this function needs to be adapted or a new function
+ * should be added.
+ */
+static inline void cec_msg_report_features(struct cec_msg *msg,
+				__u8 cec_version, __u8 all_device_types,
+				__u8 rc_profile, __u8 dev_features)
+{
+	msg->len = 6;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_REPORT_FEATURES;
+	msg->msg[2] = cec_version;
+	msg->msg[3] = all_device_types;
+	msg->msg[4] = rc_profile;
+	msg->msg[5] = dev_features;
+}
+
+static inline void cec_ops_report_features(const struct cec_msg *msg,
+			__u8 *cec_version, __u8 *all_device_types,
+			const __u8 **rc_profile, const __u8 **dev_features)
+{
+	const __u8 *p = &msg->msg[4];
+
+	*cec_version = msg->msg[2];
+	*all_device_types = msg->msg[3];
+	*rc_profile = p;
+	while (p < &msg->msg[14] && (*p & CEC_OP_FEAT_EXT))
+		p++;
+	if (!(*p & CEC_OP_FEAT_EXT)) {
+		*dev_features = p + 1;
+		while (p < &msg->msg[15] && (*p & CEC_OP_FEAT_EXT))
+			p++;
+	}
+	if (*p & CEC_OP_FEAT_EXT)
+		*rc_profile = *dev_features = NULL;
+}
+
+static inline void cec_msg_give_features(struct cec_msg *msg,
+					 bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_FEATURES;
+	msg->reply = reply ? CEC_MSG_REPORT_FEATURES : 0;
+}
+
+/* Deck Control Feature */
+static inline void cec_msg_deck_control(struct cec_msg *msg,
+					__u8 deck_control_mode)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_DECK_CONTROL;
+	msg->msg[2] = deck_control_mode;
+}
+
+static inline void cec_ops_deck_control(const struct cec_msg *msg,
+					__u8 *deck_control_mode)
+{
+	*deck_control_mode = msg->msg[2];
+}
+
+static inline void cec_msg_deck_status(struct cec_msg *msg,
+				       __u8 deck_info)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_DECK_STATUS;
+	msg->msg[2] = deck_info;
+}
+
+static inline void cec_ops_deck_status(const struct cec_msg *msg,
+				       __u8 *deck_info)
+{
+	*deck_info = msg->msg[2];
+}
+
+static inline void cec_msg_give_deck_status(struct cec_msg *msg,
+					    bool reply,
+					    __u8 status_req)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_GIVE_DECK_STATUS;
+	msg->msg[2] = status_req;
+	msg->reply = reply ? CEC_MSG_DECK_STATUS : 0;
+}
+
+static inline void cec_ops_give_deck_status(const struct cec_msg *msg,
+					    __u8 *status_req)
+{
+	*status_req = msg->msg[2];
+}
+
+static inline void cec_msg_play(struct cec_msg *msg,
+				__u8 play_mode)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_PLAY;
+	msg->msg[2] = play_mode;
+}
+
+static inline void cec_ops_play(const struct cec_msg *msg,
+				__u8 *play_mode)
+{
+	*play_mode = msg->msg[2];
+}
+
+
+/* Tuner Control Feature */
+struct cec_op_tuner_device_info {
+	__u8 rec_flag;
+	__u8 tuner_display_info;
+	bool is_analog;
+	union {
+		struct cec_op_digital_service_id digital;
+		struct {
+			__u8 ana_bcast_type;
+			__u16 ana_freq;
+			__u8 bcast_system;
+		} analog;
+	};
+};
+
+static inline void cec_msg_tuner_device_status_analog(struct cec_msg *msg,
+						      __u8 rec_flag,
+						      __u8 tuner_display_info,
+						      __u8 ana_bcast_type,
+						      __u16 ana_freq,
+						      __u8 bcast_system)
+{
+	msg->len = 7;
+	msg->msg[1] = CEC_MSG_TUNER_DEVICE_STATUS;
+	msg->msg[2] = (rec_flag << 7) | tuner_display_info;
+	msg->msg[3] = ana_bcast_type;
+	msg->msg[4] = ana_freq >> 8;
+	msg->msg[5] = ana_freq & 0xff;
+	msg->msg[6] = bcast_system;
+}
+
+static inline void cec_msg_tuner_device_status_digital(struct cec_msg *msg,
+		   __u8 rec_flag, __u8 tuner_display_info,
+		   const struct cec_op_digital_service_id *digital)
+{
+	msg->len = 10;
+	msg->msg[1] = CEC_MSG_TUNER_DEVICE_STATUS;
+	msg->msg[2] = (rec_flag << 7) | tuner_display_info;
+	cec_set_digital_service_id(msg->msg + 3, digital);
+}
+
+static inline void cec_msg_tuner_device_status(struct cec_msg *msg,
+			const struct cec_op_tuner_device_info *tuner_dev_info)
+{
+	if (tuner_dev_info->is_analog)
+		cec_msg_tuner_device_status_analog(msg,
+			tuner_dev_info->rec_flag,
+			tuner_dev_info->tuner_display_info,
+			tuner_dev_info->analog.ana_bcast_type,
+			tuner_dev_info->analog.ana_freq,
+			tuner_dev_info->analog.bcast_system);
+	else
+		cec_msg_tuner_device_status_digital(msg,
+			tuner_dev_info->rec_flag,
+			tuner_dev_info->tuner_display_info,
+			&tuner_dev_info->digital);
+}
+
+static inline void cec_ops_tuner_device_status(const struct cec_msg *msg,
+				struct cec_op_tuner_device_info *tuner_dev_info)
+{
+	tuner_dev_info->is_analog = msg->len < 10;
+	tuner_dev_info->rec_flag = msg->msg[2] >> 7;
+	tuner_dev_info->tuner_display_info = msg->msg[2] & 0x7f;
+	if (tuner_dev_info->is_analog) {
+		tuner_dev_info->analog.ana_bcast_type = msg->msg[3];
+		tuner_dev_info->analog.ana_freq = (msg->msg[4] << 8) | msg->msg[5];
+		tuner_dev_info->analog.bcast_system = msg->msg[6];
+		return;
+	}
+	cec_get_digital_service_id(msg->msg + 3, &tuner_dev_info->digital);
+}
+
+static inline void cec_msg_give_tuner_device_status(struct cec_msg *msg,
+						    bool reply,
+						    __u8 status_req)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_GIVE_TUNER_DEVICE_STATUS;
+	msg->msg[2] = status_req;
+	msg->reply = reply ? CEC_MSG_TUNER_DEVICE_STATUS : 0;
+}
+
+static inline void cec_ops_give_tuner_device_status(const struct cec_msg *msg,
+						    __u8 *status_req)
+{
+	*status_req = msg->msg[2];
+}
+
+static inline void cec_msg_select_analogue_service(struct cec_msg *msg,
+						   __u8 ana_bcast_type,
+						   __u16 ana_freq,
+						   __u8 bcast_system)
+{
+	msg->len = 6;
+	msg->msg[1] = CEC_MSG_SELECT_ANALOGUE_SERVICE;
+	msg->msg[2] = ana_bcast_type;
+	msg->msg[3] = ana_freq >> 8;
+	msg->msg[4] = ana_freq & 0xff;
+	msg->msg[5] = bcast_system;
+}
+
+static inline void cec_ops_select_analogue_service(const struct cec_msg *msg,
+						   __u8 *ana_bcast_type,
+						   __u16 *ana_freq,
+						   __u8 *bcast_system)
+{
+	*ana_bcast_type = msg->msg[2];
+	*ana_freq = (msg->msg[3] << 8) | msg->msg[4];
+	*bcast_system = msg->msg[5];
+}
+
+static inline void cec_msg_select_digital_service(struct cec_msg *msg,
+				const struct cec_op_digital_service_id *digital)
+{
+	msg->len = 9;
+	msg->msg[1] = CEC_MSG_SELECT_DIGITAL_SERVICE;
+	cec_set_digital_service_id(msg->msg + 2, digital);
+}
+
+static inline void cec_ops_select_digital_service(const struct cec_msg *msg,
+				struct cec_op_digital_service_id *digital)
+{
+	cec_get_digital_service_id(msg->msg + 2, digital);
+}
+
+static inline void cec_msg_tuner_step_decrement(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_TUNER_STEP_DECREMENT;
+}
+
+static inline void cec_msg_tuner_step_increment(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_TUNER_STEP_INCREMENT;
+}
+
+
+/* Vendor Specific Commands Feature */
+static inline void cec_msg_device_vendor_id(struct cec_msg *msg, __u32 vendor_id)
+{
+	msg->len = 5;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_DEVICE_VENDOR_ID;
+	msg->msg[2] = vendor_id >> 16;
+	msg->msg[3] = (vendor_id >> 8) & 0xff;
+	msg->msg[4] = vendor_id & 0xff;
+}
+
+static inline void cec_ops_device_vendor_id(const struct cec_msg *msg,
+					    __u32 *vendor_id)
+{
+	*vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
+}
+
+static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg,
+						 bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_DEVICE_VENDOR_ID;
+	msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0;
+}
+
+static inline void cec_msg_vendor_command(struct cec_msg *msg,
+					  __u8 size, const __u8 *vendor_cmd)
+{
+	if (size > 14)
+		size = 14;
+	msg->len = 2 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_COMMAND;
+	memcpy(msg->msg + 2, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command(const struct cec_msg *msg,
+					  __u8 *size,
+					  const __u8 **vendor_cmd)
+{
+	*size = msg->len - 2;
+
+	if (*size > 14)
+		*size = 14;
+	*vendor_cmd = msg->msg + 2;
+}
+
+static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg,
+						  __u32 vendor_id, __u8 size,
+						  const __u8 *vendor_cmd)
+{
+	if (size > 11)
+		size = 11;
+	msg->len = 5 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID;
+	msg->msg[2] = vendor_id >> 16;
+	msg->msg[3] = (vendor_id >> 8) & 0xff;
+	msg->msg[4] = vendor_id & 0xff;
+	memcpy(msg->msg + 5, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg,
+						  __u32 *vendor_id,  __u8 *size,
+						  const __u8 **vendor_cmd)
+{
+	*size = msg->len - 5;
+
+	if (*size > 11)
+		*size = 11;
+	*vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
+	*vendor_cmd = msg->msg + 5;
+}
+
+static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg,
+						     __u8 size,
+						     const __u8 *rc_code)
+{
+	if (size > 14)
+		size = 14;
+	msg->len = 2 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN;
+	memcpy(msg->msg + 2, rc_code, size);
+}
+
+static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg,
+						     __u8 *size,
+						     const __u8 **rc_code)
+{
+	*size = msg->len - 2;
+
+	if (*size > 14)
+		*size = 14;
+	*rc_code = msg->msg + 2;
+}
+
+static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_UP;
+}
+
+
+/* OSD Display Feature */
+static inline void cec_msg_set_osd_string(struct cec_msg *msg,
+					  __u8 disp_ctl,
+					  const char *osd)
+{
+	unsigned int len = strlen(osd);
+
+	if (len > 13)
+		len = 13;
+	msg->len = 3 + len;
+	msg->msg[1] = CEC_MSG_SET_OSD_STRING;
+	msg->msg[2] = disp_ctl;
+	memcpy(msg->msg + 3, osd, len);
+}
+
+static inline void cec_ops_set_osd_string(const struct cec_msg *msg,
+					  __u8 *disp_ctl,
+					  char *osd)
+{
+	unsigned int len = msg->len > 3 ? msg->len - 3 : 0;
+
+	*disp_ctl = msg->msg[2];
+	if (len > 13)
+		len = 13;
+	memcpy(osd, msg->msg + 3, len);
+	osd[len] = '\0';
+}
+
+
+/* Device OSD Transfer Feature */
+static inline void cec_msg_set_osd_name(struct cec_msg *msg, const char *name)
+{
+	unsigned int len = strlen(name);
+
+	if (len > 14)
+		len = 14;
+	msg->len = 2 + len;
+	msg->msg[1] = CEC_MSG_SET_OSD_NAME;
+	memcpy(msg->msg + 2, name, len);
+}
+
+static inline void cec_ops_set_osd_name(const struct cec_msg *msg,
+					char *name)
+{
+	unsigned int len = msg->len > 2 ? msg->len - 2 : 0;
+
+	if (len > 14)
+		len = 14;
+	memcpy(name, msg->msg + 2, len);
+	name[len] = '\0';
+}
+
+static inline void cec_msg_give_osd_name(struct cec_msg *msg,
+					 bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_OSD_NAME;
+	msg->reply = reply ? CEC_MSG_SET_OSD_NAME : 0;
+}
+
+
+/* Device Menu Control Feature */
+static inline void cec_msg_menu_status(struct cec_msg *msg,
+				       __u8 menu_state)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_MENU_STATUS;
+	msg->msg[2] = menu_state;
+}
+
+static inline void cec_ops_menu_status(const struct cec_msg *msg,
+				       __u8 *menu_state)
+{
+	*menu_state = msg->msg[2];
+}
+
+static inline void cec_msg_menu_request(struct cec_msg *msg,
+					bool reply,
+					__u8 menu_req)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_MENU_REQUEST;
+	msg->msg[2] = menu_req;
+	msg->reply = reply ? CEC_MSG_MENU_STATUS : 0;
+}
+
+static inline void cec_ops_menu_request(const struct cec_msg *msg,
+					__u8 *menu_req)
+{
+	*menu_req = msg->msg[2];
+}
+
+struct cec_op_ui_command {
+	__u8 ui_cmd;
+	bool has_opt_arg;
+	union {
+		struct cec_op_channel_data channel_identifier;
+		__u8 ui_broadcast_type;
+		__u8 ui_sound_presentation_control;
+		__u8 play_mode;
+		__u8 ui_function_media;
+		__u8 ui_function_select_av_input;
+		__u8 ui_function_select_audio_input;
+	};
+};
+
+static inline void cec_msg_user_control_pressed(struct cec_msg *msg,
+					const struct cec_op_ui_command *ui_cmd)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_USER_CONTROL_PRESSED;
+	msg->msg[2] = ui_cmd->ui_cmd;
+	if (!ui_cmd->has_opt_arg)
+		return;
+	switch (ui_cmd->ui_cmd) {
+	case 0x56:
+	case 0x57:
+	case 0x60:
+	case 0x68:
+	case 0x69:
+	case 0x6a:
+		/* The optional operand is one byte for all these ui commands */
+		msg->len++;
+		msg->msg[3] = ui_cmd->play_mode;
+		break;
+	case 0x67:
+		msg->len += 4;
+		msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) |
+			      (ui_cmd->channel_identifier.major >> 8);
+		msg->msg[4] = ui_cmd->channel_identifier.major & 0xff;
+		msg->msg[5] = ui_cmd->channel_identifier.minor >> 8;
+		msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff;
+		break;
+	}
+}
+
+static inline void cec_ops_user_control_pressed(const struct cec_msg *msg,
+						struct cec_op_ui_command *ui_cmd)
+{
+	ui_cmd->ui_cmd = msg->msg[2];
+	ui_cmd->has_opt_arg = false;
+	if (msg->len == 3)
+		return;
+	switch (ui_cmd->ui_cmd) {
+	case 0x56:
+	case 0x57:
+	case 0x60:
+	case 0x68:
+	case 0x69:
+	case 0x6a:
+		/* The optional operand is one byte for all these ui commands */
+		ui_cmd->play_mode = msg->msg[3];
+		ui_cmd->has_opt_arg = true;
+		break;
+	case 0x67:
+		if (msg->len < 7)
+			break;
+		ui_cmd->has_opt_arg = true;
+		ui_cmd->channel_identifier.channel_number_fmt = msg->msg[3] >> 2;
+		ui_cmd->channel_identifier.major = ((msg->msg[3] & 3) << 6) | msg->msg[4];
+		ui_cmd->channel_identifier.minor = (msg->msg[5] << 8) | msg->msg[6];
+		break;
+	}
+}
+
+static inline void cec_msg_user_control_released(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_USER_CONTROL_RELEASED;
+}
+
+/* Remote Control Passthrough Feature */
+
+/* Power Status Feature */
+static inline void cec_msg_report_power_status(struct cec_msg *msg,
+					       __u8 pwr_state)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_REPORT_POWER_STATUS;
+	msg->msg[2] = pwr_state;
+}
+
+static inline void cec_ops_report_power_status(const struct cec_msg *msg,
+					       __u8 *pwr_state)
+{
+	*pwr_state = msg->msg[2];
+}
+
+static inline void cec_msg_give_device_power_status(struct cec_msg *msg,
+						    bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_DEVICE_POWER_STATUS;
+	msg->reply = reply ? CEC_MSG_REPORT_POWER_STATUS : 0;
+}
+
+/* General Protocol Messages */
+static inline void cec_msg_feature_abort(struct cec_msg *msg,
+					 __u8 abort_msg, __u8 reason)
+{
+	msg->len = 4;
+	msg->msg[1] = CEC_MSG_FEATURE_ABORT;
+	msg->msg[2] = abort_msg;
+	msg->msg[3] = reason;
+}
+
+static inline void cec_ops_feature_abort(const struct cec_msg *msg,
+					 __u8 *abort_msg, __u8 *reason)
+{
+	*abort_msg = msg->msg[2];
+	*reason = msg->msg[3];
+}
+
+/* This changes the current message into a feature abort message */
+static inline void cec_msg_reply_feature_abort(struct cec_msg *msg, __u8 reason)
+{
+	cec_msg_set_reply_to(msg, msg);
+	msg->len = 4;
+	msg->msg[2] = msg->msg[1];
+	msg->msg[3] = reason;
+	msg->msg[1] = CEC_MSG_FEATURE_ABORT;
+}
+
+static inline void cec_msg_abort(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_ABORT;
+}
+
+
+/* System Audio Control Feature */
+static inline void cec_msg_report_audio_status(struct cec_msg *msg,
+					       __u8 aud_mute_status,
+					       __u8 aud_vol_status)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_REPORT_AUDIO_STATUS;
+	msg->msg[2] = (aud_mute_status << 7) | (aud_vol_status & 0x7f);
+}
+
+static inline void cec_ops_report_audio_status(const struct cec_msg *msg,
+					       __u8 *aud_mute_status,
+					       __u8 *aud_vol_status)
+{
+	*aud_mute_status = msg->msg[2] >> 7;
+	*aud_vol_status = msg->msg[2] & 0x7f;
+}
+
+static inline void cec_msg_give_audio_status(struct cec_msg *msg,
+					     bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_AUDIO_STATUS;
+	msg->reply = reply ? CEC_MSG_REPORT_AUDIO_STATUS : 0;
+}
+
+static inline void cec_msg_set_system_audio_mode(struct cec_msg *msg,
+						 __u8 sys_aud_status)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_SET_SYSTEM_AUDIO_MODE;
+	msg->msg[2] = sys_aud_status;
+}
+
+static inline void cec_ops_set_system_audio_mode(const struct cec_msg *msg,
+						 __u8 *sys_aud_status)
+{
+	*sys_aud_status = msg->msg[2];
+}
+
+static inline void cec_msg_system_audio_mode_request(struct cec_msg *msg,
+						     bool reply,
+						     __u16 phys_addr)
+{
+	msg->len = phys_addr == 0xffff ? 2 : 4;
+	msg->msg[1] = CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+	msg->reply = reply ? CEC_MSG_SET_SYSTEM_AUDIO_MODE : 0;
+
+}
+
+static inline void cec_ops_system_audio_mode_request(const struct cec_msg *msg,
+						     __u16 *phys_addr)
+{
+	if (msg->len < 4)
+		*phys_addr = 0xffff;
+	else
+		*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_system_audio_mode_status(struct cec_msg *msg,
+						    __u8 sys_aud_status)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_SYSTEM_AUDIO_MODE_STATUS;
+	msg->msg[2] = sys_aud_status;
+}
+
+static inline void cec_ops_system_audio_mode_status(const struct cec_msg *msg,
+						    __u8 *sys_aud_status)
+{
+	*sys_aud_status = msg->msg[2];
+}
+
+static inline void cec_msg_give_system_audio_mode_status(struct cec_msg *msg,
+							 bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS;
+	msg->reply = reply ? CEC_MSG_SYSTEM_AUDIO_MODE_STATUS : 0;
+}
+
+static inline void cec_msg_report_short_audio_descriptor(struct cec_msg *msg,
+					__u8 num_descriptors,
+					const __u32 *descriptors)
+{
+	unsigned int i;
+
+	if (num_descriptors > 4)
+		num_descriptors = 4;
+	msg->len = 2 + num_descriptors * 3;
+	msg->msg[1] = CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR;
+	for (i = 0; i < num_descriptors; i++) {
+		msg->msg[2 + i * 3] = (descriptors[i] >> 16) & 0xff;
+		msg->msg[3 + i * 3] = (descriptors[i] >> 8) & 0xff;
+		msg->msg[4 + i * 3] = descriptors[i] & 0xff;
+	}
+}
+
+static inline void cec_ops_report_short_audio_descriptor(const struct cec_msg *msg,
+							 __u8 *num_descriptors,
+							 __u32 *descriptors)
+{
+	unsigned int i;
+
+	*num_descriptors = (msg->len - 2) / 3;
+	if (*num_descriptors > 4)
+		*num_descriptors = 4;
+	for (i = 0; i < *num_descriptors; i++)
+		descriptors[i] = (msg->msg[2 + i * 3] << 16) |
+			(msg->msg[3 + i * 3] << 8) |
+			msg->msg[4 + i * 3];
+}
+
+static inline void cec_msg_request_short_audio_descriptor(struct cec_msg *msg,
+					bool reply,
+					__u8 num_descriptors,
+					const __u8 *audio_format_id,
+					const __u8 *audio_format_code)
+{
+	unsigned int i;
+
+	if (num_descriptors > 4)
+		num_descriptors = 4;
+	msg->len = 2 + num_descriptors;
+	msg->msg[1] = CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR;
+	msg->reply = reply ? CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR : 0;
+	for (i = 0; i < num_descriptors; i++)
+		msg->msg[2 + i] = (audio_format_id[i] << 6) |
+				  (audio_format_code[i] & 0x3f);
+}
+
+static inline void cec_ops_request_short_audio_descriptor(const struct cec_msg *msg,
+					__u8 *num_descriptors,
+					__u8 *audio_format_id,
+					__u8 *audio_format_code)
+{
+	unsigned int i;
+
+	*num_descriptors = msg->len - 2;
+	if (*num_descriptors > 4)
+		*num_descriptors = 4;
+	for (i = 0; i < *num_descriptors; i++) {
+		audio_format_id[i] = msg->msg[2 + i] >> 6;
+		audio_format_code[i] = msg->msg[2 + i] & 0x3f;
+	}
+}
+
+
+/* Audio Rate Control Feature */
+static inline void cec_msg_set_audio_rate(struct cec_msg *msg,
+					  __u8 audio_rate)
+{
+	msg->len = 3;
+	msg->msg[1] = CEC_MSG_SET_AUDIO_RATE;
+	msg->msg[2] = audio_rate;
+}
+
+static inline void cec_ops_set_audio_rate(const struct cec_msg *msg,
+					  __u8 *audio_rate)
+{
+	*audio_rate = msg->msg[2];
+}
+
+
+/* Audio Return Channel Control Feature */
+static inline void cec_msg_report_arc_initiated(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_REPORT_ARC_INITIATED;
+}
+
+static inline void cec_msg_initiate_arc(struct cec_msg *msg,
+					bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_INITIATE_ARC;
+	msg->reply = reply ? CEC_MSG_REPORT_ARC_INITIATED : 0;
+}
+
+static inline void cec_msg_request_arc_initiation(struct cec_msg *msg,
+						  bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_REQUEST_ARC_INITIATION;
+	msg->reply = reply ? CEC_MSG_INITIATE_ARC : 0;
+}
+
+static inline void cec_msg_report_arc_terminated(struct cec_msg *msg)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_REPORT_ARC_TERMINATED;
+}
+
+static inline void cec_msg_terminate_arc(struct cec_msg *msg,
+					 bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_TERMINATE_ARC;
+	msg->reply = reply ? CEC_MSG_REPORT_ARC_TERMINATED : 0;
+}
+
+static inline void cec_msg_request_arc_termination(struct cec_msg *msg,
+						   bool reply)
+{
+	msg->len = 2;
+	msg->msg[1] = CEC_MSG_REQUEST_ARC_TERMINATION;
+	msg->reply = reply ? CEC_MSG_TERMINATE_ARC : 0;
+}
+
+
+/* Dynamic Audio Lipsync Feature */
+/* Only for CEC 2.0 and up */
+static inline void cec_msg_report_current_latency(struct cec_msg *msg,
+						  __u16 phys_addr,
+						  __u8 video_latency,
+						  __u8 low_latency_mode,
+						  __u8 audio_out_compensated,
+						  __u8 audio_out_delay)
+{
+	msg->len = 7;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_REPORT_CURRENT_LATENCY;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+	msg->msg[4] = video_latency;
+	msg->msg[5] = (low_latency_mode << 2) | audio_out_compensated;
+	msg->msg[6] = audio_out_delay;
+}
+
+static inline void cec_ops_report_current_latency(const struct cec_msg *msg,
+						  __u16 *phys_addr,
+						  __u8 *video_latency,
+						  __u8 *low_latency_mode,
+						  __u8 *audio_out_compensated,
+						  __u8 *audio_out_delay)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*video_latency = msg->msg[4];
+	*low_latency_mode = (msg->msg[5] >> 2) & 1;
+	*audio_out_compensated = msg->msg[5] & 3;
+	*audio_out_delay = msg->msg[6];
+}
+
+static inline void cec_msg_request_current_latency(struct cec_msg *msg,
+						   bool reply,
+						   __u16 phys_addr)
+{
+	msg->len = 4;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_REQUEST_CURRENT_LATENCY;
+	msg->msg[2] = phys_addr >> 8;
+	msg->msg[3] = phys_addr & 0xff;
+	msg->reply = reply ? CEC_MSG_REPORT_CURRENT_LATENCY : 0;
+}
+
+static inline void cec_ops_request_current_latency(const struct cec_msg *msg,
+						   __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+
+/* Capability Discovery and Control Feature */
+static inline void cec_msg_cdc_hec_inquire_state(struct cec_msg *msg,
+						 __u16 phys_addr1,
+						 __u16 phys_addr2)
+{
+	msg->len = 9;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE;
+	msg->msg[5] = phys_addr1 >> 8;
+	msg->msg[6] = phys_addr1 & 0xff;
+	msg->msg[7] = phys_addr2 >> 8;
+	msg->msg[8] = phys_addr2 & 0xff;
+}
+
+static inline void cec_ops_cdc_hec_inquire_state(const struct cec_msg *msg,
+						 __u16 *phys_addr,
+						 __u16 *phys_addr1,
+						 __u16 *phys_addr2)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
+	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
+}
+
+static inline void cec_msg_cdc_hec_report_state(struct cec_msg *msg,
+						__u16 target_phys_addr,
+						__u8 hec_func_state,
+						__u8 host_func_state,
+						__u8 enc_func_state,
+						__u8 cdc_errcode,
+						__u8 has_field,
+						__u16 hec_field)
+{
+	msg->len = has_field ? 10 : 8;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_REPORT_STATE;
+	msg->msg[5] = target_phys_addr >> 8;
+	msg->msg[6] = target_phys_addr & 0xff;
+	msg->msg[7] = (hec_func_state << 6) |
+		      (host_func_state << 4) |
+		      (enc_func_state << 2) |
+		      cdc_errcode;
+	if (has_field) {
+		msg->msg[8] = hec_field >> 8;
+		msg->msg[9] = hec_field & 0xff;
+	}
+}
+
+static inline void cec_ops_cdc_hec_report_state(const struct cec_msg *msg,
+						__u16 *phys_addr,
+						__u16 *target_phys_addr,
+						__u8 *hec_func_state,
+						__u8 *host_func_state,
+						__u8 *enc_func_state,
+						__u8 *cdc_errcode,
+						__u8 *has_field,
+						__u16 *hec_field)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*target_phys_addr = (msg->msg[5] << 8) | msg->msg[6];
+	*hec_func_state = msg->msg[7] >> 6;
+	*host_func_state = (msg->msg[7] >> 4) & 3;
+	*enc_func_state = (msg->msg[7] >> 4) & 3;
+	*cdc_errcode = msg->msg[7] & 3;
+	*has_field = msg->len >= 10;
+	*hec_field = *has_field ? ((msg->msg[8] << 8) | msg->msg[9]) : 0;
+}
+
+static inline void cec_msg_cdc_hec_set_state(struct cec_msg *msg,
+					     __u16 phys_addr1,
+					     __u16 phys_addr2,
+					     __u8 hec_set_state,
+					     __u16 phys_addr3,
+					     __u16 phys_addr4,
+					     __u16 phys_addr5)
+{
+	msg->len = 10;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE;
+	msg->msg[5] = phys_addr1 >> 8;
+	msg->msg[6] = phys_addr1 & 0xff;
+	msg->msg[7] = phys_addr2 >> 8;
+	msg->msg[8] = phys_addr2 & 0xff;
+	msg->msg[9] = hec_set_state;
+	if (phys_addr3 != CEC_PHYS_ADDR_INVALID) {
+		msg->msg[msg->len++] = phys_addr3 >> 8;
+		msg->msg[msg->len++] = phys_addr3 & 0xff;
+		if (phys_addr4 != CEC_PHYS_ADDR_INVALID) {
+			msg->msg[msg->len++] = phys_addr4 >> 8;
+			msg->msg[msg->len++] = phys_addr4 & 0xff;
+			if (phys_addr5 != CEC_PHYS_ADDR_INVALID) {
+				msg->msg[msg->len++] = phys_addr5 >> 8;
+				msg->msg[msg->len++] = phys_addr5 & 0xff;
+			}
+		}
+	}
+}
+
+static inline void cec_ops_cdc_hec_set_state(const struct cec_msg *msg,
+					     __u16 *phys_addr,
+					     __u16 *phys_addr1,
+					     __u16 *phys_addr2,
+					     __u8 *hec_set_state,
+					     __u16 *phys_addr3,
+					     __u16 *phys_addr4,
+					     __u16 *phys_addr5)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
+	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
+	*hec_set_state = msg->msg[9];
+	*phys_addr3 = *phys_addr4 = *phys_addr5 = CEC_PHYS_ADDR_INVALID;
+	if (msg->len >= 12)
+		*phys_addr3 = (msg->msg[10] << 8) | msg->msg[11];
+	if (msg->len >= 14)
+		*phys_addr4 = (msg->msg[12] << 8) | msg->msg[13];
+	if (msg->len >= 16)
+		*phys_addr5 = (msg->msg[14] << 8) | msg->msg[15];
+}
+
+static inline void cec_msg_cdc_hec_set_state_adjacent(struct cec_msg *msg,
+						      __u16 phys_addr1,
+						      __u8 hec_set_state)
+{
+	msg->len = 8;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_SET_STATE_ADJACENT;
+	msg->msg[5] = phys_addr1 >> 8;
+	msg->msg[6] = phys_addr1 & 0xff;
+	msg->msg[7] = hec_set_state;
+}
+
+static inline void cec_ops_cdc_hec_set_state_adjacent(const struct cec_msg *msg,
+						      __u16 *phys_addr,
+						      __u16 *phys_addr1,
+						      __u8 *hec_set_state)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
+	*hec_set_state = msg->msg[7];
+}
+
+static inline void cec_msg_cdc_hec_request_deactivation(struct cec_msg *msg,
+							__u16 phys_addr1,
+							__u16 phys_addr2,
+							__u16 phys_addr3)
+{
+	msg->len = 11;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_REQUEST_DEACTIVATION;
+	msg->msg[5] = phys_addr1 >> 8;
+	msg->msg[6] = phys_addr1 & 0xff;
+	msg->msg[7] = phys_addr2 >> 8;
+	msg->msg[8] = phys_addr2 & 0xff;
+	msg->msg[9] = phys_addr3 >> 8;
+	msg->msg[10] = phys_addr3 & 0xff;
+}
+
+static inline void cec_ops_cdc_hec_request_deactivation(const struct cec_msg *msg,
+							__u16 *phys_addr,
+							__u16 *phys_addr1,
+							__u16 *phys_addr2,
+							__u16 *phys_addr3)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*phys_addr1 = (msg->msg[5] << 8) | msg->msg[6];
+	*phys_addr2 = (msg->msg[7] << 8) | msg->msg[8];
+	*phys_addr3 = (msg->msg[9] << 8) | msg->msg[10];
+}
+
+static inline void cec_msg_cdc_hec_notify_alive(struct cec_msg *msg)
+{
+	msg->len = 5;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_NOTIFY_ALIVE;
+}
+
+static inline void cec_ops_cdc_hec_notify_alive(const struct cec_msg *msg,
+						__u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_cdc_hec_discover(struct cec_msg *msg)
+{
+	msg->len = 5;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HEC_DISCOVER;
+}
+
+static inline void cec_ops_cdc_hec_discover(const struct cec_msg *msg,
+					    __u16 *phys_addr)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+}
+
+static inline void cec_msg_cdc_hpd_set_state(struct cec_msg *msg,
+					     __u8 input_port,
+					     __u8 hpd_state)
+{
+	msg->len = 6;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HPD_SET_STATE;
+	msg->msg[5] = (input_port << 4) | hpd_state;
+}
+
+static inline void cec_ops_cdc_hpd_set_state(const struct cec_msg *msg,
+					    __u16 *phys_addr,
+					    __u8 *input_port,
+					    __u8 *hpd_state)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*input_port = msg->msg[5] >> 4;
+	*hpd_state = msg->msg[5] & 0xf;
+}
+
+static inline void cec_msg_cdc_hpd_report_state(struct cec_msg *msg,
+						__u8 hpd_state,
+						__u8 hpd_error)
+{
+	msg->len = 6;
+	msg->msg[0] |= 0xf; /* broadcast */
+	msg->msg[1] = CEC_MSG_CDC_MESSAGE;
+	/* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */
+	msg->msg[4] = CEC_MSG_CDC_HPD_REPORT_STATE;
+	msg->msg[5] = (hpd_state << 4) | hpd_error;
+}
+
+static inline void cec_ops_cdc_hpd_report_state(const struct cec_msg *msg,
+						__u16 *phys_addr,
+						__u8 *hpd_state,
+						__u8 *hpd_error)
+{
+	*phys_addr = (msg->msg[2] << 8) | msg->msg[3];
+	*hpd_state = msg->msg[5] >> 4;
+	*hpd_error = msg->msg[5] & 0xf;
+}
+
+#endif
diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h
new file mode 100644
index 000000000000..f4ec0af67707
--- /dev/null
+++ b/include/uapi/linux/cec.h
@@ -0,0 +1,1065 @@
+/*
+ * cec - HDMI Consumer Electronics Control public header
+ *
+ * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Alternatively you can redistribute this file under the terms of the
+ * BSD license as stated below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. The names of its contributors may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CEC_UAPI_H
+#define _CEC_UAPI_H
+
+#include <linux/types.h>
+
+#define CEC_MAX_MSG_SIZE	16
+
+/**
+ * struct cec_msg - CEC message structure.
+ * @tx_ts:	Timestamp in nanoseconds using CLOCK_MONOTONIC. Set by the
+ *		driver when the message transmission has finished.
+ * @rx_ts:	Timestamp in nanoseconds using CLOCK_MONOTONIC. Set by the
+ *		driver when the message was received.
+ * @len:	Length in bytes of the message.
+ * @timeout:	The timeout (in ms) that is used to timeout CEC_RECEIVE.
+ *		Set to 0 if you want to wait forever. This timeout can also be
+ *		used with CEC_TRANSMIT as the timeout for waiting for a reply.
+ *		If 0, then it will use a 1 second timeout instead of waiting
+ *		forever as is done with CEC_RECEIVE.
+ * @sequence:	The framework assigns a sequence number to messages that are
+ *		sent. This can be used to track replies to previously sent
+ *		messages.
+ * @flags:	Set to 0.
+ * @msg:	The message payload.
+ * @reply:	This field is ignored with CEC_RECEIVE and is only used by
+ *		CEC_TRANSMIT. If non-zero, then wait for a reply with this
+ *		opcode. Set to CEC_MSG_FEATURE_ABORT if you want to wait for
+ *		a possible ABORT reply. If there was an error when sending the
+ *		msg or FeatureAbort was returned, then reply is set to 0.
+ *		If reply is non-zero upon return, then len/msg are set to
+ *		the received message.
+ *		If reply is zero upon return and status has the
+ *		CEC_TX_STATUS_FEATURE_ABORT bit set, then len/msg are set to
+ *		the received feature abort message.
+ *		If reply is zero upon return and status has the
+ *		CEC_TX_STATUS_MAX_RETRIES bit set, then no reply was seen at
+ *		all. If reply is non-zero for CEC_TRANSMIT and the message is a
+ *		broadcast, then -EINVAL is returned.
+ *		if reply is non-zero, then timeout is set to 1000 (the required
+ *		maximum response time).
+ * @rx_status:	The message receive status bits. Set by the driver.
+ * @tx_status:	The message transmit status bits. Set by the driver.
+ * @tx_arb_lost_cnt: The number of 'Arbitration Lost' events. Set by the driver.
+ * @tx_nack_cnt: The number of 'Not Acknowledged' events. Set by the driver.
+ * @tx_low_drive_cnt: The number of 'Low Drive Detected' events. Set by the
+ *		driver.
+ * @tx_error_cnt: The number of 'Error' events. Set by the driver.
+ */
+struct cec_msg {
+	__u64 tx_ts;
+	__u64 rx_ts;
+	__u32 len;
+	__u32 timeout;
+	__u32 sequence;
+	__u32 flags;
+	__u8 msg[CEC_MAX_MSG_SIZE];
+	__u8 reply;
+	__u8 rx_status;
+	__u8 tx_status;
+	__u8 tx_arb_lost_cnt;
+	__u8 tx_nack_cnt;
+	__u8 tx_low_drive_cnt;
+	__u8 tx_error_cnt;
+};
+
+/**
+ * cec_msg_initiator - return the initiator's logical address.
+ * @msg:	the message structure
+ */
+static inline __u8 cec_msg_initiator(const struct cec_msg *msg)
+{
+	return msg->msg[0] >> 4;
+}
+
+/**
+ * cec_msg_destination - return the destination's logical address.
+ * @msg:	the message structure
+ */
+static inline __u8 cec_msg_destination(const struct cec_msg *msg)
+{
+	return msg->msg[0] & 0xf;
+}
+
+/**
+ * cec_msg_opcode - return the opcode of the message, -1 for poll
+ * @msg:	the message structure
+ */
+static inline int cec_msg_opcode(const struct cec_msg *msg)
+{
+	return msg->len > 1 ? msg->msg[1] : -1;
+}
+
+/**
+ * cec_msg_is_broadcast - return true if this is a broadcast message.
+ * @msg:	the message structure
+ */
+static inline bool cec_msg_is_broadcast(const struct cec_msg *msg)
+{
+	return (msg->msg[0] & 0xf) == 0xf;
+}
+
+/**
+ * cec_msg_init - initialize the message structure.
+ * @msg:	the message structure
+ * @initiator:	the logical address of the initiator
+ * @destination:the logical address of the destination (0xf for broadcast)
+ *
+ * The whole structure is zeroed, the len field is set to 1 (i.e. a poll
+ * message) and the initiator and destination are filled in.
+ */
+static inline void cec_msg_init(struct cec_msg *msg,
+				__u8 initiator, __u8 destination)
+{
+	memset(msg, 0, sizeof(*msg));
+	msg->msg[0] = (initiator << 4) | destination;
+	msg->len = 1;
+}
+
+/**
+ * cec_msg_set_reply_to - fill in destination/initiator in a reply message.
+ * @msg:	the message structure for the reply
+ * @orig:	the original message structure
+ *
+ * Set the msg destination to the orig initiator and the msg initiator to the
+ * orig destination. Note that msg and orig may be the same pointer, in which
+ * case the change is done in place.
+ */
+static inline void cec_msg_set_reply_to(struct cec_msg *msg,
+					struct cec_msg *orig)
+{
+	/* The destination becomes the initiator and vice versa */
+	msg->msg[0] = (cec_msg_destination(orig) << 4) |
+		      cec_msg_initiator(orig);
+	msg->reply = msg->timeout = 0;
+}
+
+/* cec_msg flags field */
+#define CEC_MSG_FL_REPLY_TO_FOLLOWERS	(1 << 0)
+
+/* cec_msg tx/rx_status field */
+#define CEC_TX_STATUS_OK		(1 << 0)
+#define CEC_TX_STATUS_ARB_LOST		(1 << 1)
+#define CEC_TX_STATUS_NACK		(1 << 2)
+#define CEC_TX_STATUS_LOW_DRIVE		(1 << 3)
+#define CEC_TX_STATUS_ERROR		(1 << 4)
+#define CEC_TX_STATUS_MAX_RETRIES	(1 << 5)
+
+#define CEC_RX_STATUS_OK		(1 << 0)
+#define CEC_RX_STATUS_TIMEOUT		(1 << 1)
+#define CEC_RX_STATUS_FEATURE_ABORT	(1 << 2)
+
+static inline bool cec_msg_status_is_ok(const struct cec_msg *msg)
+{
+	if (msg->tx_status && !(msg->tx_status & CEC_TX_STATUS_OK))
+		return false;
+	if (msg->rx_status && !(msg->rx_status & CEC_RX_STATUS_OK))
+		return false;
+	if (!msg->tx_status && !msg->rx_status)
+		return false;
+	return !(msg->rx_status & CEC_RX_STATUS_FEATURE_ABORT);
+}
+
+#define CEC_LOG_ADDR_INVALID		0xff
+#define CEC_PHYS_ADDR_INVALID		0xffff
+
+/*
+ * The maximum number of logical addresses one device can be assigned to.
+ * The CEC 2.0 spec allows for only 2 logical addresses at the moment. The
+ * Analog Devices CEC hardware supports 3. So let's go wild and go for 4.
+ */
+#define CEC_MAX_LOG_ADDRS 4
+
+/* The logical addresses defined by CEC 2.0 */
+#define CEC_LOG_ADDR_TV			0
+#define CEC_LOG_ADDR_RECORD_1		1
+#define CEC_LOG_ADDR_RECORD_2		2
+#define CEC_LOG_ADDR_TUNER_1		3
+#define CEC_LOG_ADDR_PLAYBACK_1		4
+#define CEC_LOG_ADDR_AUDIOSYSTEM	5
+#define CEC_LOG_ADDR_TUNER_2		6
+#define CEC_LOG_ADDR_TUNER_3		7
+#define CEC_LOG_ADDR_PLAYBACK_2		8
+#define CEC_LOG_ADDR_RECORD_3		9
+#define CEC_LOG_ADDR_TUNER_4		10
+#define CEC_LOG_ADDR_PLAYBACK_3		11
+#define CEC_LOG_ADDR_BACKUP_1		12
+#define CEC_LOG_ADDR_BACKUP_2		13
+#define CEC_LOG_ADDR_SPECIFIC		14
+#define CEC_LOG_ADDR_UNREGISTERED	15 /* as initiator address */
+#define CEC_LOG_ADDR_BROADCAST		15 /* ad destination address */
+
+/* The logical address types that the CEC device wants to claim */
+#define CEC_LOG_ADDR_TYPE_TV		0
+#define CEC_LOG_ADDR_TYPE_RECORD	1
+#define CEC_LOG_ADDR_TYPE_TUNER		2
+#define CEC_LOG_ADDR_TYPE_PLAYBACK	3
+#define CEC_LOG_ADDR_TYPE_AUDIOSYSTEM	4
+#define CEC_LOG_ADDR_TYPE_SPECIFIC	5
+#define CEC_LOG_ADDR_TYPE_UNREGISTERED	6
+/*
+ * Switches should use UNREGISTERED.
+ * Processors should use SPECIFIC.
+ */
+
+#define CEC_LOG_ADDR_MASK_TV		(1 << CEC_LOG_ADDR_TV)
+#define CEC_LOG_ADDR_MASK_RECORD	((1 << CEC_LOG_ADDR_RECORD_1) | \
+					 (1 << CEC_LOG_ADDR_RECORD_2) | \
+					 (1 << CEC_LOG_ADDR_RECORD_3))
+#define CEC_LOG_ADDR_MASK_TUNER		((1 << CEC_LOG_ADDR_TUNER_1) | \
+					 (1 << CEC_LOG_ADDR_TUNER_2) | \
+					 (1 << CEC_LOG_ADDR_TUNER_3) | \
+					 (1 << CEC_LOG_ADDR_TUNER_4))
+#define CEC_LOG_ADDR_MASK_PLAYBACK	((1 << CEC_LOG_ADDR_PLAYBACK_1) | \
+					 (1 << CEC_LOG_ADDR_PLAYBACK_2) | \
+					 (1 << CEC_LOG_ADDR_PLAYBACK_3))
+#define CEC_LOG_ADDR_MASK_AUDIOSYSTEM	(1 << CEC_LOG_ADDR_AUDIOSYSTEM)
+#define CEC_LOG_ADDR_MASK_BACKUP	((1 << CEC_LOG_ADDR_BACKUP_1) | \
+					 (1 << CEC_LOG_ADDR_BACKUP_2))
+#define CEC_LOG_ADDR_MASK_SPECIFIC	(1 << CEC_LOG_ADDR_SPECIFIC)
+#define CEC_LOG_ADDR_MASK_UNREGISTERED	(1 << CEC_LOG_ADDR_UNREGISTERED)
+
+static inline bool cec_has_tv(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_TV;
+}
+
+static inline bool cec_has_record(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_RECORD;
+}
+
+static inline bool cec_has_tuner(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_TUNER;
+}
+
+static inline bool cec_has_playback(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_PLAYBACK;
+}
+
+static inline bool cec_has_audiosystem(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_AUDIOSYSTEM;
+}
+
+static inline bool cec_has_backup(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_BACKUP;
+}
+
+static inline bool cec_has_specific(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_SPECIFIC;
+}
+
+static inline bool cec_is_unregistered(__u16 log_addr_mask)
+{
+	return log_addr_mask & CEC_LOG_ADDR_MASK_UNREGISTERED;
+}
+
+static inline bool cec_is_unconfigured(__u16 log_addr_mask)
+{
+	return log_addr_mask == 0;
+}
+
+/*
+ * Use this if there is no vendor ID (CEC_G_VENDOR_ID) or if the vendor ID
+ * should be disabled (CEC_S_VENDOR_ID)
+ */
+#define CEC_VENDOR_ID_NONE		0xffffffff
+
+/* The message handling modes */
+/* Modes for initiator */
+#define CEC_MODE_NO_INITIATOR		(0x0 << 0)
+#define CEC_MODE_INITIATOR		(0x1 << 0)
+#define CEC_MODE_EXCL_INITIATOR		(0x2 << 0)
+#define CEC_MODE_INITIATOR_MSK		0x0f
+
+/* Modes for follower */
+#define CEC_MODE_NO_FOLLOWER		(0x0 << 4)
+#define CEC_MODE_FOLLOWER		(0x1 << 4)
+#define CEC_MODE_EXCL_FOLLOWER		(0x2 << 4)
+#define CEC_MODE_EXCL_FOLLOWER_PASSTHRU	(0x3 << 4)
+#define CEC_MODE_MONITOR		(0xe << 4)
+#define CEC_MODE_MONITOR_ALL		(0xf << 4)
+#define CEC_MODE_FOLLOWER_MSK		0xf0
+
+/* Userspace has to configure the physical address */
+#define CEC_CAP_PHYS_ADDR	(1 << 0)
+/* Userspace has to configure the logical addresses */
+#define CEC_CAP_LOG_ADDRS	(1 << 1)
+/* Userspace can transmit messages (and thus become follower as well) */
+#define CEC_CAP_TRANSMIT	(1 << 2)
+/*
+ * Passthrough all messages instead of processing them.
+ */
+#define CEC_CAP_PASSTHROUGH	(1 << 3)
+/* Supports remote control */
+#define CEC_CAP_RC		(1 << 4)
+/* Hardware can monitor all messages, not just directed and broadcast. */
+#define CEC_CAP_MONITOR_ALL	(1 << 5)
+
+/**
+ * struct cec_caps - CEC capabilities structure.
+ * @driver: name of the CEC device driver.
+ * @name: name of the CEC device. @driver + @name must be unique.
+ * @available_log_addrs: number of available logical addresses.
+ * @capabilities: capabilities of the CEC adapter.
+ * @version: version of the CEC adapter framework.
+ */
+struct cec_caps {
+	char driver[32];
+	char name[32];
+	__u32 available_log_addrs;
+	__u32 capabilities;
+	__u32 version;
+};
+
+/**
+ * struct cec_log_addrs - CEC logical addresses structure.
+ * @log_addr: the claimed logical addresses. Set by the driver.
+ * @log_addr_mask: current logical address mask. Set by the driver.
+ * @cec_version: the CEC version that the adapter should implement. Set by the
+ *	caller.
+ * @num_log_addrs: how many logical addresses should be claimed. Set by the
+ *	caller.
+ * @vendor_id: the vendor ID of the device. Set by the caller.
+ * @flags: flags.
+ * @osd_name: the OSD name of the device. Set by the caller.
+ * @primary_device_type: the primary device type for each logical address.
+ *	Set by the caller.
+ * @log_addr_type: the logical address types. Set by the caller.
+ * @all_device_types: CEC 2.0: all device types represented by the logical
+ *	address. Set by the caller.
+ * @features:	CEC 2.0: The logical address features. Set by the caller.
+ */
+struct cec_log_addrs {
+	__u8 log_addr[CEC_MAX_LOG_ADDRS];
+	__u16 log_addr_mask;
+	__u8 cec_version;
+	__u8 num_log_addrs;
+	__u32 vendor_id;
+	__u32 flags;
+	char osd_name[15];
+	__u8 primary_device_type[CEC_MAX_LOG_ADDRS];
+	__u8 log_addr_type[CEC_MAX_LOG_ADDRS];
+
+	/* CEC 2.0 */
+	__u8 all_device_types[CEC_MAX_LOG_ADDRS];
+	__u8 features[CEC_MAX_LOG_ADDRS][12];
+};
+
+/* Allow a fallback to unregistered */
+#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK	(1 << 0)
+/* Passthrough RC messages to the input subsystem */
+#define CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU	(1 << 1)
+/* CDC-Only device: supports only CDC messages */
+#define CEC_LOG_ADDRS_FL_CDC_ONLY		(1 << 2)
+
+/* Events */
+
+/* Event that occurs when the adapter state changes */
+#define CEC_EVENT_STATE_CHANGE		1
+/*
+ * This event is sent when messages are lost because the application
+ * didn't empty the message queue in time
+ */
+#define CEC_EVENT_LOST_MSGS		2
+
+#define CEC_EVENT_FL_INITIAL_STATE	(1 << 0)
+
+/**
+ * struct cec_event_state_change - used when the CEC adapter changes state.
+ * @phys_addr: the current physical address
+ * @log_addr_mask: the current logical address mask
+ */
+struct cec_event_state_change {
+	__u16 phys_addr;
+	__u16 log_addr_mask;
+};
+
+/**
+ * struct cec_event_lost_msgs - tells you how many messages were lost due.
+ * @lost_msgs: how many messages were lost.
+ */
+struct cec_event_lost_msgs {
+	__u32 lost_msgs;
+};
+
+/**
+ * struct cec_event - CEC event structure
+ * @ts: the timestamp of when the event was sent.
+ * @event: the event.
+ * array.
+ * @state_change: the event payload for CEC_EVENT_STATE_CHANGE.
+ * @lost_msgs: the event payload for CEC_EVENT_LOST_MSGS.
+ * @raw: array to pad the union.
+ */
+struct cec_event {
+	__u64 ts;
+	__u32 event;
+	__u32 flags;
+	union {
+		struct cec_event_state_change state_change;
+		struct cec_event_lost_msgs lost_msgs;
+		__u32 raw[16];
+	};
+};
+
+/* ioctls */
+
+/* Adapter capabilities */
+#define CEC_ADAP_G_CAPS		_IOWR('a',  0, struct cec_caps)
+
+/*
+ * phys_addr is either 0 (if this is the CEC root device)
+ * or a valid physical address obtained from the sink's EDID
+ * as read by this CEC device (if this is a source device)
+ * or a physical address obtained and modified from a sink
+ * EDID and used for a sink CEC device.
+ * If nothing is connected, then phys_addr is 0xffff.
+ * See HDMI 1.4b, section 8.7 (Physical Address).
+ *
+ * The CEC_ADAP_S_PHYS_ADDR ioctl may not be available if that is handled
+ * internally.
+ */
+#define CEC_ADAP_G_PHYS_ADDR	_IOR('a',  1, __u16)
+#define CEC_ADAP_S_PHYS_ADDR	_IOW('a',  2, __u16)
+
+/*
+ * Configure the CEC adapter. It sets the device type and which
+ * logical types it will try to claim. It will return which
+ * logical addresses it could actually claim.
+ * An error is returned if the adapter is disabled or if there
+ * is no physical address assigned.
+ */
+
+#define CEC_ADAP_G_LOG_ADDRS	_IOR('a',  3, struct cec_log_addrs)
+#define CEC_ADAP_S_LOG_ADDRS	_IOWR('a',  4, struct cec_log_addrs)
+
+/* Transmit/receive a CEC command */
+#define CEC_TRANSMIT		_IOWR('a',  5, struct cec_msg)
+#define CEC_RECEIVE		_IOWR('a',  6, struct cec_msg)
+
+/* Dequeue CEC events */
+#define CEC_DQEVENT		_IOWR('a',  7, struct cec_event)
+
+/*
+ * Get and set the message handling mode for this filehandle.
+ */
+#define CEC_G_MODE		_IOR('a',  8, __u32)
+#define CEC_S_MODE		_IOW('a',  9, __u32)
+
+/*
+ * The remainder of this header defines all CEC messages and operands.
+ * The format matters since it the cec-ctl utility parses it to generate
+ * code for implementing all these messages.
+ *
+ * Comments ending with 'Feature' group messages for each feature.
+ * If messages are part of multiple features, then the "Has also"
+ * comment is used to list the previously defined messages that are
+ * supported by the feature.
+ *
+ * Before operands are defined a comment is added that gives the
+ * name of the operand and in brackets the variable name of the
+ * corresponding argument in the cec-funcs.h function.
+ */
+
+/* Messages */
+
+/* One Touch Play Feature */
+#define CEC_MSG_ACTIVE_SOURCE				0x82
+#define CEC_MSG_IMAGE_VIEW_ON				0x04
+#define CEC_MSG_TEXT_VIEW_ON				0x0d
+
+
+/* Routing Control Feature */
+
+/*
+ * Has also:
+ *	CEC_MSG_ACTIVE_SOURCE
+ */
+
+#define CEC_MSG_INACTIVE_SOURCE				0x9d
+#define CEC_MSG_REQUEST_ACTIVE_SOURCE			0x85
+#define CEC_MSG_ROUTING_CHANGE				0x80
+#define CEC_MSG_ROUTING_INFORMATION			0x81
+#define CEC_MSG_SET_STREAM_PATH				0x86
+
+
+/* Standby Feature */
+#define CEC_MSG_STANDBY					0x36
+
+
+/* One Touch Record Feature */
+#define CEC_MSG_RECORD_OFF				0x0b
+#define CEC_MSG_RECORD_ON				0x09
+/* Record Source Type Operand (rec_src_type) */
+#define CEC_OP_RECORD_SRC_OWN				1
+#define CEC_OP_RECORD_SRC_DIGITAL			2
+#define CEC_OP_RECORD_SRC_ANALOG			3
+#define CEC_OP_RECORD_SRC_EXT_PLUG			4
+#define CEC_OP_RECORD_SRC_EXT_PHYS_ADDR			5
+/* Service Identification Method Operand (service_id_method) */
+#define CEC_OP_SERVICE_ID_METHOD_BY_DIG_ID		0
+#define CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL		1
+/* Digital Service Broadcast System Operand (dig_bcast_system) */
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_GEN	0x00
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_GEN	0x01
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_GEN		0x02
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_BS		0x08
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_CS		0x09
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ARIB_T		0x0a
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_CABLE	0x10
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_SAT	0x11
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_ATSC_T		0x12
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_C		0x18
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_S		0x19
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_S2		0x1a
+#define CEC_OP_DIG_SERVICE_BCAST_SYSTEM_DVB_T		0x1b
+/* Analogue Broadcast Type Operand (ana_bcast_type) */
+#define CEC_OP_ANA_BCAST_TYPE_CABLE			0
+#define CEC_OP_ANA_BCAST_TYPE_SATELLITE			1
+#define CEC_OP_ANA_BCAST_TYPE_TERRESTRIAL		2
+/* Broadcast System Operand (bcast_system) */
+#define CEC_OP_BCAST_SYSTEM_PAL_BG			0x00
+#define CEC_OP_BCAST_SYSTEM_SECAM_LQ			0x01 /* SECAM L' */
+#define CEC_OP_BCAST_SYSTEM_PAL_M			0x02
+#define CEC_OP_BCAST_SYSTEM_NTSC_M			0x03
+#define CEC_OP_BCAST_SYSTEM_PAL_I			0x04
+#define CEC_OP_BCAST_SYSTEM_SECAM_DK			0x05
+#define CEC_OP_BCAST_SYSTEM_SECAM_BG			0x06
+#define CEC_OP_BCAST_SYSTEM_SECAM_L			0x07
+#define CEC_OP_BCAST_SYSTEM_PAL_DK			0x08
+#define CEC_OP_BCAST_SYSTEM_OTHER			0x1f
+/* Channel Number Format Operand (channel_number_fmt) */
+#define CEC_OP_CHANNEL_NUMBER_FMT_1_PART		0x01
+#define CEC_OP_CHANNEL_NUMBER_FMT_2_PART		0x02
+
+#define CEC_MSG_RECORD_STATUS				0x0a
+/* Record Status Operand (rec_status) */
+#define CEC_OP_RECORD_STATUS_CUR_SRC			0x01
+#define CEC_OP_RECORD_STATUS_DIG_SERVICE		0x02
+#define CEC_OP_RECORD_STATUS_ANA_SERVICE		0x03
+#define CEC_OP_RECORD_STATUS_EXT_INPUT			0x04
+#define CEC_OP_RECORD_STATUS_NO_DIG_SERVICE		0x05
+#define CEC_OP_RECORD_STATUS_NO_ANA_SERVICE		0x06
+#define CEC_OP_RECORD_STATUS_NO_SERVICE			0x07
+#define CEC_OP_RECORD_STATUS_INVALID_EXT_PLUG		0x09
+#define CEC_OP_RECORD_STATUS_INVALID_EXT_PHYS_ADDR	0x0a
+#define CEC_OP_RECORD_STATUS_UNSUP_CA			0x0b
+#define CEC_OP_RECORD_STATUS_NO_CA_ENTITLEMENTS		0x0c
+#define CEC_OP_RECORD_STATUS_CANT_COPY_SRC		0x0d
+#define CEC_OP_RECORD_STATUS_NO_MORE_COPIES		0x0e
+#define CEC_OP_RECORD_STATUS_NO_MEDIA			0x10
+#define CEC_OP_RECORD_STATUS_PLAYING			0x11
+#define CEC_OP_RECORD_STATUS_ALREADY_RECORDING		0x12
+#define CEC_OP_RECORD_STATUS_MEDIA_PROT			0x13
+#define CEC_OP_RECORD_STATUS_NO_SIGNAL			0x14
+#define CEC_OP_RECORD_STATUS_MEDIA_PROBLEM		0x15
+#define CEC_OP_RECORD_STATUS_NO_SPACE			0x16
+#define CEC_OP_RECORD_STATUS_PARENTAL_LOCK		0x17
+#define CEC_OP_RECORD_STATUS_TERMINATED_OK		0x1a
+#define CEC_OP_RECORD_STATUS_ALREADY_TERM		0x1b
+#define CEC_OP_RECORD_STATUS_OTHER			0x1f
+
+#define CEC_MSG_RECORD_TV_SCREEN			0x0f
+
+
+/* Timer Programming Feature */
+#define CEC_MSG_CLEAR_ANALOGUE_TIMER			0x33
+/* Recording Sequence Operand (recording_seq) */
+#define CEC_OP_REC_SEQ_SUNDAY				0x01
+#define CEC_OP_REC_SEQ_MONDAY				0x02
+#define CEC_OP_REC_SEQ_TUESDAY				0x04
+#define CEC_OP_REC_SEQ_WEDNESDAY			0x08
+#define CEC_OP_REC_SEQ_THURSDAY				0x10
+#define CEC_OP_REC_SEQ_FRIDAY				0x20
+#define CEC_OP_REC_SEQ_SATERDAY				0x40
+#define CEC_OP_REC_SEQ_ONCE_ONLY			0x00
+
+#define CEC_MSG_CLEAR_DIGITAL_TIMER			0x99
+
+#define CEC_MSG_CLEAR_EXT_TIMER				0xa1
+/* External Source Specifier Operand (ext_src_spec) */
+#define CEC_OP_EXT_SRC_PLUG				0x04
+#define CEC_OP_EXT_SRC_PHYS_ADDR			0x05
+
+#define CEC_MSG_SET_ANALOGUE_TIMER			0x34
+#define CEC_MSG_SET_DIGITAL_TIMER			0x97
+#define CEC_MSG_SET_EXT_TIMER				0xa2
+
+#define CEC_MSG_SET_TIMER_PROGRAM_TITLE			0x67
+#define CEC_MSG_TIMER_CLEARED_STATUS			0x43
+/* Timer Cleared Status Data Operand (timer_cleared_status) */
+#define CEC_OP_TIMER_CLR_STAT_RECORDING			0x00
+#define CEC_OP_TIMER_CLR_STAT_NO_MATCHING		0x01
+#define CEC_OP_TIMER_CLR_STAT_NO_INFO			0x02
+#define CEC_OP_TIMER_CLR_STAT_CLEARED			0x80
+
+#define CEC_MSG_TIMER_STATUS				0x35
+/* Timer Overlap Warning Operand (timer_overlap_warning) */
+#define CEC_OP_TIMER_OVERLAP_WARNING_NO_OVERLAP		0
+#define CEC_OP_TIMER_OVERLAP_WARNING_OVERLAP		1
+/* Media Info Operand (media_info) */
+#define CEC_OP_MEDIA_INFO_UNPROT_MEDIA			0
+#define CEC_OP_MEDIA_INFO_PROT_MEDIA			1
+#define CEC_OP_MEDIA_INFO_NO_MEDIA			2
+/* Programmed Indicator Operand (prog_indicator) */
+#define CEC_OP_PROG_IND_NOT_PROGRAMMED			0
+#define CEC_OP_PROG_IND_PROGRAMMED			1
+/* Programmed Info Operand (prog_info) */
+#define CEC_OP_PROG_INFO_ENOUGH_SPACE			0x08
+#define CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE		0x09
+#define CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE	0x0b
+#define CEC_OP_PROG_INFO_NONE_AVAILABLE			0x0a
+/* Not Programmed Error Info Operand (prog_error) */
+#define CEC_OP_PROG_ERROR_NO_FREE_TIMER			0x01
+#define CEC_OP_PROG_ERROR_DATE_OUT_OF_RANGE		0x02
+#define CEC_OP_PROG_ERROR_REC_SEQ_ERROR			0x03
+#define CEC_OP_PROG_ERROR_INV_EXT_PLUG			0x04
+#define CEC_OP_PROG_ERROR_INV_EXT_PHYS_ADDR		0x05
+#define CEC_OP_PROG_ERROR_CA_UNSUPP			0x06
+#define CEC_OP_PROG_ERROR_INSUF_CA_ENTITLEMENTS		0x07
+#define CEC_OP_PROG_ERROR_RESOLUTION_UNSUPP		0x08
+#define CEC_OP_PROG_ERROR_PARENTAL_LOCK			0x09
+#define CEC_OP_PROG_ERROR_CLOCK_FAILURE			0x0a
+#define CEC_OP_PROG_ERROR_DUPLICATE			0x0e
+
+
+/* System Information Feature */
+#define CEC_MSG_CEC_VERSION				0x9e
+/* CEC Version Operand (cec_version) */
+#define CEC_OP_CEC_VERSION_1_3A				4
+#define CEC_OP_CEC_VERSION_1_4				5
+#define CEC_OP_CEC_VERSION_2_0				6
+
+#define CEC_MSG_GET_CEC_VERSION				0x9f
+#define CEC_MSG_GIVE_PHYSICAL_ADDR			0x83
+#define CEC_MSG_GET_MENU_LANGUAGE			0x91
+#define CEC_MSG_REPORT_PHYSICAL_ADDR			0x84
+/* Primary Device Type Operand (prim_devtype) */
+#define CEC_OP_PRIM_DEVTYPE_TV				0
+#define CEC_OP_PRIM_DEVTYPE_RECORD			1
+#define CEC_OP_PRIM_DEVTYPE_TUNER			3
+#define CEC_OP_PRIM_DEVTYPE_PLAYBACK			4
+#define CEC_OP_PRIM_DEVTYPE_AUDIOSYSTEM			5
+#define CEC_OP_PRIM_DEVTYPE_SWITCH			6
+#define CEC_OP_PRIM_DEVTYPE_PROCESSOR			7
+
+#define CEC_MSG_SET_MENU_LANGUAGE			0x32
+#define CEC_MSG_REPORT_FEATURES				0xa6	/* HDMI 2.0 */
+/* All Device Types Operand (all_device_types) */
+#define CEC_OP_ALL_DEVTYPE_TV				0x80
+#define CEC_OP_ALL_DEVTYPE_RECORD			0x40
+#define CEC_OP_ALL_DEVTYPE_TUNER			0x20
+#define CEC_OP_ALL_DEVTYPE_PLAYBACK			0x10
+#define CEC_OP_ALL_DEVTYPE_AUDIOSYSTEM			0x08
+#define CEC_OP_ALL_DEVTYPE_SWITCH			0x04
+/*
+ * And if you wondering what happened to PROCESSOR devices: those should
+ * be mapped to a SWITCH.
+ */
+
+/* Valid for RC Profile and Device Feature operands */
+#define CEC_OP_FEAT_EXT					0x80	/* Extension bit */
+/* RC Profile Operand (rc_profile) */
+#define CEC_OP_FEAT_RC_TV_PROFILE_NONE			0x00
+#define CEC_OP_FEAT_RC_TV_PROFILE_1			0x02
+#define CEC_OP_FEAT_RC_TV_PROFILE_2			0x06
+#define CEC_OP_FEAT_RC_TV_PROFILE_3			0x0a
+#define CEC_OP_FEAT_RC_TV_PROFILE_4			0x0e
+#define CEC_OP_FEAT_RC_SRC_HAS_DEV_ROOT_MENU		0x50
+#define CEC_OP_FEAT_RC_SRC_HAS_DEV_SETUP_MENU		0x48
+#define CEC_OP_FEAT_RC_SRC_HAS_CONTENTS_MENU		0x44
+#define CEC_OP_FEAT_RC_SRC_HAS_MEDIA_TOP_MENU		0x42
+#define CEC_OP_FEAT_RC_SRC_HAS_MEDIA_CONTEXT_MENU	0x41
+/* Device Feature Operand (dev_features) */
+#define CEC_OP_FEAT_DEV_HAS_RECORD_TV_SCREEN		0x40
+#define CEC_OP_FEAT_DEV_HAS_SET_OSD_STRING		0x20
+#define CEC_OP_FEAT_DEV_HAS_DECK_CONTROL		0x10
+#define CEC_OP_FEAT_DEV_HAS_SET_AUDIO_RATE		0x08
+#define CEC_OP_FEAT_DEV_SINK_HAS_ARC_TX			0x04
+#define CEC_OP_FEAT_DEV_SOURCE_HAS_ARC_RX		0x02
+
+#define CEC_MSG_GIVE_FEATURES				0xa5	/* HDMI 2.0 */
+
+
+/* Deck Control Feature */
+#define CEC_MSG_DECK_CONTROL				0x42
+/* Deck Control Mode Operand (deck_control_mode) */
+#define CEC_OP_DECK_CTL_MODE_SKIP_FWD			1
+#define CEC_OP_DECK_CTL_MODE_SKIP_REV			2
+#define CEC_OP_DECK_CTL_MODE_STOP			3
+#define CEC_OP_DECK_CTL_MODE_EJECT			4
+
+#define CEC_MSG_DECK_STATUS				0x1b
+/* Deck Info Operand (deck_info) */
+#define CEC_OP_DECK_INFO_PLAY				0x11
+#define CEC_OP_DECK_INFO_RECORD				0x12
+#define CEC_OP_DECK_INFO_PLAY_REV			0x13
+#define CEC_OP_DECK_INFO_STILL				0x14
+#define CEC_OP_DECK_INFO_SLOW				0x15
+#define CEC_OP_DECK_INFO_SLOW_REV			0x16
+#define CEC_OP_DECK_INFO_FAST_FWD			0x17
+#define CEC_OP_DECK_INFO_FAST_REV			0x18
+#define CEC_OP_DECK_INFO_NO_MEDIA			0x19
+#define CEC_OP_DECK_INFO_STOP				0x1a
+#define CEC_OP_DECK_INFO_SKIP_FWD			0x1b
+#define CEC_OP_DECK_INFO_SKIP_REV			0x1c
+#define CEC_OP_DECK_INFO_INDEX_SEARCH_FWD		0x1d
+#define CEC_OP_DECK_INFO_INDEX_SEARCH_REV		0x1e
+#define CEC_OP_DECK_INFO_OTHER				0x1f
+
+#define CEC_MSG_GIVE_DECK_STATUS			0x1a
+/* Status Request Operand (status_req) */
+#define CEC_OP_STATUS_REQ_ON				1
+#define CEC_OP_STATUS_REQ_OFF				2
+#define CEC_OP_STATUS_REQ_ONCE				3
+
+#define CEC_MSG_PLAY					0x41
+/* Play Mode Operand (play_mode) */
+#define CEC_OP_PLAY_MODE_PLAY_FWD			0x24
+#define CEC_OP_PLAY_MODE_PLAY_REV			0x20
+#define CEC_OP_PLAY_MODE_PLAY_STILL			0x25
+#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MIN		0x05
+#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MED		0x06
+#define CEC_OP_PLAY_MODE_PLAY_FAST_FWD_MAX		0x07
+#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MIN		0x09
+#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MED		0x0a
+#define CEC_OP_PLAY_MODE_PLAY_FAST_REV_MAX		0x0b
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MIN		0x15
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MED		0x16
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_FWD_MAX		0x17
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MIN		0x19
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MED		0x1a
+#define CEC_OP_PLAY_MODE_PLAY_SLOW_REV_MAX		0x1b
+
+
+/* Tuner Control Feature */
+#define CEC_MSG_GIVE_TUNER_DEVICE_STATUS		0x08
+#define CEC_MSG_SELECT_ANALOGUE_SERVICE			0x92
+#define CEC_MSG_SELECT_DIGITAL_SERVICE			0x93
+#define CEC_MSG_TUNER_DEVICE_STATUS			0x07
+/* Recording Flag Operand (rec_flag) */
+#define CEC_OP_REC_FLAG_USED				0
+#define CEC_OP_REC_FLAG_NOT_USED			1
+/* Tuner Display Info Operand (tuner_display_info) */
+#define CEC_OP_TUNER_DISPLAY_INFO_DIGITAL		0
+#define CEC_OP_TUNER_DISPLAY_INFO_NONE			1
+#define CEC_OP_TUNER_DISPLAY_INFO_ANALOGUE		2
+
+#define CEC_MSG_TUNER_STEP_DECREMENT			0x06
+#define CEC_MSG_TUNER_STEP_INCREMENT			0x05
+
+
+/* Vendor Specific Commands Feature */
+
+/*
+ * Has also:
+ *	CEC_MSG_CEC_VERSION
+ *	CEC_MSG_GET_CEC_VERSION
+ */
+#define CEC_MSG_DEVICE_VENDOR_ID			0x87
+#define CEC_MSG_GIVE_DEVICE_VENDOR_ID			0x8c
+#define CEC_MSG_VENDOR_COMMAND				0x89
+#define CEC_MSG_VENDOR_COMMAND_WITH_ID			0xa0
+#define CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN		0x8a
+#define CEC_MSG_VENDOR_REMOTE_BUTTON_UP			0x8b
+
+
+/* OSD Display Feature */
+#define CEC_MSG_SET_OSD_STRING				0x64
+/* Display Control Operand (disp_ctl) */
+#define CEC_OP_DISP_CTL_DEFAULT				0x00
+#define CEC_OP_DISP_CTL_UNTIL_CLEARED			0x40
+#define CEC_OP_DISP_CTL_CLEAR				0x80
+
+
+/* Device OSD Transfer Feature */
+#define CEC_MSG_GIVE_OSD_NAME				0x46
+#define CEC_MSG_SET_OSD_NAME				0x47
+
+
+/* Device Menu Control Feature */
+#define CEC_MSG_MENU_REQUEST				0x8d
+/* Menu Request Type Operand (menu_req) */
+#define CEC_OP_MENU_REQUEST_ACTIVATE			0x00
+#define CEC_OP_MENU_REQUEST_DEACTIVATE			0x01
+#define CEC_OP_MENU_REQUEST_QUERY			0x02
+
+#define CEC_MSG_MENU_STATUS				0x8e
+/* Menu State Operand (menu_state) */
+#define CEC_OP_MENU_STATE_ACTIVATED			0x00
+#define CEC_OP_MENU_STATE_DEACTIVATED			0x01
+
+#define CEC_MSG_USER_CONTROL_PRESSED			0x44
+/* UI Broadcast Type Operand (ui_bcast_type) */
+#define CEC_OP_UI_BCAST_TYPE_TOGGLE_ALL			0x00
+#define CEC_OP_UI_BCAST_TYPE_TOGGLE_DIG_ANA		0x01
+#define CEC_OP_UI_BCAST_TYPE_ANALOGUE			0x10
+#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_T			0x20
+#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_CABLE		0x30
+#define CEC_OP_UI_BCAST_TYPE_ANALOGUE_SAT		0x40
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL			0x50
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL_T			0x60
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL_CABLE		0x70
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL_SAT		0x80
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL_COM_SAT		0x90
+#define CEC_OP_UI_BCAST_TYPE_DIGITAL_COM_SAT2		0x91
+#define CEC_OP_UI_BCAST_TYPE_IP				0xa0
+/* UI Sound Presentation Control Operand (ui_snd_pres_ctl) */
+#define CEC_OP_UI_SND_PRES_CTL_DUAL_MONO		0x10
+#define CEC_OP_UI_SND_PRES_CTL_KARAOKE			0x20
+#define CEC_OP_UI_SND_PRES_CTL_DOWNMIX			0x80
+#define CEC_OP_UI_SND_PRES_CTL_REVERB			0x90
+#define CEC_OP_UI_SND_PRES_CTL_EQUALIZER		0xa0
+#define CEC_OP_UI_SND_PRES_CTL_BASS_UP			0xb1
+#define CEC_OP_UI_SND_PRES_CTL_BASS_NEUTRAL		0xb2
+#define CEC_OP_UI_SND_PRES_CTL_BASS_DOWN		0xb3
+#define CEC_OP_UI_SND_PRES_CTL_TREBLE_UP		0xc1
+#define CEC_OP_UI_SND_PRES_CTL_TREBLE_NEUTRAL		0xc2
+#define CEC_OP_UI_SND_PRES_CTL_TREBLE_DOWN		0xc3
+
+#define CEC_MSG_USER_CONTROL_RELEASED			0x45
+
+
+/* Remote Control Passthrough Feature */
+
+/*
+ * Has also:
+ *	CEC_MSG_USER_CONTROL_PRESSED
+ *	CEC_MSG_USER_CONTROL_RELEASED
+ */
+
+
+/* Power Status Feature */
+#define CEC_MSG_GIVE_DEVICE_POWER_STATUS		0x8f
+#define CEC_MSG_REPORT_POWER_STATUS			0x90
+/* Power Status Operand (pwr_state) */
+#define CEC_OP_POWER_STATUS_ON				0
+#define CEC_OP_POWER_STATUS_STANDBY			1
+#define CEC_OP_POWER_STATUS_TO_ON			2
+#define CEC_OP_POWER_STATUS_TO_STANDBY			3
+
+
+/* General Protocol Messages */
+#define CEC_MSG_FEATURE_ABORT				0x00
+/* Abort Reason Operand (reason) */
+#define CEC_OP_ABORT_UNRECOGNIZED_OP			0
+#define CEC_OP_ABORT_INCORRECT_MODE			1
+#define CEC_OP_ABORT_NO_SOURCE				2
+#define CEC_OP_ABORT_INVALID_OP				3
+#define CEC_OP_ABORT_REFUSED				4
+#define CEC_OP_ABORT_UNDETERMINED			5
+
+#define CEC_MSG_ABORT					0xff
+
+
+/* System Audio Control Feature */
+
+/*
+ * Has also:
+ *	CEC_MSG_USER_CONTROL_PRESSED
+ *	CEC_MSG_USER_CONTROL_RELEASED
+ */
+#define CEC_MSG_GIVE_AUDIO_STATUS			0x71
+#define CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS		0x7d
+#define CEC_MSG_REPORT_AUDIO_STATUS			0x7a
+/* Audio Mute Status Operand (aud_mute_status) */
+#define CEC_OP_AUD_MUTE_STATUS_OFF			0
+#define CEC_OP_AUD_MUTE_STATUS_ON			1
+
+#define CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR		0xa3
+#define CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR		0xa4
+#define CEC_MSG_SET_SYSTEM_AUDIO_MODE			0x72
+/* System Audio Status Operand (sys_aud_status) */
+#define CEC_OP_SYS_AUD_STATUS_OFF			0
+#define CEC_OP_SYS_AUD_STATUS_ON			1
+
+#define CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST		0x70
+#define CEC_MSG_SYSTEM_AUDIO_MODE_STATUS		0x7e
+/* Audio Format ID Operand (audio_format_id) */
+#define CEC_OP_AUD_FMT_ID_CEA861			0
+#define CEC_OP_AUD_FMT_ID_CEA861_CXT			1
+
+
+/* Audio Rate Control Feature */
+#define CEC_MSG_SET_AUDIO_RATE				0x9a
+/* Audio Rate Operand (audio_rate) */
+#define CEC_OP_AUD_RATE_OFF				0
+#define CEC_OP_AUD_RATE_WIDE_STD			1
+#define CEC_OP_AUD_RATE_WIDE_FAST			2
+#define CEC_OP_AUD_RATE_WIDE_SLOW			3
+#define CEC_OP_AUD_RATE_NARROW_STD			4
+#define CEC_OP_AUD_RATE_NARROW_FAST			5
+#define CEC_OP_AUD_RATE_NARROW_SLOW			6
+
+
+/* Audio Return Channel Control Feature */
+#define CEC_MSG_INITIATE_ARC				0xc0
+#define CEC_MSG_REPORT_ARC_INITIATED			0xc1
+#define CEC_MSG_REPORT_ARC_TERMINATED			0xc2
+#define CEC_MSG_REQUEST_ARC_INITIATION			0xc3
+#define CEC_MSG_REQUEST_ARC_TERMINATION			0xc4
+#define CEC_MSG_TERMINATE_ARC				0xc5
+
+
+/* Dynamic Audio Lipsync Feature */
+/* Only for CEC 2.0 and up */
+#define CEC_MSG_REQUEST_CURRENT_LATENCY			0xa7
+#define CEC_MSG_REPORT_CURRENT_LATENCY			0xa8
+/* Low Latency Mode Operand (low_latency_mode) */
+#define CEC_OP_LOW_LATENCY_MODE_OFF			0
+#define CEC_OP_LOW_LATENCY_MODE_ON			1
+/* Audio Output Compensated Operand (audio_out_compensated) */
+#define CEC_OP_AUD_OUT_COMPENSATED_NA			0
+#define CEC_OP_AUD_OUT_COMPENSATED_DELAY		1
+#define CEC_OP_AUD_OUT_COMPENSATED_NO_DELAY		2
+#define CEC_OP_AUD_OUT_COMPENSATED_PARTIAL_DELAY	3
+
+
+/* Capability Discovery and Control Feature */
+#define CEC_MSG_CDC_MESSAGE				0xf8
+/* Ethernet-over-HDMI: nobody ever does this... */
+#define CEC_MSG_CDC_HEC_INQUIRE_STATE			0x00
+#define CEC_MSG_CDC_HEC_REPORT_STATE			0x01
+/* HEC Functionality State Operand (hec_func_state) */
+#define CEC_OP_HEC_FUNC_STATE_NOT_SUPPORTED		0
+#define CEC_OP_HEC_FUNC_STATE_INACTIVE			1
+#define CEC_OP_HEC_FUNC_STATE_ACTIVE			2
+#define CEC_OP_HEC_FUNC_STATE_ACTIVATION_FIELD		3
+/* Host Functionality State Operand (host_func_state) */
+#define CEC_OP_HOST_FUNC_STATE_NOT_SUPPORTED		0
+#define CEC_OP_HOST_FUNC_STATE_INACTIVE			1
+#define CEC_OP_HOST_FUNC_STATE_ACTIVE			2
+/* ENC Functionality State Operand (enc_func_state) */
+#define CEC_OP_ENC_FUNC_STATE_EXT_CON_NOT_SUPPORTED	0
+#define CEC_OP_ENC_FUNC_STATE_EXT_CON_INACTIVE		1
+#define CEC_OP_ENC_FUNC_STATE_EXT_CON_ACTIVE		2
+/* CDC Error Code Operand (cdc_errcode) */
+#define CEC_OP_CDC_ERROR_CODE_NONE			0
+#define CEC_OP_CDC_ERROR_CODE_CAP_UNSUPPORTED		1
+#define CEC_OP_CDC_ERROR_CODE_WRONG_STATE		2
+#define CEC_OP_CDC_ERROR_CODE_OTHER			3
+/* HEC Support Operand (hec_support) */
+#define CEC_OP_HEC_SUPPORT_NO				0
+#define CEC_OP_HEC_SUPPORT_YES				1
+/* HEC Activation Operand (hec_activation) */
+#define CEC_OP_HEC_ACTIVATION_ON			0
+#define CEC_OP_HEC_ACTIVATION_OFF			1
+
+#define CEC_MSG_CDC_HEC_SET_STATE_ADJACENT		0x02
+#define CEC_MSG_CDC_HEC_SET_STATE			0x03
+/* HEC Set State Operand (hec_set_state) */
+#define CEC_OP_HEC_SET_STATE_DEACTIVATE			0
+#define CEC_OP_HEC_SET_STATE_ACTIVATE			1
+
+#define CEC_MSG_CDC_HEC_REQUEST_DEACTIVATION		0x04
+#define CEC_MSG_CDC_HEC_NOTIFY_ALIVE			0x05
+#define CEC_MSG_CDC_HEC_DISCOVER			0x06
+/* Hotplug Detect messages */
+#define CEC_MSG_CDC_HPD_SET_STATE			0x10
+/* HPD State Operand (hpd_state) */
+#define CEC_OP_HPD_STATE_CP_EDID_DISABLE		0
+#define CEC_OP_HPD_STATE_CP_EDID_ENABLE			1
+#define CEC_OP_HPD_STATE_CP_EDID_DISABLE_ENABLE		2
+#define CEC_OP_HPD_STATE_EDID_DISABLE			3
+#define CEC_OP_HPD_STATE_EDID_ENABLE			4
+#define CEC_OP_HPD_STATE_EDID_DISABLE_ENABLE		5
+#define CEC_MSG_CDC_HPD_REPORT_STATE			0x11
+/* HPD Error Code Operand (hpd_error) */
+#define CEC_OP_HPD_ERROR_NONE				0
+#define CEC_OP_HPD_ERROR_INITIATOR_NOT_CAPABLE		1
+#define CEC_OP_HPD_ERROR_INITIATOR_WRONG_STATE		2
+#define CEC_OP_HPD_ERROR_OTHER				3
+#define CEC_OP_HPD_ERROR_NONE_NO_VIDEO			4
+
+/* End of Messages */
+
+/* Helper functions to identify the 'special' CEC devices */
+
+static inline bool cec_is_2nd_tv(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a second TV if the logical address is 14 or 15 and the
+	 * primary device type is a TV.
+	 */
+	return las->num_log_addrs &&
+	       las->log_addr[0] >= CEC_LOG_ADDR_SPECIFIC &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_TV;
+}
+
+static inline bool cec_is_processor(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a processor if the logical address is 12-15 and the
+	 * primary device type is a Processor.
+	 */
+	return las->num_log_addrs &&
+	       las->log_addr[0] >= CEC_LOG_ADDR_BACKUP_1 &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_PROCESSOR;
+}
+
+static inline bool cec_is_switch(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a switch if the logical address is 15 and the
+	 * primary device type is a Switch and the CDC-Only flag is not set.
+	 */
+	return las->num_log_addrs == 1 &&
+	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
+	       !(las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
+}
+
+static inline bool cec_is_cdc_only(const struct cec_log_addrs *las)
+{
+	/*
+	 * It is a CDC-only device if the logical address is 15 and the
+	 * primary device type is a Switch and the CDC-Only flag is set.
+	 */
+	return las->num_log_addrs == 1 &&
+	       las->log_addr[0] == CEC_LOG_ADDR_UNREGISTERED &&
+	       las->primary_device_type[0] == CEC_OP_PRIM_DEVTYPE_SWITCH &&
+	       (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY);
+}
+
+#endif
-- 
cgit v1.2.3


From 217f6974368188fd8bd7804bf5a036aa5762c5e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Nov 2016 10:15:11 -0800
Subject: net: busy-poll: allow preemption in sk_busy_loop()

After commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job"),
sk_busy_loop() needs a bit of care :
softirqs might be delayed since we do not allow preemption yet.

This patch adds preemptiom points in sk_busy_loop(),
and makes sure no unnecessary cache line dirtying
or atomic operations are done while looping.

A new flag is added into napi->state : NAPI_STATE_IN_BUSY_POLL

This prevents napi_complete_done() from clearing NAPIF_STATE_SCHED,
so that sk_busy_loop() does not have to grab it again.

Similarly, netpoll_poll_lock() is done one time.

This gives about 10 to 20 % improvement in various busy polling
tests, especially when many threads are busy polling in
configurations with large number of NIC queues.

This should allow experimenting with bigger delays without
hurting overall latencies.

Tested:
 On a 40Gb mlx4 NIC, 32 RX/TX queues.

 echo 70 >/proc/sys/net/core/busy_read
 for i in `seq 1 40`; do echo -n $i: ; ./super_netperf $i -H lpaa24 -t UDP_RR -- -N -n; done

    Before:      After:
 1:   90072   92819
 2:  157289  184007
 3:  235772  213504
 4:  344074  357513
 5:  394755  458267
 6:  461151  487819
 7:  549116  625963
 8:  544423  716219
 9:  720460  738446
10:  794686  837612
11:  915998  923960
12:  937507  925107
13: 1019677  971506
14: 1046831 1113650
15: 1114154 1148902
16: 1105221 1179263
17: 1266552 1299585
18: 1258454 1383817
19: 1341453 1312194
20: 1363557 1488487
21: 1387979 1501004
22: 1417552 1601683
23: 1550049 1642002
24: 1568876 1601915
25: 1560239 1683607
26: 1640207 1745211
27: 1706540 1723574
28: 1638518 1722036
29: 1734309 1757447
30: 1782007 1855436
31: 1724806 1888539
32: 1717716 1944297
33: 1778716 1869118
34: 1805738 1983466
35: 1815694 2020758
36: 1893059 2035632
37: 1843406 2034653
38: 1888830 2086580
39: 1972827 2143567
40: 1877729 2181851

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Adam Belay <abelay@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Cc: Yuval Mintz <Yuval.Mintz@cavium.com>
Cc: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  10 +++++
 net/core/dev.c            | 102 +++++++++++++++++++++++++++++++++++++---------
 2 files changed, 92 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 86bacf6a64f0..e71de66e3792 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -334,6 +334,16 @@ enum {
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+};
+
+enum {
+	NAPIF_STATE_SCHED	 = (1UL << NAPI_STATE_SCHED),
+	NAPIF_STATE_DISABLE	 = (1UL << NAPI_STATE_DISABLE),
+	NAPIF_STATE_NPSVC	 = (1UL << NAPI_STATE_NPSVC),
+	NAPIF_STATE_HASHED	 = (1UL << NAPI_STATE_HASHED),
+	NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+	NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
 };
 
 enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6deba68ad9e4..369dcc8efc01 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4902,6 +4902,12 @@ void __napi_complete(struct napi_struct *n)
 {
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 
+	/* Some drivers call us directly, instead of calling
+	 * napi_complete_done().
+	 */
+	if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
+		return;
+
 	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
@@ -4913,10 +4919,13 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 	unsigned long flags;
 
 	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
+	 * 1) Don't let napi dequeue from the cpu poll list
+	 *    just in case its running on a different cpu.
+	 * 2) If we are busy polling, do nothing here, we have
+	 *    the guarantee we will be called later.
 	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+	if (unlikely(n->state & (NAPIF_STATE_NPSVC |
+				 NAPIF_STATE_IN_BUSY_POLL)))
 		return;
 
 	if (n->gro_list) {
@@ -4956,13 +4965,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 }
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
+
 #define BUSY_POLL_BUDGET 8
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+{
+	int rc;
+
+	clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+	local_bh_disable();
+
+	/* All we really want here is to re-enable device interrupts.
+	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
+	 */
+	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	netpoll_poll_unlock(have_poll_lock);
+	if (rc == BUSY_POLL_BUDGET)
+		__napi_schedule(napi);
+	local_bh_enable();
+	if (local_softirq_pending())
+		do_softirq();
+}
+
 bool sk_busy_loop(struct sock *sk, int nonblock)
 {
 	unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+	int (*napi_poll)(struct napi_struct *napi, int budget);
 	int (*busy_poll)(struct napi_struct *dev);
+	void *have_poll_lock = NULL;
 	struct napi_struct *napi;
-	int rc = false;
+	int rc;
+
+restart:
+	rc = false;
+	napi_poll = NULL;
 
 	rcu_read_lock();
 
@@ -4973,24 +5010,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 	/* Note: ndo_busy_poll method is optional in linux-4.5 */
 	busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
 
-	do {
+	preempt_disable();
+	for (;;) {
 		rc = 0;
 		local_bh_disable();
 		if (busy_poll) {
 			rc = busy_poll(napi);
-		} else if (napi_schedule_prep(napi)) {
-			void *have = netpoll_poll_lock(napi);
-
-			if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
-				rc = napi->poll(napi, BUSY_POLL_BUDGET);
-				trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
-				if (rc == BUSY_POLL_BUDGET) {
-					napi_complete_done(napi, rc);
-					napi_schedule(napi);
-				}
-			}
-			netpoll_poll_unlock(have);
+			goto count;
 		}
+		if (!napi_poll) {
+			unsigned long val = READ_ONCE(napi->state);
+
+			/* If multiple threads are competing for this napi,
+			 * we avoid dirtying napi->state as much as we can.
+			 */
+			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
+				   NAPIF_STATE_IN_BUSY_POLL))
+				goto count;
+			if (cmpxchg(&napi->state, val,
+				    val | NAPIF_STATE_IN_BUSY_POLL |
+					  NAPIF_STATE_SCHED) != val)
+				goto count;
+			have_poll_lock = netpoll_poll_lock(napi);
+			napi_poll = napi->poll;
+		}
+		rc = napi_poll(napi, BUSY_POLL_BUDGET);
+		trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+count:
 		if (rc > 0)
 			__NET_ADD_STATS(sock_net(sk),
 					LINUX_MIB_BUSYPOLLRXPACKETS, rc);
@@ -4999,10 +5045,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 		if (rc == LL_FLUSH_FAILED)
 			break; /* permanent failure */
 
-		cpu_relax();
-	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
-		 !need_resched() && !busy_loop_timeout(end_time));
+		if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
+		    busy_loop_timeout(end_time))
+			break;
 
+		if (unlikely(need_resched())) {
+			if (napi_poll)
+				busy_poll_stop(napi, have_poll_lock);
+			preempt_enable();
+			rcu_read_unlock();
+			cond_resched();
+			rc = !skb_queue_empty(&sk->sk_receive_queue);
+			if (rc || busy_loop_timeout(end_time))
+				return rc;
+			goto restart;
+		}
+		cpu_relax_lowlatency();
+	}
+	if (napi_poll)
+		busy_poll_stop(napi, have_poll_lock);
+	preempt_enable();
 	rc = !skb_queue_empty(&sk->sk_receive_queue);
 out:
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 364b6055738b4c752c30ccaaf25c624e69d76195 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Nov 2016 10:15:13 -0800
Subject: net: busy-poll: return busypolling status to drivers

NAPI drivers use napi_complete_done() or napi_complete() when
they drained RX ring and right before re-enabling device interrupts.

In busy polling, we can avoid interrupts being delivered since
we are polling RX ring in a controlled loop.

Drivers can chose to use napi_complete_done() return value
to reduce interrupts overhead while busy polling is active.

This is optional, legacy drivers should work fine even
if not updated.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Adam Belay <abelay@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Cc: Yuval Mintz <Yuval.Mintz@cavium.com>
Cc: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 ++++---
 net/core/dev.c            | 10 ++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e71de66e3792..bcddf951ccee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -463,16 +463,17 @@ static inline bool napi_reschedule(struct napi_struct *napi)
 	return false;
 }
 
-void __napi_complete(struct napi_struct *n);
-void napi_complete_done(struct napi_struct *n, int work_done);
+bool __napi_complete(struct napi_struct *n);
+bool napi_complete_done(struct napi_struct *n, int work_done);
 /**
  *	napi_complete - NAPI processing complete
  *	@n: NAPI context
  *
  * Mark NAPI processing as complete.
  * Consider using napi_complete_done() instead.
+ * Return false if device should avoid rearming interrupts.
  */
-static inline void napi_complete(struct napi_struct *n)
+static inline bool napi_complete(struct napi_struct *n)
 {
 	return napi_complete_done(n, 0);
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 369dcc8efc01..edba9efeb2e9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4898,7 +4898,7 @@ void __napi_schedule_irqoff(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
 
-void __napi_complete(struct napi_struct *n)
+bool __napi_complete(struct napi_struct *n)
 {
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 
@@ -4906,15 +4906,16 @@ void __napi_complete(struct napi_struct *n)
 	 * napi_complete_done().
 	 */
 	if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
-		return;
+		return false;
 
 	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
+	return true;
 }
 EXPORT_SYMBOL(__napi_complete);
 
-void napi_complete_done(struct napi_struct *n, int work_done)
+bool napi_complete_done(struct napi_struct *n, int work_done)
 {
 	unsigned long flags;
 
@@ -4926,7 +4927,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 	 */
 	if (unlikely(n->state & (NAPIF_STATE_NPSVC |
 				 NAPIF_STATE_IN_BUSY_POLL)))
-		return;
+		return false;
 
 	if (n->gro_list) {
 		unsigned long timeout = 0;
@@ -4948,6 +4949,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 		__napi_complete(n);
 		local_irq_restore(flags);
 	}
+	return true;
 }
 EXPORT_SYMBOL(napi_complete_done);
 
-- 
cgit v1.2.3


From c139c6b7b75abfaa2246cbde2393656806d37b51 Mon Sep 17 00:00:00 2001
From: "Longpeng \\(Mike\\)" <longpeng2@huawei.com>
Date: Wed, 9 Nov 2016 10:51:23 +0800
Subject: ACPI / tebles: remove redundant declare of acpi_table_parse_entries()

This function declared twice, so remove one declaration of it.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 61a3d90f32b3..cb4616641235 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -220,10 +220,6 @@ int __init acpi_table_parse_entries(char *id, unsigned long table_size,
 			      int entry_id,
 			      acpi_tbl_entry_handler handler,
 			      unsigned int max_entries);
-int __init acpi_table_parse_entries(char *id, unsigned long table_size,
-			      int entry_id,
-			      acpi_tbl_entry_handler handler,
-			      unsigned int max_entries);
 int __init acpi_table_parse_entries_array(char *id, unsigned long table_size,
 			      struct acpi_subtable_proc *proc, int proc_num,
 			      unsigned int max_entries);
-- 
cgit v1.2.3


From 89c4b442b78bdba388337cc746fe63caba85f46c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Nov 2016 14:54:50 -0800
Subject: netpoll: more efficient locking

Callers of netpoll_poll_lock() own NAPI_STATE_SCHED

Callers of netpoll_poll_unlock() have BH blocked between
the NAPI_STATE_SCHED being cleared and poll_lock is released.

We can avoid the spinlock which has no contention, and use cmpxchg()
on poll_owner which we need to set anyway.

This removes a possible lockdep violation after the cited commit,
since sk_busy_loop() re-enables BH before calling busy_poll_stop()

Fixes: 217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 include/linux/netpoll.h   | 13 +++++++------
 net/core/dev.c            |  1 -
 net/core/netpoll.c        |  6 +++---
 4 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bcddf951ccee..e84800edd249 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -316,7 +316,6 @@ struct napi_struct {
 	unsigned int		gro_count;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
-	spinlock_t		poll_lock;
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index b25ee9ffdbe6..1828900c9411 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -78,8 +78,11 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
 	struct net_device *dev = napi->dev;
 
 	if (dev && dev->npinfo) {
-		spin_lock(&napi->poll_lock);
-		napi->poll_owner = smp_processor_id();
+		int owner = smp_processor_id();
+
+		while (cmpxchg(&napi->poll_owner, -1, owner) != -1)
+			cpu_relax();
+
 		return napi;
 	}
 	return NULL;
@@ -89,10 +92,8 @@ static inline void netpoll_poll_unlock(void *have)
 {
 	struct napi_struct *napi = have;
 
-	if (napi) {
-		napi->poll_owner = -1;
-		spin_unlock(&napi->poll_lock);
-	}
+	if (napi)
+		smp_store_release(&napi->poll_owner, -1);
 }
 
 static inline bool netpoll_tx_running(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index edba9efeb2e9..f71b34ab57a5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5143,7 +5143,6 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	list_add(&napi->dev_list, &dev->napi_list);
 	napi->dev = dev;
 #ifdef CONFIG_NETPOLL
-	spin_lock_init(&napi->poll_lock);
 	napi->poll_owner = -1;
 #endif
 	set_bit(NAPI_STATE_SCHED, &napi->state);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 53599bd0c82d..9424673009c1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -171,12 +171,12 @@ static void poll_one_napi(struct napi_struct *napi)
 static void poll_napi(struct net_device *dev)
 {
 	struct napi_struct *napi;
+	int cpu = smp_processor_id();
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		if (napi->poll_owner != smp_processor_id() &&
-		    spin_trylock(&napi->poll_lock)) {
+		if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
 			poll_one_napi(napi);
-			spin_unlock(&napi->poll_lock);
+			smp_store_release(&napi->poll_owner, -1);
 		}
 	}
 }
-- 
cgit v1.2.3


From 6d0d287891a022ebba572327cbd70b5de69a63a2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 16 Nov 2016 13:23:05 +0100
Subject: locking/core: Provide common cpu_relax_yield() definition

No need to duplicate the same define everywhere. Since
the only user is stop-machine and the only provider is
s390, we can use a default implementation of cpu_relax_yield()
in sched.h.

Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Noam Camus <noamc@ezchip.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-s390 <linux-s390@vger.kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: sparclinux@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1479298985-191589-1-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/processor.h      | 1 -
 arch/arc/include/asm/processor.h        | 3 ---
 arch/arm/include/asm/processor.h        | 2 --
 arch/arm64/include/asm/processor.h      | 2 --
 arch/avr32/include/asm/processor.h      | 1 -
 arch/blackfin/include/asm/processor.h   | 1 -
 arch/c6x/include/asm/processor.h        | 1 -
 arch/cris/include/asm/processor.h       | 1 -
 arch/frv/include/asm/processor.h        | 1 -
 arch/h8300/include/asm/processor.h      | 1 -
 arch/hexagon/include/asm/processor.h    | 1 -
 arch/ia64/include/asm/processor.h       | 1 -
 arch/m32r/include/asm/processor.h       | 1 -
 arch/m68k/include/asm/processor.h       | 1 -
 arch/metag/include/asm/processor.h      | 1 -
 arch/microblaze/include/asm/processor.h | 1 -
 arch/mips/include/asm/processor.h       | 1 -
 arch/mn10300/include/asm/processor.h    | 1 -
 arch/nios2/include/asm/processor.h      | 1 -
 arch/openrisc/include/asm/processor.h   | 1 -
 arch/parisc/include/asm/processor.h     | 1 -
 arch/powerpc/include/asm/processor.h    | 2 --
 arch/s390/include/asm/processor.h       | 1 +
 arch/score/include/asm/processor.h      | 1 -
 arch/sh/include/asm/processor.h         | 1 -
 arch/sparc/include/asm/processor_32.h   | 1 -
 arch/sparc/include/asm/processor_64.h   | 1 -
 arch/tile/include/asm/processor.h       | 2 --
 arch/unicore32/include/asm/processor.h  | 1 -
 arch/x86/include/asm/processor.h        | 2 --
 arch/x86/um/asm/processor.h             | 1 -
 arch/xtensa/include/asm/processor.h     | 1 -
 include/linux/sched.h                   | 4 ++++
 33 files changed, 5 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 31e8dbeef10b..2fec2dee3020 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -58,7 +58,6 @@ unsigned long get_wchan(struct task_struct *p);
   ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp)
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d102a49ad8c5..6e1242da0159 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -60,15 +60,12 @@ struct task_struct;
 #ifndef CONFIG_EZNPS_MTM_EXT
 
 #define cpu_relax()		barrier()
-#define cpu_relax_yield()	cpu_relax()
 
 #else
 
 #define cpu_relax()     \
 	__asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
 
-#define cpu_relax_yield()	cpu_relax()
-
 #endif
 
 #define copy_segments(tsk, mm)      do { } while (0)
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 9e71c58bfa6f..c3d5fc124a05 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -82,8 +82,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define cpu_relax()			barrier()
 #endif
 
-#define cpu_relax_yield()  	              cpu_relax()
-
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6132f64af68d..747c65a616ed 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -149,8 +149,6 @@ static inline void cpu_relax(void)
 	asm volatile("yield" ::: "memory");
 }
 
-#define cpu_relax_yield()                     cpu_relax()
-
 /* Thread switching */
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h
index ee62365ad0da..972adcc1e8f4 100644
--- a/arch/avr32/include/asm/processor.h
+++ b/arch/avr32/include/asm/processor.h
@@ -92,7 +92,6 @@ extern struct avr32_cpuinfo boot_cpu_data;
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
 
 #define cpu_relax()		barrier()
-#define cpu_relax_yield()	cpu_relax()
 #define cpu_sync_pipeline()	asm volatile("sub pc, -2" : : : "memory")
 
 struct cpu_context {
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 57acfb1acfe5..85d4af97c986 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -92,7 +92,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define	KSTK_ESP(tsk)	((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
 #define cpu_relax()    	smp_mb()
-#define cpu_relax_yield()      cpu_relax()
 
 /* Get the Silicon Revision of the chip */
 static inline uint32_t __pure bfin_revid(void)
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index 1fd22e727e44..b9eb3da7f278 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -121,7 +121,6 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(task)	(task_pt_regs(task)->sp)
 
 #define cpu_relax()		do { } while (0)
-#define cpu_relax_yield()             cpu_relax()
 
 extern const struct seq_operations cpuinfo_op;
 
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 1a578417397f..15b815df29c1 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -63,7 +63,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
-#define cpu_relax_yield() cpu_relax()
 
 void default_idle(void);
 
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index c1e5f2a0ca31..ddaeb9cc9143 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -107,7 +107,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define	KSTK_ESP(tsk)	((tsk)->thread.frame0->sp)
 
 #define cpu_relax() barrier()
-#define cpu_relax_yield() cpu_relax()
 
 /* data cache prefetch */
 #define ARCH_HAS_PREFETCH
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 42d605369217..65132d7ae9e5 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -127,7 +127,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define	KSTK_ESP(tsk)	((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
 #define cpu_relax()    barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #define HARD_RESET_NOW() ({		\
 	local_irq_disable();		\
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 5d694cccc85a..45a825402f63 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -56,7 +56,6 @@ struct thread_struct {
 }
 
 #define cpu_relax() __vmyield()
-#define cpu_relax_yield() cpu_relax()
 
 /*
  * Decides where the kernel will search for a free chunk of vm space during
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 0c2c3b256f6c..03911a336406 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -547,7 +547,6 @@ ia64_eoi (void)
 }
 
 #define cpu_relax()	ia64_hint(ia64_hint_pause)
-#define cpu_relax_yield() cpu_relax()
 
 static inline int
 ia64_get_irr(unsigned int vector)
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 9b83a13290fc..5767367550c6 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -133,6 +133,5 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #endif /* _ASM_M32R_PROCESSOR_H */
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index b0d044224ce5..f5f790c31bf8 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -156,6 +156,5 @@ unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk)	((struct pt_regs *) ((tsk)->thread.esp0))
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #endif
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index ee302a637f24..ec6a49076980 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -152,7 +152,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define user_stack_pointer(regs)        ((regs)->ctx.AX[0].U0)
 
 #define cpu_relax()     barrier()
-#define cpu_relax_yield() cpu_relax()
 
 extern void setup_priv(void);
 
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 08ec1f725b7f..37ef196e4519 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -22,7 +22,6 @@
 extern const struct seq_operations cpuinfo_op;
 
 # define cpu_relax()		barrier()
-# define cpu_relax_yield() cpu_relax()
 
 #define task_pt_regs(tsk) \
 		(((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 8ea95e77ec9d..95b8c471f572 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -389,7 +389,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status)
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 /*
  * Return_address is a replacement for __builtin_return_address(count)
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index d11397bc9429..18e17abf7664 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -69,7 +69,6 @@ extern void print_cpu_info(struct mn10300_cpuinfo *);
 extern void dodgy_tsc(void);
 
 #define cpu_relax() barrier()
-#define cpu_relax_yield() cpu_relax()
 
 /*
  * User space process size: 1.75GB (default).
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index d32c17669123..3bbbc3d798e5 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -88,7 +88,6 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(tsk)	((tsk)->thread.kregs->sp)
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index 7f47fc7b7eae..a908e6c30a00 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -92,7 +92,6 @@ extern unsigned long thread_saved_pc(struct task_struct *t);
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
-#define cpu_relax_yield() cpu_relax()
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_OPENRISC_PROCESSOR_H */
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a4a07f4f7c20..ca40741378be 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -309,7 +309,6 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(tsk)	((tsk)->thread.regs.gr[30])
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 /*
  * parisc_requires_coherency() is used to identify the combined VIPT/PIPT
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 5684e6872473..dac83fcb9445 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 #define cpu_relax()	barrier()
 #endif
 
-#define cpu_relax_yield() cpu_relax()
-
 /* Check that a certain kernel stack pointer is valid in task_struct p */
 int validate_sp(unsigned long sp, struct task_struct *p,
                        unsigned long nbytes);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 9e32f25bdea3..9d3a21aedc97 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -234,6 +234,7 @@ static inline unsigned short stap(void)
 /*
  * Give up the time slice of the virtual PU.
  */
+#define cpu_relax_yield cpu_relax_yield
 void cpu_relax_yield(void);
 
 #define cpu_relax() barrier()
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index a1e97c063ed4..d9a922d8711b 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -24,7 +24,6 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define current_text_addr() ({ __label__ _l; _l: &&_l; })
 
 #define cpu_relax()		barrier()
-#define cpu_relax_yield()	cpu_relax()
 #define release_thread(thread)	do {} while (0)
 
 /*
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 9454ff1ad0d9..5addd69f70ef 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -97,7 +97,6 @@ extern struct sh_cpuinfo cpu_data[];
 
 #define cpu_sleep()	__asm__ __volatile__ ("sleep" : : : "memory")
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 void default_idle(void);
 void stop_this_cpu(void *);
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index fc32b7311481..365d4cb267b4 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -119,7 +119,6 @@ extern struct task_struct *last_task_used_math;
 int do_mathemu(struct pt_regs *regs, struct task_struct *fpt);
 
 #define cpu_relax()	barrier()
-#define cpu_relax_yield() cpu_relax()
 
 extern void (*sparc_idle)(void);
 
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 12787dfeb11c..6448cfc8292f 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -216,7 +216,6 @@ unsigned long get_wchan(struct task_struct *task);
 				     "nop\n\t"				\
 				     ".previous"			\
 				     ::: "memory")
-#define cpu_relax_yield() cpu_relax()
 
 /* Prefetch support.  This is tuned for UltraSPARC-III and later.
  * UltraSPARC-I will treat these as nops, and UltraSPARC-II has
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index c1c228b16f7f..0bc9968b97a1 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -264,8 +264,6 @@ static inline void cpu_relax(void)
 	barrier();
 }
 
-#define cpu_relax_yield() cpu_relax()
-
 /* Info on this processor (see fs/proc/cpuinfo.c) */
 struct seq_operations;
 extern const struct seq_operations cpuinfo_op;
diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h
index eeefe7c529eb..4eaa42167667 100644
--- a/arch/unicore32/include/asm/processor.h
+++ b/arch/unicore32/include/asm/processor.h
@@ -71,7 +71,6 @@ extern void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
 #define cpu_relax()			barrier()
-#define cpu_relax_yield()		cpu_relax()
 
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7513c996f673..c84605bb2a15 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -588,8 +588,6 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-#define cpu_relax_yield() cpu_relax()
-
 /* Stop speculative execution and prefetching of modified code. */
 static inline void sync_core(void)
 {
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index b4bd63b22b7f..c77db2288982 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -26,7 +26,6 @@ static inline void rep_nop(void)
 }
 
 #define cpu_relax()		rep_nop()
-#define cpu_relax_yield()	cpu_relax()
 
 #define task_pt_regs(t) (&(t)->thread.regs)
 
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 7d8d6bececfc..86ffcd68e496 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -206,7 +206,6 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->areg[1])
 
 #define cpu_relax()  barrier()
-#define cpu_relax_yield() cpu_relax()
 
 /* Special register access. */
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..c1aa3b02f6ac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2444,6 +2444,10 @@ static inline void calc_load_enter_idle(void) { }
 static inline void calc_load_exit_idle(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
+#ifndef cpu_relax_yield
+#define cpu_relax_yield() cpu_relax()
+#endif
+
 /*
  * Do not use outside of architecture code which knows its limitations.
  *
-- 
cgit v1.2.3


From da6f8ca13fb7d40f263ef647ebb41ff0a575d194 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 10 Nov 2016 16:17:49 +0100
Subject: dmaengine: pl08x: Add support for the DMA slave map

This patch adds support for the new channel request API introduced
in commit a8135d0d79e9d0ad3a4ff494fceeaae83
"dmaengine: core: Introduce new, universal API to request a channel".

param field of struct dma_slave_map type entries in the platform
data structure should be pointing to struct pl08x_channel_data
of related DMA channel.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Tested-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c   | 11 +++++++++++
 include/linux/amba/pl08x.h |  4 ++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 939a7c31f760..0b7c6ce629a6 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1793,6 +1793,13 @@ bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
 }
 EXPORT_SYMBOL_GPL(pl08x_filter_id);
 
+static bool pl08x_filter_fn(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+
+	return plchan->cd == chan_id;
+}
+
 /*
  * Just check that the device is there and active
  * TODO: turn this bit on/off depending on the number of physical channels
@@ -2307,6 +2314,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 			ret = -EINVAL;
 			goto out_no_platdata;
 		}
+	} else {
+		pl08x->slave.filter.map = pl08x->pd->slave_map;
+		pl08x->slave.filter.mapcnt = pl08x->pd->slave_map_len;
+		pl08x->slave.filter.fn = pl08x_filter_fn;
 	}
 
 	/* By default, AHB1 only.  If dualmaster, from platform */
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 27e9ec8778eb..5308eae9ce35 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -84,6 +84,8 @@ struct pl08x_channel_data {
  * running any DMA transfer and multiplexing can be recycled
  * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2
  * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
+ * @slave_map: DMA slave matching table
+ * @slave_map_len: number of elements in @slave_map
  */
 struct pl08x_platform_data {
 	struct pl08x_channel_data *slave_channels;
@@ -93,6 +95,8 @@ struct pl08x_platform_data {
 	void (*put_xfer_signal)(const struct pl08x_channel_data *, int);
 	u8 lli_buses;
 	u8 mem_buses;
+	const struct dma_slave_map *slave_map;
+	int slave_map_len;
 };
 
 #ifdef CONFIG_AMBA_PL08X
-- 
cgit v1.2.3


From c41668ad5de939855636650d39e961893dc5d6fc Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 10 Nov 2016 16:17:52 +0100
Subject: ARM: s3c64xx: Drop unused DMA fields from struct s3c64xx_spi_csinfo

There is no drivers using those fields so remove them and
the remaining initializations.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Tested-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/plat-samsung/devs.c              | 24 ------------------------
 include/linux/platform_data/spi-s3c64xx.h |  3 ---
 2 files changed, 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index e93aa6734147..cf7b95fddbb3 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -1124,15 +1124,6 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 	pd.num_cs = num_cs;
 	pd.src_clk_nr = src_clk_nr;
 	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio;
-	pd.dma_tx = (void *)DMACH_SPI0_TX;
-	pd.dma_rx = (void *)DMACH_SPI0_RX;
-#if defined(CONFIG_PL330_DMA)
-	pd.filter = pl330_filter;
-#elif defined(CONFIG_S3C64XX_PL080)
-	pd.filter = pl08x_filter_id;
-#elif defined(CONFIG_S3C24XX_DMAC)
-	pd.filter = s3c24xx_dma_filter;
-#endif
 
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0);
 }
@@ -1169,14 +1160,6 @@ void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 	pd.num_cs = num_cs;
 	pd.src_clk_nr = src_clk_nr;
 	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio;
-	pd.dma_tx = (void *)DMACH_SPI1_TX;
-	pd.dma_rx = (void *)DMACH_SPI1_RX;
-#if defined(CONFIG_PL330_DMA)
-	pd.filter = pl330_filter;
-#elif defined(CONFIG_S3C64XX_PL080)
-	pd.filter = pl08x_filter_id;
-#endif
-
 
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1);
 }
@@ -1213,13 +1196,6 @@ void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 	pd.num_cs = num_cs;
 	pd.src_clk_nr = src_clk_nr;
 	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi2_cfg_gpio;
-	pd.dma_tx = (void *)DMACH_SPI2_TX;
-	pd.dma_rx = (void *)DMACH_SPI2_RX;
-#if defined(CONFIG_PL330_DMA)
-	pd.filter = pl330_filter;
-#elif defined(CONFIG_S3C64XX_PL080)
-	pd.filter = pl08x_filter_id;
-#endif
 
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi2);
 }
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 5c1e21c87270..da79774078a7 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -40,9 +40,6 @@ struct s3c64xx_spi_info {
 	int num_cs;
 	bool no_cs;
 	int (*cfg_gpio)(void);
-	dma_filter_fn filter;
-	void *dma_tx;
-	void *dma_rx;
 };
 
 /**
-- 
cgit v1.2.3


From f97df70b1c879f764f88b25b0e67b03a5213968a Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Mon, 14 Nov 2016 11:12:56 +0000
Subject: xenfs: Use proc_create_mount_point() to create /proc/xen

Mounting proc in user namespace containers fails if the xenbus
filesystem is mounted on /proc/xen because this directory fails
the "permanently empty" test. proc_create_mount_point() exists
specifically to create such mountpoints in proc but is currently
proc-internal. Export this interface to modules, then use it in
xenbus when creating /proc/xen.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_probe.c | 2 +-
 fs/proc/generic.c                 | 1 +
 fs/proc/internal.h                | 1 -
 include/linux/proc_fs.h           | 2 ++
 4 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 33a31cfef55d..b5c1dec4a7c2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -826,7 +826,7 @@ static int __init xenbus_init(void)
 	 * Create xenfs mountpoint in /proc for compatibility with
 	 * utilities that expect to find "xenbus" under "/proc/xen".
 	 */
-	proc_mkdir("xen", NULL);
+	proc_create_mount_point("xen");
 #endif
 
 out_error:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5f2dc2032c79..7eb3cefcf2a3 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -479,6 +479,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
 	}
 	return ent;
 }
+EXPORT_SYMBOL(proc_create_mount_point);
 
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 					struct proc_dir_entry *parent,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5378441ec1b7..7de679572111 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -195,7 +195,6 @@ static inline bool is_empty_pde(const struct proc_dir_entry *pde)
 {
 	return S_ISDIR(pde->mode) && !pde->proc_iops;
 }
-struct proc_dir_entry *proc_create_mount_point(const char *name);
 
 /*
  * inode.c
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b97bf2ef996e..8bd2f726436a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -21,6 +21,7 @@ extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t,
 					      struct proc_dir_entry *, void *);
 extern struct proc_dir_entry *proc_mkdir_mode(const char *, umode_t,
 					      struct proc_dir_entry *);
+struct proc_dir_entry *proc_create_mount_point(const char *name);
  
 extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
 					       struct proc_dir_entry *,
@@ -56,6 +57,7 @@ static inline struct proc_dir_entry *proc_symlink(const char *name,
 		struct proc_dir_entry *parent,const char *dest) { return NULL;}
 static inline struct proc_dir_entry *proc_mkdir(const char *name,
 	struct proc_dir_entry *parent) {return NULL;}
+static inline struct proc_dir_entry *proc_create_mount_point(const char *name) { return NULL; }
 static inline struct proc_dir_entry *proc_mkdir_data(const char *name,
 	umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; }
 static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
-- 
cgit v1.2.3


From 298d4de1ed003ad91cb49e068d744db0343cacb6 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 7 Nov 2016 12:47:38 +0000
Subject: i2c: Match using traditional OF methods, then by vendor-less
 compatible strings

This function provides a single call for all I2C devices which need to
match firstly using traditional OF means i.e by of_node, then if that
fails we attempt to match using the supplied I2C client name with a
list of supplied compatible strings with the '<vendor>,' string
removed.  The latter is required due to the unruly naming conventions
used currently by I2C devices.

Acked-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
[Kieran: Fix static inline usage on !CONFIG_OF]
Tested-by: Kieran Bingham <kieran@bingham.xyz>
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Tested-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Kieran Bingham <kieran@bingham.xyz>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c | 16 ++++++++++++++++
 include/linux/i2c.h    | 12 ++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index c588a8504c99..2151d0b353b3 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1799,6 +1799,22 @@ i2c_of_match_device_sysfs(const struct of_device_id *matches,
 	return NULL;
 }
 
+const struct of_device_id
+*i2c_of_match_device(const struct of_device_id *matches,
+		     struct i2c_client *client)
+{
+	const struct of_device_id *match;
+
+	if (!(client && matches))
+		return NULL;
+
+	match = of_match_device(matches, &client->dev);
+	if (match)
+		return match;
+
+	return i2c_of_match_device_sysfs(matches, client);
+}
+EXPORT_SYMBOL_GPL(i2c_of_match_device);
 #else
 static void of_i2c_register_devices(struct i2c_adapter *adap) { }
 #endif /* CONFIG_OF */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 6422eef428c4..c0a4a12815aa 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -774,6 +774,10 @@ extern struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
 /* must call i2c_put_adapter() when done with returned i2c_adapter device */
 struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node);
 
+extern const struct of_device_id
+*i2c_of_match_device(const struct of_device_id *matches,
+		     struct i2c_client *client);
+
 #else
 
 static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
@@ -790,6 +794,14 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node
 {
 	return NULL;
 }
+
+static inline const struct of_device_id
+*i2c_of_match_device(const struct of_device_id *matches,
+		     struct i2c_client *client)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_OF */
 
 #if IS_ENABLED(CONFIG_ACPI)
-- 
cgit v1.2.3


From 5f441fcaa3ce54681923475cf0040216d190e646 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 7 Nov 2016 12:47:40 +0000
Subject: i2c: Export i2c_match_id() for direct use by device drivers

When there was no other way to match a I2C device to driver i2c_match_id()
was exclusively used.  However, now there are other types of tables which
are commonly supplied, matching on an i2c_device_id table is used less
frequently.  Instead of _always_ calling i2c_match_id() from within the
framework, we only need to do so from drivers which have no other way of
matching.  This patch makes i2c_match_id() available to the aforementioned
device drivers.

Acked-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Tested-by: Kieran Bingham <kieran@bingham.xyz>
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Tested-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Kieran Bingham <kieran@bingham.xyz>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c | 3 ++-
 include/linux/i2c.h    | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a97f7a0b0d6e..52b2cd4ac8b2 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -676,7 +676,7 @@ static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
 
 /* ------------------------------------------------------------------------- */
 
-static const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
+const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
 						const struct i2c_client *client)
 {
 	if (!(id && client))
@@ -689,6 +689,7 @@ static const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(i2c_match_id);
 
 static int i2c_device_match(struct device *dev, struct device_driver *drv)
 {
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index c0a4a12815aa..7e00efd6a62f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -243,6 +243,8 @@ struct i2c_client {
 
 extern struct i2c_client *i2c_verify_client(struct device *dev);
 extern struct i2c_adapter *i2c_verify_adapter(struct device *dev);
+extern const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
+					const struct i2c_client *client);
 
 static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj)
 {
-- 
cgit v1.2.3


From b8a1a4cd5a98a2adf8dfd6902cd98e57d910ee12 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 7 Nov 2016 12:47:41 +0000
Subject: i2c: Provide a temporary .probe_new() call-back type

This will aid the seamless removal of the current probe()'s, more
commonly unused than used second parameter.  Most I2C drivers can
simply switch over to the new interface, others which have DT
support can use its own matching instead and others can call
i2c_match_id() themselves.  This brings I2C's device probe method
into line with other similar interfaces in the kernel and prevents
the requirement to pass an i2c_device_id table.

Suggested-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
[Kieran: fix rebase conflicts and adapt for dev_pm_domain_{attach,detach}]
Tested-by: Kieran Bingham <kieran@bingham.xyz>
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Tested-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Kieran Bingham <kieran@bingham.xyz>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c | 15 ++++++++++++---
 include/linux/i2c.h    |  8 +++++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 52b2cd4ac8b2..8b93a262e237 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -924,8 +924,6 @@ static int i2c_device_probe(struct device *dev)
 	}
 
 	driver = to_i2c_driver(dev->driver);
-	if (!driver->probe)
-		return -EINVAL;
 
 	/*
 	 * An I2C ID table is not mandatory, if and only if, a suitable Device
@@ -967,7 +965,18 @@ static int i2c_device_probe(struct device *dev)
 	if (status == -EPROBE_DEFER)
 		goto err_clear_wakeup_irq;
 
-	status = driver->probe(client, i2c_match_id(driver->id_table, client));
+	/*
+	 * When there are no more users of probe(),
+	 * rename probe_new to probe.
+	 */
+	if (driver->probe_new)
+		status = driver->probe_new(client);
+	else if (driver->probe)
+		status = driver->probe(client,
+				       i2c_match_id(driver->id_table, client));
+	else
+		status = -EINVAL;
+
 	if (status)
 		goto err_detach_pm_domain;
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 7e00efd6a62f..82cf90945bb8 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -135,7 +135,8 @@ enum i2c_alert_protocol {
  * struct i2c_driver - represent an I2C device driver
  * @class: What kind of i2c device we instantiate (for detect)
  * @attach_adapter: Callback for bus addition (deprecated)
- * @probe: Callback for device binding
+ * @probe: Callback for device binding - soon to be deprecated
+ * @probe_new: New callback for device binding
  * @remove: Callback for device unbinding
  * @shutdown: Callback for device shutdown
  * @alert: Alert callback, for example for the SMBus alert protocol
@@ -178,6 +179,11 @@ struct i2c_driver {
 	int (*probe)(struct i2c_client *, const struct i2c_device_id *);
 	int (*remove)(struct i2c_client *);
 
+	/* New driver model interface to aid the seamless removal of the
+	 * current probe()'s, more commonly unused than used second parameter.
+	 */
+	int (*probe_new)(struct i2c_client *);
+
 	/* driver model interfaces that don't relate to enumeration  */
 	void (*shutdown)(struct i2c_client *);
 
-- 
cgit v1.2.3


From 7b96953bc640b6b25665fe17ffca4b668b371f14 Mon Sep 17 00:00:00 2001
From: Kirti Wankhede <kwankhede@nvidia.com>
Date: Thu, 17 Nov 2016 02:16:13 +0530
Subject: vfio: Mediated device Core driver

Design for Mediated Device Driver:
Main purpose of this driver is to provide a common interface for mediated
device management that can be used by different drivers of different
devices.

This module provides a generic interface to create the device, add it to
mediated bus, add device to IOMMU group and then add it to vfio group.

Below is the high Level block diagram, with Nvidia, Intel and IBM devices
as example, since these are the devices which are going to actively use
this module as of now.

 +---------------+
 |               |
 | +-----------+ |  mdev_register_driver() +--------------+
 | |           | +<------------------------+ __init()     |
 | |  mdev     | |                         |              |
 | |  bus      | +------------------------>+              |<-> VFIO user
 | |  driver   | |     probe()/remove()    | vfio_mdev.ko |    APIs
 | |           | |                         |              |
 | +-----------+ |                         +--------------+
 |               |
 |  MDEV CORE    |
 |   MODULE      |
 |   mdev.ko     |
 | +-----------+ |  mdev_register_device() +--------------+
 | |           | +<------------------------+              |
 | |           | |                         |  nvidia.ko   |<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | | Physical  | |
 | |  device   | |  mdev_register_device() +--------------+
 | | interface | |<------------------------+              |
 | |           | |                         |  i915.ko     |<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | |           | |
 | |           | |  mdev_register_device() +--------------+
 | |           | +<------------------------+              |
 | |           | |                         | ccw_device.ko|<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | +-----------+ |
 +---------------+

Core driver provides two types of registration interfaces:
1. Registration interface for mediated bus driver:

/**
  * struct mdev_driver - Mediated device's driver
  * @name: driver name
  * @probe: called when new device created
  * @remove:called when device removed
  * @driver:device driver structure
  *
  **/
struct mdev_driver {
         const char *name;
         int  (*probe)  (struct device *dev);
         void (*remove) (struct device *dev);
         struct device_driver    driver;
};

Mediated bus driver for mdev device should use this interface to register
and unregister with core driver respectively:

int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
void mdev_unregister_driver(struct mdev_driver *drv);

Mediated bus driver is responsible to add/delete mediated devices to/from
VFIO group when devices are bound and unbound to the driver.

2. Physical device driver interface
This interface provides vendor driver the set APIs to manage physical
device related work in its driver. APIs are :

* dev_attr_groups: attributes of the parent device.
* mdev_attr_groups: attributes of the mediated device.
* supported_type_groups: attributes to define supported type. This is
			 mandatory field.
* create: to allocate basic resources in vendor driver for a mediated
         device. This is mandatory to be provided by vendor driver.
* remove: to free resources in vendor driver when mediated device is
         destroyed. This is mandatory to be provided by vendor driver.
* open: open callback of mediated device
* release: release callback of mediated device
* read : read emulation callback.
* write: write emulation callback.
* ioctl: ioctl callback.
* mmap: mmap emulation callback.

Drivers should use these interfaces to register and unregister device to
mdev core driver respectively:

extern int  mdev_register_device(struct device *dev,
                                 const struct parent_ops *ops);
extern void mdev_unregister_device(struct device *dev);

There are no locks to serialize above callbacks in mdev driver and
vfio_mdev driver. If required, vendor driver can have locks to serialize
above APIs in their driver.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Reviewed-by: Jike Song <jike.song@intel.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/Kconfig             |   1 +
 drivers/vfio/Makefile            |   1 +
 drivers/vfio/mdev/Kconfig        |   9 +
 drivers/vfio/mdev/Makefile       |   4 +
 drivers/vfio/mdev/mdev_core.c    | 374 +++++++++++++++++++++++++++++++++++++++
 drivers/vfio/mdev/mdev_driver.c  | 119 +++++++++++++
 drivers/vfio/mdev/mdev_private.h |  41 +++++
 drivers/vfio/mdev/mdev_sysfs.c   | 286 ++++++++++++++++++++++++++++++
 include/linux/mdev.h             | 168 ++++++++++++++++++
 9 files changed, 1003 insertions(+)
 create mode 100644 drivers/vfio/mdev/Kconfig
 create mode 100644 drivers/vfio/mdev/Makefile
 create mode 100644 drivers/vfio/mdev/mdev_core.c
 create mode 100644 drivers/vfio/mdev/mdev_driver.c
 create mode 100644 drivers/vfio/mdev/mdev_private.h
 create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
 create mode 100644 include/linux/mdev.h

(limited to 'include/linux')

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index da6e2ce77495..23eced02aaf6 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
+source "drivers/vfio/mdev/Kconfig"
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 7b8a31f63fea..4a23c13b6be4 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
+obj-$(CONFIG_VFIO_MDEV) += mdev/
diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
new file mode 100644
index 000000000000..258481d65ebd
--- /dev/null
+++ b/drivers/vfio/mdev/Kconfig
@@ -0,0 +1,9 @@
+
+config VFIO_MDEV
+	tristate "Mediated device driver framework"
+	depends on VFIO
+	default n
+	help
+	  Provides a framework to virtualize devices.
+
+	  If you don't know what do here, say N.
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
new file mode 100644
index 000000000000..31bc04801d94
--- /dev/null
+++ b/drivers/vfio/mdev/Makefile
@@ -0,0 +1,4 @@
+
+mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
+
+obj-$(CONFIG_VFIO_MDEV) += mdev.o
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
new file mode 100644
index 000000000000..613e8a8a3b2a
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -0,0 +1,374 @@
+/*
+ * Mediated device Core Driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *             Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION		"0.1"
+#define DRIVER_AUTHOR		"NVIDIA Corporation"
+#define DRIVER_DESC		"Mediated device Core Driver"
+
+static LIST_HEAD(parent_list);
+static DEFINE_MUTEX(parent_list_lock);
+static struct class_compat *mdev_bus_compat_class;
+
+static int _find_mdev_device(struct device *dev, void *data)
+{
+	struct mdev_device *mdev;
+
+	if (!dev_is_mdev(dev))
+		return 0;
+
+	mdev = to_mdev_device(dev);
+
+	if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
+		return 1;
+
+	return 0;
+}
+
+static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid)
+{
+	struct device *dev;
+
+	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
+	if (dev) {
+		put_device(dev);
+		return true;
+	}
+
+	return false;
+}
+
+/* Should be called holding parent_list_lock */
+static struct parent_device *__find_parent_device(struct device *dev)
+{
+	struct parent_device *parent;
+
+	list_for_each_entry(parent, &parent_list, next) {
+		if (parent->dev == dev)
+			return parent;
+	}
+	return NULL;
+}
+
+static void mdev_release_parent(struct kref *kref)
+{
+	struct parent_device *parent = container_of(kref, struct parent_device,
+						    ref);
+	struct device *dev = parent->dev;
+
+	kfree(parent);
+	put_device(dev);
+}
+
+static
+inline struct parent_device *mdev_get_parent(struct parent_device *parent)
+{
+	if (parent)
+		kref_get(&parent->ref);
+
+	return parent;
+}
+
+static inline void mdev_put_parent(struct parent_device *parent)
+{
+	if (parent)
+		kref_put(&parent->ref, mdev_release_parent);
+}
+
+static int mdev_device_create_ops(struct kobject *kobj,
+				  struct mdev_device *mdev)
+{
+	struct parent_device *parent = mdev->parent;
+	int ret;
+
+	ret = parent->ops->create(kobj, mdev);
+	if (ret)
+		return ret;
+
+	ret = sysfs_create_groups(&mdev->dev.kobj,
+				  parent->ops->mdev_attr_groups);
+	if (ret)
+		parent->ops->remove(mdev);
+
+	return ret;
+}
+
+/*
+ * mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
+ * device is being unregistered from mdev device framework.
+ * - 'force_remove' is set to 'false' when called from sysfs's 'remove' which
+ *   indicates that if the mdev device is active, used by VMM or userspace
+ *   application, vendor driver could return error then don't remove the device.
+ * - 'force_remove' is set to 'true' when called from mdev_unregister_device()
+ *   which indicate that parent device is being removed from mdev device
+ *   framework so remove mdev device forcefully.
+ */
+static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
+{
+	struct parent_device *parent = mdev->parent;
+	int ret;
+
+	/*
+	 * Vendor driver can return error if VMM or userspace application is
+	 * using this mdev device.
+	 */
+	ret = parent->ops->remove(mdev);
+	if (ret && !force_remove)
+		return -EBUSY;
+
+	sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
+	return 0;
+}
+
+static int mdev_device_remove_cb(struct device *dev, void *data)
+{
+	if (!dev_is_mdev(dev))
+		return 0;
+
+	return mdev_device_remove(dev, data ? *(bool *)data : true);
+}
+
+/*
+ * mdev_register_device : Register a device
+ * @dev: device structure representing parent device.
+ * @ops: Parent device operation structure to be registered.
+ *
+ * Add device to list of registered parent devices.
+ * Returns a negative value on error, otherwise 0.
+ */
+int mdev_register_device(struct device *dev, const struct parent_ops *ops)
+{
+	int ret;
+	struct parent_device *parent;
+
+	/* check for mandatory ops */
+	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
+		return -EINVAL;
+
+	dev = get_device(dev);
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&parent_list_lock);
+
+	/* Check for duplicate */
+	parent = __find_parent_device(dev);
+	if (parent) {
+		ret = -EEXIST;
+		goto add_dev_err;
+	}
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (!parent) {
+		ret = -ENOMEM;
+		goto add_dev_err;
+	}
+
+	kref_init(&parent->ref);
+	mutex_init(&parent->lock);
+
+	parent->dev = dev;
+	parent->ops = ops;
+
+	if (!mdev_bus_compat_class) {
+		mdev_bus_compat_class = class_compat_register("mdev_bus");
+		if (!mdev_bus_compat_class) {
+			ret = -ENOMEM;
+			goto add_dev_err;
+		}
+	}
+
+	ret = parent_create_sysfs_files(parent);
+	if (ret)
+		goto add_dev_err;
+
+	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
+	if (ret)
+		dev_warn(dev, "Failed to create compatibility class link\n");
+
+	list_add(&parent->next, &parent_list);
+	mutex_unlock(&parent_list_lock);
+
+	dev_info(dev, "MDEV: Registered\n");
+	return 0;
+
+add_dev_err:
+	mutex_unlock(&parent_list_lock);
+	if (parent)
+		mdev_put_parent(parent);
+	else
+		put_device(dev);
+	return ret;
+}
+EXPORT_SYMBOL(mdev_register_device);
+
+/*
+ * mdev_unregister_device : Unregister a parent device
+ * @dev: device structure representing parent device.
+ *
+ * Remove device from list of registered parent devices. Give a chance to free
+ * existing mediated devices for given device.
+ */
+
+void mdev_unregister_device(struct device *dev)
+{
+	struct parent_device *parent;
+	bool force_remove = true;
+
+	mutex_lock(&parent_list_lock);
+	parent = __find_parent_device(dev);
+
+	if (!parent) {
+		mutex_unlock(&parent_list_lock);
+		return;
+	}
+	dev_info(dev, "MDEV: Unregistering\n");
+
+	list_del(&parent->next);
+	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
+
+	device_for_each_child(dev, (void *)&force_remove,
+			      mdev_device_remove_cb);
+
+	parent_remove_sysfs_files(parent);
+
+	mutex_unlock(&parent_list_lock);
+	mdev_put_parent(parent);
+}
+EXPORT_SYMBOL(mdev_unregister_device);
+
+static void mdev_device_release(struct device *dev)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+
+	dev_dbg(&mdev->dev, "MDEV: destroying\n");
+	kfree(mdev);
+}
+
+int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
+{
+	int ret;
+	struct mdev_device *mdev;
+	struct parent_device *parent;
+	struct mdev_type *type = to_mdev_type(kobj);
+
+	parent = mdev_get_parent(type->parent);
+	if (!parent)
+		return -EINVAL;
+
+	mutex_lock(&parent->lock);
+
+	/* Check for duplicate */
+	if (mdev_device_exist(parent, uuid)) {
+		ret = -EEXIST;
+		goto create_err;
+	}
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev) {
+		ret = -ENOMEM;
+		goto create_err;
+	}
+
+	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
+	mdev->parent = parent;
+	kref_init(&mdev->ref);
+
+	mdev->dev.parent  = dev;
+	mdev->dev.bus     = &mdev_bus_type;
+	mdev->dev.release = mdev_device_release;
+	dev_set_name(&mdev->dev, "%pUl", uuid.b);
+
+	ret = device_register(&mdev->dev);
+	if (ret) {
+		put_device(&mdev->dev);
+		goto create_err;
+	}
+
+	ret = mdev_device_create_ops(kobj, mdev);
+	if (ret)
+		goto create_failed;
+
+	ret = mdev_create_sysfs_files(&mdev->dev, type);
+	if (ret) {
+		mdev_device_remove_ops(mdev, true);
+		goto create_failed;
+	}
+
+	mdev->type_kobj = kobj;
+	dev_dbg(&mdev->dev, "MDEV: created\n");
+
+	mutex_unlock(&parent->lock);
+	return ret;
+
+create_failed:
+	device_unregister(&mdev->dev);
+
+create_err:
+	mutex_unlock(&parent->lock);
+	mdev_put_parent(parent);
+	return ret;
+}
+
+int mdev_device_remove(struct device *dev, bool force_remove)
+{
+	struct mdev_device *mdev;
+	struct parent_device *parent;
+	struct mdev_type *type;
+	int ret;
+
+	mdev = to_mdev_device(dev);
+	type = to_mdev_type(mdev->type_kobj);
+	parent = mdev->parent;
+	mutex_lock(&parent->lock);
+
+	ret = mdev_device_remove_ops(mdev, force_remove);
+	if (ret) {
+		mutex_unlock(&parent->lock);
+		return ret;
+	}
+
+	mdev_remove_sysfs_files(dev, type);
+	device_unregister(dev);
+	mutex_unlock(&parent->lock);
+	mdev_put_parent(parent);
+	return ret;
+}
+
+static int __init mdev_init(void)
+{
+	return mdev_bus_register();
+}
+
+static void __exit mdev_exit(void)
+{
+	if (mdev_bus_compat_class)
+		class_compat_unregister(mdev_bus_compat_class);
+
+	mdev_bus_unregister();
+}
+
+module_init(mdev_init)
+module_exit(mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
new file mode 100644
index 000000000000..6f0391f6f9b6
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -0,0 +1,119 @@
+/*
+ * MDEV driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *             Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+static int mdev_attach_iommu(struct mdev_device *mdev)
+{
+	int ret;
+	struct iommu_group *group;
+
+	group = iommu_group_alloc();
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = iommu_group_add_device(group, &mdev->dev);
+	if (!ret)
+		dev_info(&mdev->dev, "MDEV: group_id = %d\n",
+			 iommu_group_id(group));
+
+	iommu_group_put(group);
+	return ret;
+}
+
+static void mdev_detach_iommu(struct mdev_device *mdev)
+{
+	iommu_group_remove_device(&mdev->dev);
+	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
+}
+
+static int mdev_probe(struct device *dev)
+{
+	struct mdev_driver *drv = to_mdev_driver(dev->driver);
+	struct mdev_device *mdev = to_mdev_device(dev);
+	int ret;
+
+	ret = mdev_attach_iommu(mdev);
+	if (ret)
+		return ret;
+
+	if (drv && drv->probe) {
+		ret = drv->probe(dev);
+		if (ret)
+			mdev_detach_iommu(mdev);
+	}
+
+	return ret;
+}
+
+static int mdev_remove(struct device *dev)
+{
+	struct mdev_driver *drv = to_mdev_driver(dev->driver);
+	struct mdev_device *mdev = to_mdev_device(dev);
+
+	if (drv && drv->remove)
+		drv->remove(dev);
+
+	mdev_detach_iommu(mdev);
+
+	return 0;
+}
+
+struct bus_type mdev_bus_type = {
+	.name		= "mdev",
+	.probe		= mdev_probe,
+	.remove		= mdev_remove,
+};
+EXPORT_SYMBOL_GPL(mdev_bus_type);
+
+/**
+ * mdev_register_driver - register a new MDEV driver
+ * @drv: the driver to register
+ * @owner: module owner of driver to be registered
+ *
+ * Returns a negative value on error, otherwise 0.
+ **/
+int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
+{
+	/* initialize common driver fields */
+	drv->driver.name = drv->name;
+	drv->driver.bus = &mdev_bus_type;
+	drv->driver.owner = owner;
+
+	/* register with core */
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(mdev_register_driver);
+
+/*
+ * mdev_unregister_driver - unregister MDEV driver
+ * @drv: the driver to unregister
+ */
+void mdev_unregister_driver(struct mdev_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(mdev_unregister_driver);
+
+int mdev_bus_register(void)
+{
+	return bus_register(&mdev_bus_type);
+}
+
+void mdev_bus_unregister(void)
+{
+	bus_unregister(&mdev_bus_type);
+}
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
new file mode 100644
index 000000000000..d35097cbf3d7
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -0,0 +1,41 @@
+/*
+ * Mediated device interal definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *             Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MDEV_PRIVATE_H
+#define MDEV_PRIVATE_H
+
+int  mdev_bus_register(void);
+void mdev_bus_unregister(void);
+
+struct mdev_type {
+	struct kobject kobj;
+	struct kobject *devices_kobj;
+	struct parent_device *parent;
+	struct list_head next;
+	struct attribute_group *group;
+};
+
+#define to_mdev_type_attr(_attr)	\
+	container_of(_attr, struct mdev_type_attribute, attr)
+#define to_mdev_type(_kobj)		\
+	container_of(_kobj, struct mdev_type, kobj)
+
+int  parent_create_sysfs_files(struct parent_device *parent);
+void parent_remove_sysfs_files(struct parent_device *parent);
+
+int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
+void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
+
+int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid);
+int  mdev_device_remove(struct device *dev, bool force_remove);
+
+#endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
new file mode 100644
index 000000000000..1a53deb2ee10
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -0,0 +1,286 @@
+/*
+ * File attributes for Mediated devices
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *             Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+/* Static functions */
+
+static ssize_t mdev_type_attr_show(struct kobject *kobj,
+				     struct attribute *__attr, char *buf)
+{
+	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
+	struct mdev_type *type = to_mdev_type(kobj);
+	ssize_t ret = -EIO;
+
+	if (attr->show)
+		ret = attr->show(kobj, type->parent->dev, buf);
+	return ret;
+}
+
+static ssize_t mdev_type_attr_store(struct kobject *kobj,
+				      struct attribute *__attr,
+				      const char *buf, size_t count)
+{
+	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
+	struct mdev_type *type = to_mdev_type(kobj);
+	ssize_t ret = -EIO;
+
+	if (attr->store)
+		ret = attr->store(&type->kobj, type->parent->dev, buf, count);
+	return ret;
+}
+
+static const struct sysfs_ops mdev_type_sysfs_ops = {
+	.show = mdev_type_attr_show,
+	.store = mdev_type_attr_store,
+};
+
+static ssize_t create_store(struct kobject *kobj, struct device *dev,
+			    const char *buf, size_t count)
+{
+	char *str;
+	uuid_le uuid;
+	int ret;
+
+	if ((count < UUID_STRING_LEN) || (count > UUID_STRING_LEN + 1))
+		return -EINVAL;
+
+	str = kstrndup(buf, count, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	ret = uuid_le_to_bin(str, &uuid);
+	kfree(str);
+	if (ret)
+		return ret;
+
+	ret = mdev_device_create(kobj, dev, uuid);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+MDEV_TYPE_ATTR_WO(create);
+
+static void mdev_type_release(struct kobject *kobj)
+{
+	struct mdev_type *type = to_mdev_type(kobj);
+
+	pr_debug("Releasing group %s\n", kobj->name);
+	kfree(type);
+}
+
+static struct kobj_type mdev_type_ktype = {
+	.sysfs_ops = &mdev_type_sysfs_ops,
+	.release = mdev_type_release,
+};
+
+struct mdev_type *add_mdev_supported_type(struct parent_device *parent,
+					  struct attribute_group *group)
+{
+	struct mdev_type *type;
+	int ret;
+
+	if (!group->name) {
+		pr_err("%s: Type name empty!\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	type = kzalloc(sizeof(*type), GFP_KERNEL);
+	if (!type)
+		return ERR_PTR(-ENOMEM);
+
+	type->kobj.kset = parent->mdev_types_kset;
+
+	ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
+				   "%s-%s", dev_driver_string(parent->dev),
+				   group->name);
+	if (ret) {
+		kfree(type);
+		return ERR_PTR(ret);
+	}
+
+	ret = sysfs_create_file(&type->kobj, &mdev_type_attr_create.attr);
+	if (ret)
+		goto attr_create_failed;
+
+	type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
+	if (!type->devices_kobj) {
+		ret = -ENOMEM;
+		goto attr_devices_failed;
+	}
+
+	ret = sysfs_create_files(&type->kobj,
+				 (const struct attribute **)group->attrs);
+	if (ret) {
+		ret = -ENOMEM;
+		goto attrs_failed;
+	}
+
+	type->group = group;
+	type->parent = parent;
+	return type;
+
+attrs_failed:
+	kobject_put(type->devices_kobj);
+attr_devices_failed:
+	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
+attr_create_failed:
+	kobject_del(&type->kobj);
+	kobject_put(&type->kobj);
+	return ERR_PTR(ret);
+}
+
+static void remove_mdev_supported_type(struct mdev_type *type)
+{
+	sysfs_remove_files(&type->kobj,
+			   (const struct attribute **)type->group->attrs);
+	kobject_put(type->devices_kobj);
+	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
+	kobject_del(&type->kobj);
+	kobject_put(&type->kobj);
+}
+
+static int add_mdev_supported_type_groups(struct parent_device *parent)
+{
+	int i;
+
+	for (i = 0; parent->ops->supported_type_groups[i]; i++) {
+		struct mdev_type *type;
+
+		type = add_mdev_supported_type(parent,
+					parent->ops->supported_type_groups[i]);
+		if (IS_ERR(type)) {
+			struct mdev_type *ltype, *tmp;
+
+			list_for_each_entry_safe(ltype, tmp, &parent->type_list,
+						  next) {
+				list_del(&ltype->next);
+				remove_mdev_supported_type(ltype);
+			}
+			return PTR_ERR(type);
+		}
+		list_add(&type->next, &parent->type_list);
+	}
+	return 0;
+}
+
+/* mdev sysfs functions */
+void parent_remove_sysfs_files(struct parent_device *parent)
+{
+	struct mdev_type *type, *tmp;
+
+	list_for_each_entry_safe(type, tmp, &parent->type_list, next) {
+		list_del(&type->next);
+		remove_mdev_supported_type(type);
+	}
+
+	sysfs_remove_groups(&parent->dev->kobj, parent->ops->dev_attr_groups);
+	kset_unregister(parent->mdev_types_kset);
+}
+
+int parent_create_sysfs_files(struct parent_device *parent)
+{
+	int ret;
+
+	parent->mdev_types_kset = kset_create_and_add("mdev_supported_types",
+					       NULL, &parent->dev->kobj);
+
+	if (!parent->mdev_types_kset)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&parent->type_list);
+
+	ret = sysfs_create_groups(&parent->dev->kobj,
+				  parent->ops->dev_attr_groups);
+	if (ret)
+		goto create_err;
+
+	ret = add_mdev_supported_type_groups(parent);
+	if (ret)
+		sysfs_remove_groups(&parent->dev->kobj,
+				    parent->ops->dev_attr_groups);
+	else
+		return ret;
+
+create_err:
+	kset_unregister(parent->mdev_types_kset);
+	return ret;
+}
+
+static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val && device_remove_file_self(dev, attr)) {
+		int ret;
+
+		ret = mdev_device_remove(dev, false);
+		if (ret) {
+			device_create_file(dev, attr);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR_WO(remove);
+
+static const struct attribute *mdev_device_attrs[] = {
+	&dev_attr_remove.attr,
+	NULL,
+};
+
+int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
+{
+	int ret;
+
+	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
+	if (ret)
+		return ret;
+
+	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
+	if (ret)
+		goto device_link_failed;
+
+	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
+	if (ret)
+		goto type_link_failed;
+
+	return ret;
+
+type_link_failed:
+	sysfs_remove_link(type->devices_kobj, dev_name(dev));
+device_link_failed:
+	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
+	return ret;
+}
+
+void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
+{
+	sysfs_remove_link(&dev->kobj, "mdev_type");
+	sysfs_remove_link(type->devices_kobj, dev_name(dev));
+	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
+}
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
new file mode 100644
index 000000000000..ec819e9a115a
--- /dev/null
+++ b/include/linux/mdev.h
@@ -0,0 +1,168 @@
+/*
+ * Mediated device definition
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *             Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MDEV_H
+#define MDEV_H
+
+/* Parent device */
+struct parent_device {
+	struct device		*dev;
+	const struct parent_ops	*ops;
+
+	/* internal */
+	struct kref		ref;
+	struct mutex		lock;
+	struct list_head	next;
+	struct kset		*mdev_types_kset;
+	struct list_head	type_list;
+};
+
+/* Mediated device */
+struct mdev_device {
+	struct device		dev;
+	struct parent_device	*parent;
+	uuid_le			uuid;
+	void			*driver_data;
+
+	/* internal */
+	struct kref		ref;
+	struct list_head	next;
+	struct kobject		*type_kobj;
+};
+
+/**
+ * struct parent_ops - Structure to be registered for each parent device to
+ * register the device to mdev module.
+ *
+ * @owner:		The module owner.
+ * @dev_attr_groups:	Attributes of the parent device.
+ * @mdev_attr_groups:	Attributes of the mediated device.
+ * @supported_type_groups: Attributes to define supported types. It is mandatory
+ *			to provide supported types.
+ * @create:		Called to allocate basic resources in parent device's
+ *			driver for a particular mediated device. It is
+ *			mandatory to provide create ops.
+ *			@kobj: kobject of type for which 'create' is called.
+ *			@mdev: mdev_device structure on of mediated device
+ *			      that is being created
+ *			Returns integer: success (0) or error (< 0)
+ * @remove:		Called to free resources in parent device's driver for a
+ *			a mediated device. It is mandatory to provide 'remove'
+ *			ops.
+ *			@mdev: mdev_device device structure which is being
+ *			       destroyed
+ *			Returns integer: success (0) or error (< 0)
+ * @open:		Open mediated device.
+ *			@mdev: mediated device.
+ *			Returns integer: success (0) or error (< 0)
+ * @release:		release mediated device
+ *			@mdev: mediated device.
+ * @read:		Read emulation callback
+ *			@mdev: mediated device structure
+ *			@buf: read buffer
+ *			@count: number of bytes to read
+ *			@ppos: address.
+ *			Retuns number on bytes read on success or error.
+ * @write:		Write emulation callback
+ *			@mdev: mediated device structure
+ *			@buf: write buffer
+ *			@count: number of bytes to be written
+ *			@ppos: address.
+ *			Retuns number on bytes written on success or error.
+ * @ioctl:		IOCTL callback
+ *			@mdev: mediated device structure
+ *			@cmd: ioctl command
+ *			@arg: arguments to ioctl
+ * @mmap:		mmap callback
+ *			@mdev: mediated device structure
+ *			@vma: vma structure
+ * Parent device that support mediated device should be registered with mdev
+ * module with parent_ops structure.
+ **/
+
+struct parent_ops {
+	struct module   *owner;
+	const struct attribute_group **dev_attr_groups;
+	const struct attribute_group **mdev_attr_groups;
+	struct attribute_group **supported_type_groups;
+
+	int     (*create)(struct kobject *kobj, struct mdev_device *mdev);
+	int     (*remove)(struct mdev_device *mdev);
+	int     (*open)(struct mdev_device *mdev);
+	void    (*release)(struct mdev_device *mdev);
+	ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
+			size_t count, loff_t *ppos);
+	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
+			 size_t count, loff_t *ppos);
+	ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
+			 unsigned long arg);
+	int	(*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
+};
+
+/* interface for exporting mdev supported type attributes */
+struct mdev_type_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct device *dev,
+			 const char *buf, size_t count);
+};
+
+#define MDEV_TYPE_ATTR(_name, _mode, _show, _store)		\
+struct mdev_type_attribute mdev_type_attr_##_name =		\
+	__ATTR(_name, _mode, _show, _store)
+#define MDEV_TYPE_ATTR_RW(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name)
+#define MDEV_TYPE_ATTR_RO(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
+#define MDEV_TYPE_ATTR_WO(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
+
+/**
+ * struct mdev_driver - Mediated device driver
+ * @name: driver name
+ * @probe: called when new device created
+ * @remove: called when device removed
+ * @driver: device driver structure
+ *
+ **/
+struct mdev_driver {
+	const char *name;
+	int  (*probe)(struct device *dev);
+	void (*remove)(struct device *dev);
+	struct device_driver driver;
+};
+
+#define to_mdev_driver(drv)	container_of(drv, struct mdev_driver, driver)
+#define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
+
+static inline void *mdev_get_drvdata(struct mdev_device *mdev)
+{
+	return mdev->driver_data;
+}
+
+static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
+{
+	mdev->driver_data = data;
+}
+
+extern struct bus_type mdev_bus_type;
+
+#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
+
+extern int  mdev_register_device(struct device *dev,
+				 const struct parent_ops *ops);
+extern void mdev_unregister_device(struct device *dev);
+
+extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
+extern void mdev_unregister_driver(struct mdev_driver *drv);
+
+#endif /* MDEV_H */
-- 
cgit v1.2.3


From 2169037dc322d8baa84d9bd4468995f818f25d82 Mon Sep 17 00:00:00 2001
From: Kirti Wankhede <kwankhede@nvidia.com>
Date: Thu, 17 Nov 2016 02:16:17 +0530
Subject: vfio iommu: Added pin and unpin callback functions to
 vfio_iommu_driver_ops

Added APIs for pining and unpining set of pages. These call back into
backend iommu module to actually pin and unpin pages.
Added two new callback functions to struct vfio_iommu_driver_ops. Backend
IOMMU module that supports pining and unpinning pages for mdev devices
should provide these functions.

Renamed static functions in vfio_type1_iommu.c to resolve conflicts

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c             | 102 ++++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_type1.c |  20 ++++----
 include/linux/vfio.h            |  13 ++++-
 3 files changed, 124 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2e83bdf007fe..bd36c16b0ef2 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1799,6 +1799,108 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 }
 EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
 
+
+/*
+ * Pin a set of guest PFNs and return their associated host PFNs for local
+ * domain only.
+ * @dev [in]     : device
+ * @user_pfn [in]: array of user/guest PFNs to be unpinned.
+ * @npage [in]   : count of elements in user_pfn array.  This count should not
+ *		   be greater VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in]    : protection flags
+ * @phys_pfn[out]: array of host PFNs
+ * Return error or number of pages pinned.
+ */
+int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
+		   int prot, unsigned long *phys_pfn)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !user_pfn || !phys_pfn || !npage)
+		return -EINVAL;
+
+	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
+		return -E2BIG;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_pin_pages;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->pin_pages))
+		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+					     npage, prot, phys_pfn);
+	else
+		ret = -ENOTTY;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_pin_pages:
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin set of host PFNs for local domain only.
+ * @dev [in]     : device
+ * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
+ *		   PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @npage [in]   : count of elements in user_pfn array.  This count should not
+ *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * Return error or number of pages unpinned.
+ */
+int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !user_pfn || !npage)
+		return -EINVAL;
+
+	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
+		return -E2BIG;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_unpin_pages;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->unpin_pages))
+		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
+					       npage);
+	else
+		ret = -ENOTTY;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_unpin_pages:
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
 /**
  * Module/class support
  */
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ba19424e4a1..9f3d58d3dfaf 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
  * the iommu can only map chunks of consecutive pfns anyway, so get the
  * first page and all consecutive pages with the same locking.
  */
-static long vfio_pin_pages(unsigned long vaddr, long npage,
-			   int prot, unsigned long *pfn_base)
+static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
+				  int prot, unsigned long *pfn_base)
 {
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	bool lock_cap = capable(CAP_IPC_LOCK);
@@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 	return i;
 }
 
-static long vfio_unpin_pages(unsigned long pfn, long npage,
-			     int prot, bool do_accounting)
+static long vfio_unpin_pages_remote(unsigned long pfn, long npage,
+				    int prot, bool do_accounting)
 {
 	unsigned long unlocked = 0;
 	long i;
@@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		if (WARN_ON(!unmapped))
 			break;
 
-		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
-					     unmapped >> PAGE_SHIFT,
-					     dma->prot, false);
+		unlocked += vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
+						    unmapped >> PAGE_SHIFT,
+						    dma->prot, false);
 		iova += unmapped;
 
 		cond_resched();
@@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 
 	while (size) {
 		/* Pin a contiguous chunk of memory */
-		npage = vfio_pin_pages(vaddr + dma->size,
-				       size >> PAGE_SHIFT, prot, &pfn);
+		npage = vfio_pin_pages_remote(vaddr + dma->size,
+					      size >> PAGE_SHIFT, prot, &pfn);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 		/* Map it! */
 		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
 		if (ret) {
-			vfio_unpin_pages(pfn, npage, prot, true);
+			vfio_unpin_pages_remote(pfn, npage, prot, true);
 			break;
 		}
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 0ecae0b1cd34..3c862a030029 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -75,7 +75,11 @@ struct vfio_iommu_driver_ops {
 					struct iommu_group *group);
 	void		(*detach_group)(void *iommu_data,
 					struct iommu_group *group);
-
+	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
+				     int npage, int prot,
+				     unsigned long *phys_pfn);
+	int		(*unpin_pages)(void *iommu_data,
+				       unsigned long *user_pfn, int npage);
 };
 
 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
@@ -92,6 +96,13 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+#define VFIO_PIN_PAGES_MAX_ENTRIES	(PAGE_SIZE/sizeof(unsigned long))
+
+extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+			  int npage, int prot, unsigned long *phys_pfn);
+extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+			    int npage);
+
 /*
  * Sub-module helpers
  */
-- 
cgit v1.2.3


From c086de818dd81c3c2f7cecff23de6585b74340c0 Mon Sep 17 00:00:00 2001
From: Kirti Wankhede <kwankhede@nvidia.com>
Date: Thu, 17 Nov 2016 10:28:26 +0530
Subject: vfio iommu: Add blocking notifier to notify DMA_UNMAP

Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers
about DMA_UNMAP.
Exported two APIs vfio_register_notifier() and vfio_unregister_notifier().
Notifier should be registered, if external user wants to use
vfio_pin_pages()/vfio_unpin_pages() APIs to pin/unpin pages.
Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate
mappings.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c             | 73 ++++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_type1.c | 74 +++++++++++++++++++++++++++++++++--------
 include/linux/vfio.h            | 12 +++++++
 3 files changed, 146 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index bd36c16b0ef2..301eed07a0ab 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1901,6 +1901,79 @@ err_unpin_pages:
 }
 EXPORT_SYMBOL(vfio_unpin_pages);
 
+int vfio_register_notifier(struct device *dev, struct notifier_block *nb)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !nb)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_register_nb;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->register_notifier))
+		ret = driver->ops->register_notifier(container->iommu_data, nb);
+	else
+		ret = -ENOTTY;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_register_nb:
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_register_notifier);
+
+int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !nb)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_unregister_nb;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->unregister_notifier))
+		ret = driver->ops->unregister_notifier(container->iommu_data,
+						       nb);
+	else
+		ret = -ENOTTY;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_unregister_nb:
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_unregister_notifier);
+
 /**
  * Module/class support
  */
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index db0901f8a149..51810a95416e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -38,6 +38,7 @@
 #include <linux/workqueue.h>
 #include <linux/pid_namespace.h>
 #include <linux/mdev.h>
+#include <linux/notifier.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -60,6 +61,7 @@ struct vfio_iommu {
 	struct vfio_domain	*external_domain; /* domain for external user */
 	struct mutex		lock;
 	struct rb_root		dma_list;
+	struct blocking_notifier_head notifier;
 	bool			v2;
 	bool			nesting;
 };
@@ -561,7 +563,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 
 	mutex_lock(&iommu->lock);
 
-	if (!iommu->external_domain) {
+	/* Fail if notifier list is empty */
+	if ((!iommu->external_domain) || (!iommu->notifier.head)) {
 		ret = -EINVAL;
 		goto pin_done;
 	}
@@ -776,9 +779,9 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap)
 {
 	uint64_t mask;
-	struct vfio_dma *dma;
+	struct vfio_dma *dma, *dma_last = NULL;
 	size_t unmapped = 0;
-	int ret = 0;
+	int ret = 0, retries = 0;
 
 	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
 
@@ -788,7 +791,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		return -EINVAL;
 
 	WARN_ON(mask & PAGE_MASK);
-
+again:
 	mutex_lock(&iommu->lock);
 
 	/*
@@ -844,6 +847,32 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		 */
 		if (dma->task->mm != current->mm)
 			break;
+
+		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
+			struct vfio_iommu_type1_dma_unmap nb_unmap;
+
+			if (dma_last == dma) {
+				BUG_ON(++retries > 10);
+			} else {
+				dma_last = dma;
+				retries = 0;
+			}
+
+			nb_unmap.iova = dma->iova;
+			nb_unmap.size = dma->size;
+
+			/*
+			 * Notify anyone (mdev vendor drivers) to invalidate and
+			 * unmap iovas within the range we're about to unmap.
+			 * Vendor drivers MUST unpin pages in response to an
+			 * invalidation.
+			 */
+			mutex_unlock(&iommu->lock);
+			blocking_notifier_call_chain(&iommu->notifier,
+						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
+						    &nb_unmap);
+			goto again;
+		}
 		unmapped += dma->size;
 		vfio_remove_dma(iommu, dma);
 	}
@@ -1419,6 +1448,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
+	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
 
 	return iommu;
 }
@@ -1553,16 +1583,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 	return -ENOTTY;
 }
 
+static int vfio_iommu_type1_register_notifier(void *iommu_data,
+					      struct notifier_block *nb)
+{
+	struct vfio_iommu *iommu = iommu_data;
+
+	return blocking_notifier_chain_register(&iommu->notifier, nb);
+}
+
+static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
+						struct notifier_block *nb)
+{
+	struct vfio_iommu *iommu = iommu_data;
+
+	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+}
+
 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
-	.name		= "vfio-iommu-type1",
-	.owner		= THIS_MODULE,
-	.open		= vfio_iommu_type1_open,
-	.release	= vfio_iommu_type1_release,
-	.ioctl		= vfio_iommu_type1_ioctl,
-	.attach_group	= vfio_iommu_type1_attach_group,
-	.detach_group	= vfio_iommu_type1_detach_group,
-	.pin_pages	= vfio_iommu_type1_pin_pages,
-	.unpin_pages	= vfio_iommu_type1_unpin_pages,
+	.name			= "vfio-iommu-type1",
+	.owner			= THIS_MODULE,
+	.open			= vfio_iommu_type1_open,
+	.release		= vfio_iommu_type1_release,
+	.ioctl			= vfio_iommu_type1_ioctl,
+	.attach_group		= vfio_iommu_type1_attach_group,
+	.detach_group		= vfio_iommu_type1_detach_group,
+	.pin_pages		= vfio_iommu_type1_pin_pages,
+	.unpin_pages		= vfio_iommu_type1_unpin_pages,
+	.register_notifier	= vfio_iommu_type1_register_notifier,
+	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
 };
 
 static int __init vfio_iommu_type1_init(void)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 3c862a030029..6ab13f7e2920 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops {
 				     unsigned long *phys_pfn);
 	int		(*unpin_pages)(void *iommu_data,
 				       unsigned long *user_pfn, int npage);
+	int		(*register_notifier)(void *iommu_data,
+					     struct notifier_block *nb);
+	int		(*unregister_notifier)(void *iommu_data,
+					       struct notifier_block *nb);
 };
 
 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
@@ -103,6 +107,14 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
 extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
 			    int npage);
 
+#define VFIO_IOMMU_NOTIFY_DMA_UNMAP	(1)
+
+extern int vfio_register_notifier(struct device *dev,
+				  struct notifier_block *nb);
+
+extern int vfio_unregister_notifier(struct device *dev,
+				    struct notifier_block *nb);
+
 /*
  * Sub-module helpers
  */
-- 
cgit v1.2.3


From b3c0a866f1692da2d1059dadd9c429ff5b364fc9 Mon Sep 17 00:00:00 2001
From: Kirti Wankhede <kwankhede@nvidia.com>
Date: Thu, 17 Nov 2016 02:16:25 +0530
Subject: vfio: Introduce common function to add capabilities

Vendor driver using mediated device framework should use
vfio_info_add_capability() to add capabilities.
Introduced this function to reduce code duplication in vendor drivers.

vfio_info_cap_shift() manipulated a data buffer to add an offset to each
element in a chain. This data buffer is documented in a uapi header.
Changing vfio_info_cap_shift symbol to be available to all drivers.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c  | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/vfio.h |  3 +++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 301eed07a0ab..bb3ddb76911f 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1797,8 +1797,66 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
 		tmp->next += offset;
 }
-EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
+EXPORT_SYMBOL(vfio_info_cap_shift);
 
+static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
+	size_t size;
+
+	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse_cap = container_of(header,
+			struct vfio_region_info_cap_sparse_mmap, header);
+	sparse_cap->nr_areas = sparse->nr_areas;
+	memcpy(sparse_cap->areas, sparse->areas,
+	       sparse->nr_areas * sizeof(*sparse->areas));
+	return 0;
+}
+
+static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+
+	header = vfio_info_cap_add(caps, sizeof(*cap),
+				   VFIO_REGION_INFO_CAP_TYPE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	type_cap = container_of(header, struct vfio_region_info_cap_type,
+				header);
+	type_cap->type = cap->type;
+	type_cap->subtype = cap->subtype;
+	return 0;
+}
+
+int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
+			     void *cap_type)
+{
+	int ret = -EINVAL;
+
+	if (!cap_type)
+		return 0;
+
+	switch (cap_type_id) {
+	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+		ret = sparse_mmap_cap(caps, cap_type);
+		break;
+
+	case VFIO_REGION_INFO_CAP_TYPE:
+		ret = region_type_cap(caps, cap_type);
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(vfio_info_add_capability);
 
 /*
  * Pin a set of guest PFNs and return their associated host PFNs for local
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 6ab13f7e2920..e26f7ccab564 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -126,6 +126,9 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
 		struct vfio_info_cap *caps, size_t size, u16 id, u16 version);
 extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
 
+extern int vfio_info_add_capability(struct vfio_info_cap *caps,
+				    int cap_type_id, void *cap_type);
+
 struct pci_dev;
 #ifdef CONFIG_EEH
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
-- 
cgit v1.2.3


From c747f08aea847c8c0704acf9375ca83c4800f6c1 Mon Sep 17 00:00:00 2001
From: Kirti Wankhede <kwankhede@nvidia.com>
Date: Thu, 17 Nov 2016 02:16:27 +0530
Subject: vfio: Introduce vfio_set_irqs_validate_and_prepare()

Vendor driver using mediated device framework would use same mechnism to
validate and prepare IRQs. Introducing this function to reduce code
replication in multiple drivers.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c  | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h |  4 ++++
 2 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index bb3ddb76911f..c8150674787c 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1858,6 +1858,54 @@ int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
 }
 EXPORT_SYMBOL(vfio_info_add_capability);
 
+int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
+				       int max_irq_type, size_t *data_size)
+{
+	unsigned long minsz;
+	size_t size;
+
+	minsz = offsetofend(struct vfio_irq_set, count);
+
+	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
+	    (hdr->count >= (U32_MAX - hdr->start)) ||
+	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
+		return -EINVAL;
+
+	if (data_size)
+		*data_size = 0;
+
+	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
+		return -EINVAL;
+
+	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+	case VFIO_IRQ_SET_DATA_NONE:
+		size = 0;
+		break;
+	case VFIO_IRQ_SET_DATA_BOOL:
+		size = sizeof(uint8_t);
+		break;
+	case VFIO_IRQ_SET_DATA_EVENTFD:
+		size = sizeof(int32_t);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (size) {
+		if (hdr->argsz - minsz < hdr->count * size)
+			return -EINVAL;
+
+		if (!data_size)
+			return -EINVAL;
+
+		*data_size = hdr->count * size;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
+
 /*
  * Pin a set of guest PFNs and return their associated host PFNs for local
  * domain only.
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e26f7ccab564..15ff0421b423 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -129,6 +129,10 @@ extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
 extern int vfio_info_add_capability(struct vfio_info_cap *caps,
 				    int cap_type_id, void *cap_type);
 
+extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
+					      int num_irqs, int max_irq_type,
+					      size_t *data_size);
+
 struct pci_dev;
 #ifdef CONFIG_EEH
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 7c7a6077f5c7a35fc03a7f452875d8440dd1bc8d Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 16 Nov 2016 22:51:29 +0200
Subject: mei: bus: split RX and async notification callbacks

Split callbacks for RX and async notification events on mei bus to
eliminate synchronization problems and to open way for RX optimizations.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c     | 141 ++++++++++++++++++++++++++-------------------
 drivers/misc/mei/client.c  |   5 ++
 drivers/nfc/mei_phy.c      |  36 +++++-------
 drivers/watchdog/mei_wdt.c |  36 ++++--------
 include/linux/mei_cl_bus.h |  32 +++++-----
 5 files changed, 128 insertions(+), 122 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 483587f60249..2fd254ecde2f 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -209,31 +209,40 @@ ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
 EXPORT_SYMBOL_GPL(mei_cldev_recv);
 
 /**
- * mei_cl_bus_event_work  - dispatch rx event for a bus device
- *    and schedule new work
+ * mei_cl_bus_rx_work - dispatch rx event for a bus device
  *
  * @work: work
  */
-static void mei_cl_bus_event_work(struct work_struct *work)
+static void mei_cl_bus_rx_work(struct work_struct *work)
 {
 	struct mei_cl_device *cldev;
 	struct mei_device *bus;
 
-	cldev = container_of(work, struct mei_cl_device, event_work);
+	cldev = container_of(work, struct mei_cl_device, rx_work);
 
 	bus = cldev->bus;
 
-	if (cldev->event_cb)
-		cldev->event_cb(cldev, cldev->events);
+	if (cldev->rx_cb)
+		cldev->rx_cb(cldev);
 
-	cldev->events = 0;
+	mutex_lock(&bus->device_lock);
+	mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
+	mutex_unlock(&bus->device_lock);
+}
 
-	/* Prepare for the next read */
-	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
-		mutex_lock(&bus->device_lock);
-		mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
-		mutex_unlock(&bus->device_lock);
-	}
+/**
+ * mei_cl_bus_notif_work - dispatch FW notif event for a bus device
+ *
+ * @work: work
+ */
+static void mei_cl_bus_notif_work(struct work_struct *work)
+{
+	struct mei_cl_device *cldev;
+
+	cldev = container_of(work, struct mei_cl_device, notif_work);
+
+	if (cldev->notif_cb)
+		cldev->notif_cb(cldev);
 }
 
 /**
@@ -248,18 +257,13 @@ bool mei_cl_bus_notify_event(struct mei_cl *cl)
 {
 	struct mei_cl_device *cldev = cl->cldev;
 
-	if (!cldev || !cldev->event_cb)
-		return false;
-
-	if (!(cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)))
+	if (!cldev || !cldev->notif_cb)
 		return false;
 
 	if (!cl->notify_ev)
 		return false;
 
-	set_bit(MEI_CL_EVENT_NOTIF, &cldev->events);
-
-	schedule_work(&cldev->event_work);
+	schedule_work(&cldev->notif_work);
 
 	cl->notify_ev = false;
 
@@ -267,7 +271,7 @@ bool mei_cl_bus_notify_event(struct mei_cl *cl)
 }
 
 /**
- * mei_cl_bus_rx_event  - schedule rx event
+ * mei_cl_bus_rx_event - schedule rx event
  *
  * @cl: host client
  *
@@ -278,64 +282,81 @@ bool mei_cl_bus_rx_event(struct mei_cl *cl)
 {
 	struct mei_cl_device *cldev = cl->cldev;
 
-	if (!cldev || !cldev->event_cb)
-		return false;
-
-	if (!(cldev->events_mask & BIT(MEI_CL_EVENT_RX)))
+	if (!cldev || !cldev->rx_cb)
 		return false;
 
-	set_bit(MEI_CL_EVENT_RX, &cldev->events);
-
-	schedule_work(&cldev->event_work);
+	schedule_work(&cldev->rx_work);
 
 	return true;
 }
 
 /**
- * mei_cldev_register_event_cb - register event callback
+ * mei_cldev_register_rx_cb - register Rx event callback
  *
  * @cldev: me client devices
- * @event_cb: callback function
- * @events_mask: requested events bitmask
+ * @rx_cb: callback function
  *
  * Return: 0 on success
  *         -EALREADY if an callback is already registered
  *         <0 on other errors
  */
-int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
-				unsigned long events_mask,
-				mei_cldev_event_cb_t event_cb)
+int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb)
 {
 	struct mei_device *bus = cldev->bus;
 	int ret;
 
-	if (cldev->event_cb)
+	if (!rx_cb)
+		return -EINVAL;
+	if (cldev->rx_cb)
 		return -EALREADY;
 
-	cldev->events = 0;
-	cldev->events_mask = events_mask;
-	cldev->event_cb = event_cb;
-	INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
+	cldev->rx_cb = rx_cb;
+	INIT_WORK(&cldev->rx_work, mei_cl_bus_rx_work);
 
-	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
-		mutex_lock(&bus->device_lock);
-		ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
-		mutex_unlock(&bus->device_lock);
-		if (ret && ret != -EBUSY)
-			return ret;
-	}
+	mutex_lock(&bus->device_lock);
+	ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
+	mutex_unlock(&bus->device_lock);
+	if (ret && ret != -EBUSY)
+		return ret;
 
-	if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) {
-		mutex_lock(&bus->device_lock);
-		ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0);
-		mutex_unlock(&bus->device_lock);
-		if (ret)
-			return ret;
-	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_register_rx_cb);
+
+/**
+ * mei_cldev_register_notif_cb - register FW notification event callback
+ *
+ * @cldev: me client devices
+ * @notif_cb: callback function
+ *
+ * Return: 0 on success
+ *         -EALREADY if an callback is already registered
+ *         <0 on other errors
+ */
+int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
+				mei_cldev_cb_t notif_cb)
+{
+	struct mei_device *bus = cldev->bus;
+	int ret;
+
+	if (!notif_cb)
+		return -EINVAL;
+
+	if (cldev->notif_cb)
+		return -EALREADY;
+
+	cldev->notif_cb = notif_cb;
+	INIT_WORK(&cldev->notif_work, mei_cl_bus_notif_work);
+
+	mutex_lock(&bus->device_lock);
+	ret = mei_cl_notify_request(cldev->cl, NULL, 1);
+	mutex_unlock(&bus->device_lock);
+	if (ret)
+		return ret;
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb);
+EXPORT_SYMBOL_GPL(mei_cldev_register_notif_cb);
 
 /**
  * mei_cldev_get_drvdata - driver data getter
@@ -471,8 +492,6 @@ int mei_cldev_disable(struct mei_cl_device *cldev)
 
 	bus = cldev->bus;
 
-	cldev->event_cb = NULL;
-
 	mutex_lock(&bus->device_lock);
 
 	if (!mei_cl_is_connected(cl)) {
@@ -619,9 +638,13 @@ static int mei_cl_device_remove(struct device *dev)
 	if (!cldev || !dev->driver)
 		return 0;
 
-	if (cldev->event_cb) {
-		cldev->event_cb = NULL;
-		cancel_work_sync(&cldev->event_work);
+	if (cldev->rx_cb) {
+		cancel_work_sync(&cldev->rx_work);
+		cldev->rx_cb = NULL;
+	}
+	if (cldev->notif_cb) {
+		cancel_work_sync(&cldev->notif_work);
+		cldev->notif_cb = NULL;
 	}
 
 	cldrv = to_mei_cl_driver(dev->driver);
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 46ee9155ada6..9635b14b6011 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -673,6 +673,11 @@ int mei_cl_unlink(struct mei_cl *cl)
 	list_del_init(&cl->link);
 
 	cl->state = MEI_FILE_UNINITIALIZED;
+	cl->writing_state = MEI_IDLE;
+
+	WARN_ON(!list_empty(&cl->rd_completed) ||
+		!list_empty(&cl->rd_pending) ||
+		!list_empty(&cl->link));
 
 	return 0;
 }
diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c
index 03139c5a05e4..8a04c5e02999 100644
--- a/drivers/nfc/mei_phy.c
+++ b/drivers/nfc/mei_phy.c
@@ -297,9 +297,11 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length)
 }
 
 
-static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events)
+static void nfc_mei_rx_cb(struct mei_cl_device *cldev)
 {
 	struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
+	struct sk_buff *skb;
+	int reply_size;
 
 	if (!phy)
 		return;
@@ -307,27 +309,22 @@ static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events)
 	if (phy->hard_fault != 0)
 		return;
 
-	if (events & BIT(MEI_CL_EVENT_RX)) {
-		struct sk_buff *skb;
-		int reply_size;
-
-		skb = alloc_skb(MEI_NFC_MAX_READ, GFP_KERNEL);
-		if (!skb)
-			return;
+	skb = alloc_skb(MEI_NFC_MAX_READ, GFP_KERNEL);
+	if (!skb)
+		return;
 
-		reply_size = mei_nfc_recv(phy, skb->data, MEI_NFC_MAX_READ);
-		if (reply_size < MEI_NFC_HEADER_SIZE) {
-			kfree_skb(skb);
-			return;
-		}
+	reply_size = mei_nfc_recv(phy, skb->data, MEI_NFC_MAX_READ);
+	if (reply_size < MEI_NFC_HEADER_SIZE) {
+		kfree_skb(skb);
+		return;
+	}
 
-		skb_put(skb, reply_size);
-		skb_pull(skb, MEI_NFC_HEADER_SIZE);
+	skb_put(skb, reply_size);
+	skb_pull(skb, MEI_NFC_HEADER_SIZE);
 
-		MEI_DUMP_SKB_IN("mei frame read", skb);
+	MEI_DUMP_SKB_IN("mei frame read", skb);
 
-		nfc_hci_recv_frame(phy->hdev, skb);
-	}
+	nfc_hci_recv_frame(phy->hdev, skb);
 }
 
 static int nfc_mei_phy_enable(void *phy_id)
@@ -358,8 +355,7 @@ static int nfc_mei_phy_enable(void *phy_id)
 		goto err;
 	}
 
-	r = mei_cldev_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX),
-					nfc_mei_event_cb);
+	r = mei_cldev_register_rx_cb(phy->cldev, nfc_mei_rx_cb);
 	if (r) {
 		pr_err("Event cb registration failed %d\n", r);
 		goto err;
diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c
index e0af52265511..79b35515904e 100644
--- a/drivers/watchdog/mei_wdt.c
+++ b/drivers/watchdog/mei_wdt.c
@@ -410,11 +410,11 @@ static void mei_wdt_unregister_work(struct work_struct *work)
 }
 
 /**
- * mei_wdt_event_rx - callback for data receive
+ * mei_wdt_rx - callback for data receive
  *
  * @cldev: bus device
  */
-static void mei_wdt_event_rx(struct mei_cl_device *cldev)
+static void mei_wdt_rx(struct mei_cl_device *cldev)
 {
 	struct mei_wdt *wdt = mei_cldev_get_drvdata(cldev);
 	struct mei_wdt_start_response res;
@@ -482,11 +482,11 @@ out:
 }
 
 /*
- * mei_wdt_notify_event - callback for event notification
+ * mei_wdt_notif - callback for event notification
  *
  * @cldev: bus device
  */
-static void mei_wdt_notify_event(struct mei_cl_device *cldev)
+static void mei_wdt_notif(struct mei_cl_device *cldev)
 {
 	struct mei_wdt *wdt = mei_cldev_get_drvdata(cldev);
 
@@ -496,21 +496,6 @@ static void mei_wdt_notify_event(struct mei_cl_device *cldev)
 	mei_wdt_register(wdt);
 }
 
-/**
- * mei_wdt_event - callback for event receive
- *
- * @cldev: bus device
- * @events: event mask
- */
-static void mei_wdt_event(struct mei_cl_device *cldev, u32 events)
-{
-	if (events & BIT(MEI_CL_EVENT_RX))
-		mei_wdt_event_rx(cldev);
-
-	if (events & BIT(MEI_CL_EVENT_NOTIF))
-		mei_wdt_notify_event(cldev);
-}
-
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 
 static ssize_t mei_dbgfs_read_activation(struct file *file, char __user *ubuf,
@@ -621,16 +606,17 @@ static int mei_wdt_probe(struct mei_cl_device *cldev,
 		goto err_out;
 	}
 
-	ret = mei_cldev_register_event_cb(wdt->cldev,
-					  BIT(MEI_CL_EVENT_RX) |
-					  BIT(MEI_CL_EVENT_NOTIF),
-					  mei_wdt_event);
+	ret = mei_cldev_register_rx_cb(wdt->cldev, mei_wdt_rx);
+	if (ret) {
+		dev_err(&cldev->dev, "Could not reg rx event ret=%d\n", ret);
+		goto err_disable;
+	}
 
+	ret = mei_cldev_register_notif_cb(wdt->cldev, mei_wdt_notif);
 	/* on legacy devices notification is not supported
-	 * this doesn't fail the registration for RX event
 	 */
 	if (ret && ret != -EOPNOTSUPP) {
-		dev_err(&cldev->dev, "Could not register event ret=%d\n", ret);
+		dev_err(&cldev->dev, "Could not reg notif event ret=%d\n", ret);
 		goto err_disable;
 	}
 
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 4adb2e7c9f84..017f5232b3de 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -8,8 +8,7 @@
 struct mei_cl_device;
 struct mei_device;
 
-typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev,
-				     u32 events);
+typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev);
 
 /**
  * struct mei_cl_device - MEI device handle
@@ -24,11 +23,12 @@ typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev,
  * @me_cl: me client
  * @cl: mei client
  * @name: device name
- * @event_work: async work to execute event callback
- * @event_cb: Drivers register this callback to get asynchronous ME
- *	events (e.g. Rx buffer pending) notifications.
- * @events_mask: Events bit mask requested by driver.
- * @events: Events bitmask sent to the driver.
+ * @rx_work: async work to execute Rx event callback
+ * @rx_cb: Drivers register this callback to get asynchronous ME
+ *	Rx buffer pending notifications.
+ * @notif_work: async work to execute FW notif event callback
+ * @notif_cb: Drivers register this callback to get asynchronous ME
+ *	FW notification pending notifications.
  *
  * @do_match: wheather device can be matched with a driver
  * @is_added: device is already scanned
@@ -43,10 +43,10 @@ struct mei_cl_device {
 	struct mei_cl *cl;
 	char name[MEI_CL_NAME_SIZE];
 
-	struct work_struct event_work;
-	mei_cldev_event_cb_t event_cb;
-	unsigned long events_mask;
-	unsigned long events;
+	struct work_struct rx_work;
+	mei_cldev_cb_t rx_cb;
+	struct work_struct notif_work;
+	mei_cldev_cb_t notif_cb;
 
 	unsigned int do_match:1;
 	unsigned int is_added:1;
@@ -88,13 +88,9 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
 ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length);
 ssize_t  mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
 
-int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
-				unsigned long event_mask,
-				mei_cldev_event_cb_t read_cb);
-
-#define MEI_CL_EVENT_RX 0
-#define MEI_CL_EVENT_TX 1
-#define MEI_CL_EVENT_NOTIF 2
+int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb);
+int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
+				mei_cldev_cb_t notif_cb);
 
 const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev);
 u8 mei_cldev_ver(const struct mei_cl_device *cldev);
-- 
cgit v1.2.3


From 06426adf072bca62ac31ea396ff2159a34f276c2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Mon, 14 Nov 2016 13:01:59 -0700
Subject: blk-mq: implement hybrid poll mode for sync O_DIRECT

This patch enables a hybrid polling mode. Instead of polling after IO
submission, we can induce an artificial delay, and then poll after that.
For example, if the IO is presumed to complete in 8 usecs from now, we
can sleep for 4 usecs, wake up, and then do our polling. This still puts
a sleep/wakeup cycle in the IO path, but instead of the wakeup happening
after the IO has completed, it'll happen before. With this hybrid
scheme, we can achieve big latency reductions while still using the same
(or less) amount of CPU.

Signed-off-by: Jens Axboe <axboe@fb.com>
Tested-By: Stephen Bates <sbates@raithlin.com>
Reviewed-By: Stephen Bates <sbates@raithlin.com>
---
 block/blk-mq.c         | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-sysfs.c      | 29 +++++++++++++++++++++++++++++
 block/blk.h            |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 81 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f39e69c732cc..8cb248fb6a68 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -332,6 +332,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	rq->rq_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+	clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
 	blk_mq_put_tag(hctx, ctx, tag);
 	blk_queue_exit(q);
 }
@@ -2468,11 +2469,60 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+				     struct request *rq)
+{
+	struct hrtimer_sleeper hs;
+	enum hrtimer_mode mode;
+	ktime_t kt;
+
+	if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+		return false;
+
+	set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
+
+	/*
+	 * This will be replaced with the stats tracking code, using
+	 * 'avg_completion_time / 2' as the pre-sleep target.
+	 */
+	kt = ktime_set(0, q->poll_nsec);
+
+	mode = HRTIMER_MODE_REL;
+	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_set_expires(&hs.timer, kt);
+
+	hrtimer_init_sleeper(&hs, current);
+	do {
+		if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
+			break;
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		hrtimer_start_expires(&hs.timer, mode);
+		if (hs.task)
+			io_schedule();
+		hrtimer_cancel(&hs.timer);
+		mode = HRTIMER_MODE_ABS;
+	} while (hs.task && !signal_pending(current));
+
+	__set_current_state(TASK_RUNNING);
+	destroy_hrtimer_on_stack(&hs.timer);
+	return true;
+}
+
 static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	struct request_queue *q = hctx->queue;
 	long state;
 
+	/*
+	 * If we sleep, have the caller restart the poll loop to reset
+	 * the state. Like for the other success return cases, the
+	 * caller is responsible for checking if the IO completed. If
+	 * the IO isn't complete, we'll get called again and will go
+	 * straight to the busy poll loop.
+	 */
+	if (blk_mq_poll_hybrid_sleep(q, rq))
+		return true;
+
 	hctx->poll_considered++;
 
 	state = current->state;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 415e764807d0..dcdfcaa12653 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -350,6 +350,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->poll_nsec / 1000, page);
+}
+
+static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
+				size_t count)
+{
+	unsigned long poll_usec;
+	ssize_t ret;
+
+	if (!q->mq_ops || !q->mq_ops->poll)
+		return -EINVAL;
+
+	ret = queue_var_store(&poll_usec, page, count);
+	if (ret < 0)
+		return ret;
+
+	q->poll_nsec = poll_usec * 1000;
+	return ret;
+}
+
 static ssize_t queue_poll_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
@@ -602,6 +624,12 @@ static struct queue_sysfs_entry queue_poll_entry = {
 	.store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_poll_delay_entry = {
+	.attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_poll_delay_show,
+	.store = queue_poll_delay_store,
+};
+
 static struct queue_sysfs_entry queue_wc_entry = {
 	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_wc_show,
@@ -655,6 +683,7 @@ static struct attribute *default_attrs[] = {
 	&queue_dax_entry.attr,
 	&queue_stats_entry.attr,
 	&queue_wb_lat_entry.attr,
+	&queue_poll_delay_entry.attr,
 	NULL,
 };
 
diff --git a/block/blk.h b/block/blk.h
index aa132dea598c..041185e5f129 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -111,6 +111,7 @@ void blk_account_io_done(struct request *req);
 enum rq_atomic_flags {
 	REQ_ATOM_COMPLETE = 0,
 	REQ_ATOM_STARTED,
+	REQ_ATOM_POLL_SLEPT,
 };
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bab18ee5810d..37ed4ea705c8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -509,6 +509,7 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
+	unsigned int		poll_nsec;
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 	struct list_head	timeout_list;
-- 
cgit v1.2.3


From 64f1c21e86f7fe63337b5c23c129de3ec506431d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Mon, 14 Nov 2016 13:03:03 -0700
Subject: blk-mq: make the polling code adaptive

The previous commit introduced the hybrid sleep/poll mode. Take
that one step further, and use the completion latencies to
automatically sleep for half the mean completion time. This is
a good approximation.

This changes the 'io_poll_delay' sysfs file a bit to expose the
various options. Depending on the value, the polling code will
behave differently:

-1	Never enter hybrid sleep mode
 0	Use half of the completion mean for the sleep delay
>0	Use this specific value as the sleep delay

Signed-off-by: Jens Axboe <axboe@fb.com>
Tested-By: Stephen Bates <sbates@raithlin.com>
Reviewed-By: Stephen Bates <sbates@raithlin.com>
---
 block/blk-mq.c         | 67 +++++++++++++++++++++++++++++++++++++++++++++++---
 block/blk-sysfs.c      | 26 ++++++++++++++------
 include/linux/blkdev.h |  2 +-
 3 files changed, 83 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8cb248fb6a68..9d4a1d630d0b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2132,6 +2132,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	 */
 	q->nr_requests = set->queue_depth;
 
+	/*
+	 * Default to classic polling
+	 */
+	q->poll_nsec = -1;
+
 	if (set->ops->complete)
 		blk_queue_softirq_done(q, set->ops->complete);
 
@@ -2469,14 +2474,70 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
+				       struct blk_mq_hw_ctx *hctx,
+				       struct request *rq)
+{
+	struct blk_rq_stat stat[2];
+	unsigned long ret = 0;
+
+	/*
+	 * If stats collection isn't on, don't sleep but turn it on for
+	 * future users
+	 */
+	if (!blk_stat_enable(q))
+		return 0;
+
+	/*
+	 * We don't have to do this once per IO, should optimize this
+	 * to just use the current window of stats until it changes
+	 */
+	memset(&stat, 0, sizeof(stat));
+	blk_hctx_stat_get(hctx, stat);
+
+	/*
+	 * As an optimistic guess, use half of the mean service time
+	 * for this type of request. We can (and should) make this smarter.
+	 * For instance, if the completion latencies are tight, we can
+	 * get closer than just half the mean. This is especially
+	 * important on devices where the completion latencies are longer
+	 * than ~10 usec.
+	 */
+	if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
+		ret = (stat[BLK_STAT_READ].mean + 1) / 2;
+	else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
+		ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+
+	return ret;
+}
+
 static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+				     struct blk_mq_hw_ctx *hctx,
 				     struct request *rq)
 {
 	struct hrtimer_sleeper hs;
 	enum hrtimer_mode mode;
+	unsigned int nsecs;
 	ktime_t kt;
 
-	if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+	if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+		return false;
+
+	/*
+	 * poll_nsec can be:
+	 *
+	 * -1:	don't ever hybrid sleep
+	 *  0:	use half of prev avg
+	 * >0:	use this specific value
+	 */
+	if (q->poll_nsec == -1)
+		return false;
+	else if (q->poll_nsec > 0)
+		nsecs = q->poll_nsec;
+	else
+		nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+
+	if (!nsecs)
 		return false;
 
 	set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -2485,7 +2546,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	 * This will be replaced with the stats tracking code, using
 	 * 'avg_completion_time / 2' as the pre-sleep target.
 	 */
-	kt = ktime_set(0, q->poll_nsec);
+	kt = ktime_set(0, nsecs);
 
 	mode = HRTIMER_MODE_REL;
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
@@ -2520,7 +2581,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	 * the IO isn't complete, we'll get called again and will go
 	 * straight to the busy poll loop.
 	 */
-	if (blk_mq_poll_hybrid_sleep(q, rq))
+	if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
 		return true;
 
 	hctx->poll_considered++;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index dcdfcaa12653..1855c6770045 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -352,24 +352,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->poll_nsec / 1000, page);
+	int val;
+
+	if (q->poll_nsec == -1)
+		val = -1;
+	else
+		val = q->poll_nsec / 1000;
+
+	return sprintf(page, "%d\n", val);
 }
 
 static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
 				size_t count)
 {
-	unsigned long poll_usec;
-	ssize_t ret;
+	int err, val;
 
 	if (!q->mq_ops || !q->mq_ops->poll)
 		return -EINVAL;
 
-	ret = queue_var_store(&poll_usec, page, count);
-	if (ret < 0)
-		return ret;
+	err = kstrtoint(page, 10, &val);
+	if (err < 0)
+		return err;
 
-	q->poll_nsec = poll_usec * 1000;
-	return ret;
+	if (val == -1)
+		q->poll_nsec = -1;
+	else
+		q->poll_nsec = val * 1000;
+
+	return count;
 }
 
 static ssize_t queue_poll_show(struct request_queue *q, char *page)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 37ed4ea705c8..85699bc90a51 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -509,7 +509,7 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
-	unsigned int		poll_nsec;
+	int			poll_nsec;
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 	struct list_head	timeout_list;
-- 
cgit v1.2.3


From 7254383341bc6e1a61996accd836009f0c922b21 Mon Sep 17 00:00:00 2001
From: Noa Osherovich <noaos@mellanox.com>
Date: Thu, 17 Nov 2016 16:06:56 -0600
Subject: PCI: Add Mellanox device IDs

Add Mellanox device IDs for use by the mlx4 driver and INTx quirks.

[bhelgaas: sorted and adapted from
http://lkml.kernel.org/r/1478011644-12080-1-git-send-email-noaos@mellanox.com]
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci_ids.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c58752fe16c4..f020ab4079d3 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2256,12 +2256,29 @@
 #define PCI_DEVICE_ID_ZOLTRIX_2BD0	0x2bd0
 
 #define PCI_VENDOR_ID_MELLANOX		0x15b3
-#define PCI_DEVICE_ID_MELLANOX_TAVOR	0x5a44
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3	0x1003
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO	0x1007
+#define PCI_DEVICE_ID_MELLANOX_CONNECTIB	0x1011
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX4	0x1013
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX	0x1015
+#define PCI_DEVICE_ID_MELLANOX_TAVOR		0x5a44
 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE	0x5a46
-#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278
-#define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
-#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
-#define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
+#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD	0x5e8c
+#define PCI_DEVICE_ID_MELLANOX_SINAI		0x6274
+#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT	0x6278
+#define PCI_DEVICE_ID_MELLANOX_ARBEL		0x6282
+#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR	0x6340
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR	0x634a
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR	0x6354
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN	0x6368
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN	0x6372
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2	0x6732
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2	0x673c
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2	0x6750
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2	0x6764
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX2	0x676e
 
 #define PCI_VENDOR_ID_DFI		0x15bd
 
-- 
cgit v1.2.3


From c02b7bf532f7e46f1f9a0e9c3c27ca3f6f134e8d Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Thu, 10 Nov 2016 21:26:23 +0000
Subject: i2c: mux: mellanox: add driver

This driver allows I2C routing controlled through CPLD select registers on
a wide range of Mellanox systems (CPLD Lattice device).
MUX selection is provided by digital and analog HW. Analog part is not
under SW control.
Digital part is under CPLD control (channel selection/de-selection).

Connectivity schema.
.---.             .-------------.
| l |             |             |-- i2cx1 -- i2cx8
| i |-- i2cn --+--| mlxcpld mux |
| n |          |  |             |-- i2cy1 -- i2cy8
| u |          |  '-------------'
| x |          |         |
'---'          '---------'

i2c-mux-mlxpcld does not necessarily require i2c-mlxcpld. It can be used
along with another bus driver, and still control i2c routing through CPLD
mux selection, in case the system is equipped with CPLD capable of mux
selection control.

The Kconfig currently controlling compilation of this code is:
drivers/i2c/muxes/Kconfig:config I2C_MUX_MLXCPLD

Signed-off-by: Michael Shych <michaelsh@mellanox.com>
Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS                         |   7 ++
 drivers/i2c/muxes/Kconfig           |  11 ++
 drivers/i2c/muxes/Makefile          |   1 +
 drivers/i2c/muxes/i2c-mux-mlxcpld.c | 220 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c/mlxcpld.h         |  52 +++++++++
 5 files changed, 291 insertions(+)
 create mode 100644 drivers/i2c/muxes/i2c-mux-mlxcpld.c
 create mode 100644 include/linux/i2c/mlxcpld.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 411e3b87b8c2..22ee29a8f403 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7881,6 +7881,13 @@ W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlxsw/
 
+MELLANOX MLXCPLD I2C MUX DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+M:	Michael Shych <michaelsh@mellanox.com>
+L:	linux-i2c@vger.kernel.org
+S:	Supported
+F:	drivers/i2c/muxes/i2c-mux-mlxcpld.c
+
 MELLANOX MLXCPLD LED DRIVER
 M:	Vadim Pasternak <vadimp@mellanox.com>
 L:	linux-leds@vger.kernel.org
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index e280c8ecc0b5..d03340203615 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -81,4 +81,15 @@ config I2C_DEMUX_PINCTRL
 	  demultiplexer that uses the pinctrl subsystem. This is useful if you
 	  want to change the I2C master at run-time depending on features.
 
+config I2C_MUX_MLXCPLD
+        tristate "Mellanox CPLD based I2C multiplexer"
+        help
+          If you say yes to this option, support will be included for a
+          CPLD based I2C multiplexer. This driver provides access to
+          I2C busses connected through a MUX, which is controlled
+          by a CPLD register.
+
+          This driver can also be built as a module.  If so, the module
+          will be called i2c-mux-mlxcpld.
+
 endmenu
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index 7c267c29b191..9948fa45037f 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_I2C_ARB_GPIO_CHALLENGE)	+= i2c-arb-gpio-challenge.o
 obj-$(CONFIG_I2C_DEMUX_PINCTRL)		+= i2c-demux-pinctrl.o
 
 obj-$(CONFIG_I2C_MUX_GPIO)	+= i2c-mux-gpio.o
+obj-$(CONFIG_I2C_MUX_MLXCPLD)	+= i2c-mux-mlxcpld.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= i2c-mux-pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= i2c-mux-pca954x.o
 obj-$(CONFIG_I2C_MUX_PINCTRL)	+= i2c-mux-pinctrl.o
diff --git a/drivers/i2c/muxes/i2c-mux-mlxcpld.c b/drivers/i2c/muxes/i2c-mux-mlxcpld.c
new file mode 100644
index 000000000000..3ab654bbfab5
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-mux-mlxcpld.c
@@ -0,0 +1,220 @@
+/*
+ * drivers/i2c/muxes/i2c-mux-mlxcpld.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Michael Shych <michaels@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/i2c/mlxcpld.h>
+
+#define CPLD_MUX_MAX_NCHANS	8
+
+/* mlxcpld_mux - mux control structure:
+ * @last_chan - last register value
+ * @client - I2C device client
+ */
+struct mlxcpld_mux {
+	u8 last_chan;
+	struct i2c_client *client;
+};
+
+/* MUX logic description.
+ * Driver can support different mux control logic, according to CPLD
+ * implementation.
+ *
+ * Connectivity schema.
+ *
+ * i2c-mlxcpld                                 Digital               Analog
+ * driver
+ * *--------*                                 * -> mux1 (virt bus2) -> mux -> |
+ * | I2CLPC | i2c physical                    * -> mux2 (virt bus3) -> mux -> |
+ * | bridge | bus 1                 *---------*                               |
+ * | logic  |---------------------> * mux reg *                               |
+ * | in CPLD|                       *---------*                               |
+ * *--------*   i2c-mux-mlxpcld          ^    * -> muxn (virt busn) -> mux -> |
+ *     |        driver                   |                                    |
+ *     |        *---------------*        |                              Devices
+ *     |        * CPLD (i2c bus)* select |
+ *     |        * registers for *--------*
+ *     |        * mux selection * deselect
+ *     |        *---------------*
+ *     |                 |
+ * <-------->     <----------->
+ * i2c cntrl      Board cntrl reg
+ * reg space      space (mux select,
+ *                IO, LED, WD, info)
+ *
+ */
+
+static const struct i2c_device_id mlxcpld_mux_id[] = {
+	{ "mlxcpld_mux_module", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, mlxcpld_mux_id);
+
+/* Write to mux register. Don't use i2c_transfer() and i2c_smbus_xfer()
+ * for this as they will try to lock adapter a second time.
+ */
+static int mlxcpld_mux_reg_write(struct i2c_adapter *adap,
+				 struct i2c_client *client, u8 val)
+{
+	struct mlxcpld_mux_plat_data *pdata = dev_get_platdata(&client->dev);
+
+	if (adap->algo->master_xfer) {
+		struct i2c_msg msg;
+		u8 msgbuf[] = {pdata->sel_reg_addr, val};
+
+		msg.addr = client->addr;
+		msg.flags = 0;
+		msg.len = 2;
+		msg.buf = msgbuf;
+		return __i2c_transfer(adap, &msg, 1);
+	} else if (adap->algo->smbus_xfer) {
+		union i2c_smbus_data data;
+
+		data.byte = val;
+		return adap->algo->smbus_xfer(adap, client->addr,
+					      client->flags, I2C_SMBUS_WRITE,
+					      pdata->sel_reg_addr,
+					      I2C_SMBUS_BYTE_DATA, &data);
+	} else
+		return -ENODEV;
+}
+
+static int mlxcpld_mux_select_chan(struct i2c_mux_core *muxc, u32 chan)
+{
+	struct mlxcpld_mux *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
+	u8 regval = chan + 1;
+	int err = 0;
+
+	/* Only select the channel if its different from the last channel */
+	if (data->last_chan != regval) {
+		err = mlxcpld_mux_reg_write(muxc->parent, client, regval);
+		if (err)
+			data->last_chan = 0;
+		else
+			data->last_chan = regval;
+	}
+
+	return err;
+}
+
+static int mlxcpld_mux_deselect(struct i2c_mux_core *muxc, u32 chan)
+{
+	struct mlxcpld_mux *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
+
+	/* Deselect active channel */
+	data->last_chan = 0;
+
+	return mlxcpld_mux_reg_write(muxc->parent, client, data->last_chan);
+}
+
+/* Probe/reomove functions */
+static int mlxcpld_mux_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adap = to_i2c_adapter(client->dev.parent);
+	struct mlxcpld_mux_plat_data *pdata = dev_get_platdata(&client->dev);
+	struct i2c_mux_core *muxc;
+	int num, force;
+	struct mlxcpld_mux *data;
+	int err;
+
+	if (!pdata)
+		return -EINVAL;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_WRITE_BYTE_DATA))
+		return -ENODEV;
+
+	muxc = i2c_mux_alloc(adap, &client->dev, CPLD_MUX_MAX_NCHANS,
+			     sizeof(*data), 0, mlxcpld_mux_select_chan,
+			     mlxcpld_mux_deselect);
+	if (!muxc)
+		return -ENOMEM;
+
+	data = i2c_mux_priv(muxc);
+	i2c_set_clientdata(client, muxc);
+	data->client = client;
+	data->last_chan = 0; /* force the first selection */
+
+	/* Create an adapter for each channel. */
+	for (num = 0; num < CPLD_MUX_MAX_NCHANS; num++) {
+		if (num >= pdata->num_adaps)
+			/* discard unconfigured channels */
+			break;
+
+		force = pdata->adap_ids[num];
+
+		err = i2c_mux_add_adapter(muxc, force, num, 0);
+		if (err)
+			goto virt_reg_failed;
+	}
+
+	return 0;
+
+virt_reg_failed:
+	i2c_mux_del_adapters(muxc);
+	return err;
+}
+
+static int mlxcpld_mux_remove(struct i2c_client *client)
+{
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
+
+	i2c_mux_del_adapters(muxc);
+	return 0;
+}
+
+static struct i2c_driver mlxcpld_mux_driver = {
+	.driver		= {
+		.name	= "mlxcpld-mux",
+	},
+	.probe		= mlxcpld_mux_probe,
+	.remove		= mlxcpld_mux_remove,
+	.id_table	= mlxcpld_mux_id,
+};
+
+module_i2c_driver(mlxcpld_mux_driver);
+
+MODULE_AUTHOR("Michael Shych (michaels@mellanox.com)");
+MODULE_DESCRIPTION("Mellanox I2C-CPLD-MUX driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("platform:i2c-mux-mlxcpld");
diff --git a/include/linux/i2c/mlxcpld.h b/include/linux/i2c/mlxcpld.h
new file mode 100644
index 000000000000..b08dcb183fca
--- /dev/null
+++ b/include/linux/i2c/mlxcpld.h
@@ -0,0 +1,52 @@
+/*
+ * mlxcpld.h - Mellanox I2C multiplexer support in CPLD
+ *
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Michael Shych <michaels@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_I2C_MLXCPLD_H
+#define _LINUX_I2C_MLXCPLD_H
+
+/* Platform data for the CPLD I2C multiplexers */
+
+/* mlxcpld_mux_plat_data - per mux data, used with i2c_register_board_info
+ * @adap_ids - adapter array
+ * @num_adaps - number of adapters
+ * @sel_reg_addr - mux select register offset in CPLD space
+ */
+struct mlxcpld_mux_plat_data {
+	int *adap_ids;
+	int num_adaps;
+	int sel_reg_addr;
+};
+
+#endif /* _LINUX_I2C_MLXCPLD_H */
-- 
cgit v1.2.3


From 437eb7bf7b28472f8b7689e166dc1dd691367121 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 28 Oct 2016 10:52:06 +0200
Subject: ACPI / hotplug / PCI: Make device_is_managed_by_native_pciehp()
 public

We're about to add runtime PM of hotplug ports, but we need to restrict it
to ports that are handled natively by the OS:  If they're handled by the
firmware (which is the case for Thunderbolt on non-Macs), things would
break if the OS put the ports into D3hot behind the firmware's back.

To determine if a hotplug port is handled natively, one has to walk up from
the port to the root bridge and check the cached _OSC Control Field for the
value of the "PCI Express Native Hot Plug control" bit.  There's already a
function to do that, device_is_managed_by_native_pciehp(), but it's private
to drivers/pci/hotplug/acpiphp_glue.c and only compiled in if
CONFIG_HOTPLUG_PCI_ACPI is enabled.

Make it public and move it to drivers/pci/pci-acpi.c, so that it is
available in the more general CONFIG_ACPI case.

The function contains a check if the device in question is a hotplug port
and returns false if it's not.  The caller we're going to add doesn't need
this as it only calls the function if it actually *is* a hotplug port.
Move the check out of the function into the single existing caller.

Rename it to pciehp_is_native() and add some kerneldoc and polish.

No functional change intended.

Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 28 +---------------------------
 drivers/pci/pci-acpi.c             | 24 ++++++++++++++++++++++++
 include/linux/pci_hotplug.h        |  2 ++
 3 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index b286a56e84b3..5ed2dcaa8e27 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -222,32 +222,6 @@ static void acpiphp_post_dock_fixup(struct acpi_device *adev)
 	acpiphp_let_context_go(context);
 }
 
-/* Check whether the PCI device is managed by native PCIe hotplug driver */
-static bool device_is_managed_by_native_pciehp(struct pci_dev *pdev)
-{
-	acpi_handle tmp;
-	struct acpi_pci_root *root;
-
-	/* Check whether the PCIe port supports native PCIe hotplug */
-	if (!pdev->is_hotplug_bridge)
-		return false;
-
-	/*
-	 * Check whether native PCIe hotplug has been enabled for
-	 * this PCIe hierarchy.
-	 */
-	tmp = acpi_find_root_bridge_handle(pdev);
-	if (!tmp)
-		return false;
-	root = acpi_pci_find_root(tmp);
-	if (!root)
-		return false;
-	if (!(root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
-		return false;
-
-	return true;
-}
-
 /**
  * acpiphp_add_context - Add ACPIPHP context to an ACPI device object.
  * @handle: ACPI handle of the object to add a context to.
@@ -331,7 +305,7 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
 	 * expose slots to user space in those cases.
 	 */
 	if ((acpi_pci_check_ejectable(pbus, handle) || is_dock_device(adev))
-	    && !(pdev && device_is_managed_by_native_pciehp(pdev))) {
+	    && !(pdev && pdev->is_hotplug_bridge && pciehp_is_native(pdev))) {
 		unsigned long long sun;
 		int retval;
 
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index d966d47c9e80..ed3daa8e7658 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -293,6 +293,30 @@ int pci_get_hp_params(struct pci_dev *dev, struct hotplug_params *hpp)
 }
 EXPORT_SYMBOL_GPL(pci_get_hp_params);
 
+/**
+ * pciehp_is_native - Check whether a hotplug port is handled by the OS
+ * @pdev: Hotplug port to check
+ *
+ * Walk up from @pdev to the host bridge, obtain its cached _OSC Control Field
+ * and return the value of the "PCI Express Native Hot Plug control" bit.
+ * On failure to obtain the _OSC Control Field return %false.
+ */
+bool pciehp_is_native(struct pci_dev *pdev)
+{
+	struct acpi_pci_root *root;
+	acpi_handle handle;
+
+	handle = acpi_find_root_bridge_handle(pdev);
+	if (!handle)
+		return false;
+
+	root = acpi_pci_find_root(handle);
+	if (!root)
+		return false;
+
+	return root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL;
+}
+
 /**
  * pci_acpi_wake_bus - Root bus wakeup notification fork function.
  * @work: Work item to handle.
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 8c7895061121..2e855afa0212 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -176,6 +176,7 @@ struct hotplug_params {
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
 int pci_get_hp_params(struct pci_dev *dev, struct hotplug_params *hpp);
+bool pciehp_is_native(struct pci_dev *pdev);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(acpi_handle handle);
@@ -185,5 +186,6 @@ static inline int pci_get_hp_params(struct pci_dev *dev,
 {
 	return -ENODEV;
 }
+static inline bool pciehp_is_native(struct pci_dev *pdev) { return true; }
 #endif
 #endif
-- 
cgit v1.2.3


From bf0f2d380f15f1fa05254b000ddeeb560dfb8638 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 17 Nov 2016 10:31:18 +0100
Subject: block: add reference counting for struct bsg_job

Add reference counting to 'struct bsg_job' so we can implement a reuqest
timeout handler for bsg_jobs, which is needed for Fibre Channel.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c         | 7 +++++--
 include/linux/bsg-lib.h | 2 ++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 650f427d915b..632fb4006c40 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -32,8 +32,10 @@
  * bsg_destroy_job - routine to teardown/delete a bsg job
  * @job: bsg_job that is to be torn down
  */
-static void bsg_destroy_job(struct bsg_job *job)
+static void bsg_destroy_job(struct kref *kref)
 {
+	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
+
 	put_device(job->dev);	/* release reference for the request */
 
 	kfree(job->request_payload.sg_list);
@@ -84,7 +86,7 @@ static void bsg_softirq_done(struct request *rq)
 	struct bsg_job *job = rq->special;
 
 	blk_end_request_all(rq, rq->errors);
-	bsg_destroy_job(job);
+	kref_put(&job->kref, bsg_destroy_job);
 }
 
 static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
@@ -142,6 +144,7 @@ static int bsg_create_job(struct device *dev, struct request *req)
 	job->dev = dev;
 	/* take a reference for the request */
 	get_device(job->dev);
+	kref_init(&job->kref);
 	return 0;
 
 failjob_rls_rqst_payload:
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index a226652a5a6c..58e0717fda6e 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -40,6 +40,8 @@ struct bsg_job {
 	struct device *dev;
 	struct request *req;
 
+	struct kref kref;
+
 	/* Transport/driver specific request/reply structs */
 	void *request;
 	void *reply;
-- 
cgit v1.2.3


From c00da4c90ffd066cdfe7f53ff3529c8ab4a35db0 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 17 Nov 2016 10:31:20 +0100
Subject: scsi: fc: Use bsg_destroy_job

fc_destroy_bsgjob() and bsg_destroy_job() are now 1:1 copies, so use the
latter. As bsg_destroy_job() comes from bsg-lib we need to select it in
Kconfig once CONFOG_SCSI_FC_ATTRS is active.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c                  |  7 +++++--
 drivers/scsi/Kconfig             |  1 +
 drivers/scsi/scsi_transport_fc.c | 25 +++----------------------
 include/linux/bsg-lib.h          |  1 +
 4 files changed, 10 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 632fb4006c40..9f1e8fde924a 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -32,9 +32,12 @@
  * bsg_destroy_job - routine to teardown/delete a bsg job
  * @job: bsg_job that is to be torn down
  */
-static void bsg_destroy_job(struct kref *kref)
+void bsg_destroy_job(struct kref *kref)
 {
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
+	struct request *rq = job->req;
+
+	blk_end_request_all(rq, rq->errors);
 
 	put_device(job->dev);	/* release reference for the request */
 
@@ -42,6 +45,7 @@ static void bsg_destroy_job(struct kref *kref)
 	kfree(job->reply_payload.sg_list);
 	kfree(job);
 }
+EXPORT_SYMBOL_GPL(bsg_destroy_job);
 
 /**
  * bsg_job_done - completion routine for bsg requests
@@ -85,7 +89,6 @@ static void bsg_softirq_done(struct request *rq)
 {
 	struct bsg_job *job = rq->special;
 
-	blk_end_request_all(rq, rq->errors);
 	kref_put(&job->kref, bsg_destroy_job);
 }
 
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 3b416c9efe5e..dfa93347c752 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -263,6 +263,7 @@ config SCSI_SPI_ATTRS
 config SCSI_FC_ATTRS
 	tristate "FiberChannel Transport Attributes"
 	depends on SCSI && NET
+	select BLK_DEV_BSGLIB
 	select SCSI_NETLINK
 	help
 	  If you wish to export transport-specific information about
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index bb705e02374c..616c7e1cbab2 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/kernel.h>
+#include <linux/bsg-lib.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
@@ -3554,26 +3555,6 @@ fc_vport_sched_delete(struct work_struct *work)
  * BSG support
  */
 
-
-/**
- * fc_destroy_bsgjob - routine to teardown/delete a fc bsg job
- * @job:	fc_bsg_job that is to be torn down
- */
-static void
-fc_destroy_bsgjob(struct kref *kref)
-{
-	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
-	struct request *rq = job->req;
-
-	blk_end_request_all(rq, rq->errors);
-
-	put_device(job->dev);	/* release reference for the request */
-
-	kfree(job->request_payload.sg_list);
-	kfree(job->reply_payload.sg_list);
-	kfree(job);
-}
-
 /**
  * fc_bsg_jobdone - completion routine for bsg requests that the LLD has
  *                  completed
@@ -3617,7 +3598,7 @@ static void fc_bsg_softirq_done(struct request *rq)
 {
 	struct bsg_job *job = rq->special;
 
-	kref_put(&job->kref, fc_destroy_bsgjob);
+	kref_put(&job->kref, bsg_destroy_job);
 }
 
 /**
@@ -3642,7 +3623,7 @@ fc_bsg_job_timeout(struct request *req)
 		/* call LLDD to abort the i/o as it has timed out */
 		err = i->f->bsg_timeout(job);
 		if (err == -EAGAIN) {
-			kref_put(&job->kref, fc_destroy_bsgjob);
+			kref_put(&job->kref, bsg_destroy_job);
 			return BLK_EH_RESET_TIMER;
 		} else if (err)
 			printk(KERN_ERR "ERROR: FC BSG request timeout - LLD "
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 58e0717fda6e..67f7de6146c9 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -69,5 +69,6 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
+void bsg_destroy_job(struct kref *kref);
 
 #endif
-- 
cgit v1.2.3


From 6aa858cd335a94e2824ed542140ac9704c0a64e2 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 17 Nov 2016 10:31:21 +0100
Subject: scsi: fc: use bsg_softirq_done

bsg_softirq_done() and fc_bsg_softirq_done() are copies of each other, so
ditch the fc specific one.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c                  |  3 ++-
 drivers/scsi/scsi_transport_fc.c | 15 ++-------------
 include/linux/bsg-lib.h          |  1 +
 3 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 9f1e8fde924a..6661f823db4c 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -85,12 +85,13 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
  * bsg_softirq_done - softirq done routine for destroying the bsg requests
  * @rq: BSG request that holds the job to be destroyed
  */
-static void bsg_softirq_done(struct request *rq)
+void bsg_softirq_done(struct request *rq)
 {
 	struct bsg_job *job = rq->special;
 
 	kref_put(&job->kref, bsg_destroy_job);
 }
+EXPORT_SYMBOL_GPL(bsg_softirq_done);
 
 static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
 {
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 616c7e1cbab2..1d959ae7d0c8 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3590,17 +3590,6 @@ void fc_bsg_jobdone(struct bsg_job *job, int result,
 }
 EXPORT_SYMBOL_GPL(fc_bsg_jobdone);
 
-/**
- * fc_bsg_softirq_done - softirq done routine for destroying the bsg requests
- * @rq:        BSG request that holds the job to be destroyed
- */
-static void fc_bsg_softirq_done(struct request *rq)
-{
-	struct bsg_job *job = rq->special;
-
-	kref_put(&job->kref, bsg_destroy_job);
-}
-
 /**
  * fc_bsg_job_timeout - handler for when a bsg request timesout
  * @req:	request that timed out
@@ -4033,7 +4022,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 
 	q->queuedata = shost;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
-	blk_queue_softirq_done(q, fc_bsg_softirq_done);
+	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
 	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
 
@@ -4079,7 +4068,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 
 	q->queuedata = rport;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
-	blk_queue_softirq_done(q, fc_bsg_softirq_done);
+	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 67f7de6146c9..09f304437cd6 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -70,5 +70,6 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
 void bsg_destroy_job(struct kref *kref);
+void bsg_softirq_done(struct request *rq);
 
 #endif
-- 
cgit v1.2.3


From fb6f7c8d8a19e5543d5b4d44c58e2c4e5a82bb12 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 17 Nov 2016 10:31:23 +0100
Subject: block: add bsg_job_put() and bsg_job_get()

Add bsg_job_put() and bsg_job_get() so don't need to export
bsg_destroy_job() any more.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c                  | 17 ++++++++++++++---
 drivers/scsi/scsi_transport_fc.c |  4 ++--
 include/linux/bsg-lib.h          |  3 ++-
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 6661f823db4c..803ec40ebd6d 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -32,7 +32,7 @@
  * bsg_destroy_job - routine to teardown/delete a bsg job
  * @job: bsg_job that is to be torn down
  */
-void bsg_destroy_job(struct kref *kref)
+static void bsg_destroy_job(struct kref *kref)
 {
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
@@ -45,7 +45,18 @@ void bsg_destroy_job(struct kref *kref)
 	kfree(job->reply_payload.sg_list);
 	kfree(job);
 }
-EXPORT_SYMBOL_GPL(bsg_destroy_job);
+
+void bsg_job_put(struct bsg_job *job)
+{
+	kref_put(&job->kref, bsg_destroy_job);
+}
+EXPORT_SYMBOL_GPL(bsg_job_put);
+
+int bsg_job_get(struct bsg_job *job)
+{
+	return kref_get_unless_zero(&job->kref);
+}
+EXPORT_SYMBOL_GPL(bsg_job_get);
 
 /**
  * bsg_job_done - completion routine for bsg requests
@@ -89,7 +100,7 @@ void bsg_softirq_done(struct request *rq)
 {
 	struct bsg_job *job = rq->special;
 
-	kref_put(&job->kref, bsg_destroy_job);
+	bsg_job_put(job);
 }
 EXPORT_SYMBOL_GPL(bsg_softirq_done);
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index ee1e812bad4c..23e1eed932b5 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3571,13 +3571,13 @@ fc_bsg_job_timeout(struct request *req)
 	if (rport && rport->port_state == FC_PORTSTATE_BLOCKED)
 		return BLK_EH_RESET_TIMER;
 
-	inflight = kref_get_unless_zero(&job->kref);
+	inflight = bsg_job_get(job);
 
 	if (inflight && i->f->bsg_timeout) {
 		/* call LLDD to abort the i/o as it has timed out */
 		err = i->f->bsg_timeout(job);
 		if (err == -EAGAIN) {
-			kref_put(&job->kref, bsg_destroy_job);
+			bsg_job_put(job);
 			return BLK_EH_RESET_TIMER;
 		} else if (err)
 			printk(KERN_ERR "ERROR: FC BSG request timeout - LLD "
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 09f304437cd6..b708db91618f 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -69,7 +69,8 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_destroy_job(struct kref *kref);
 void bsg_softirq_done(struct request *rq);
+void bsg_job_put(struct bsg_job *job);
+int __must_check bsg_job_get(struct bsg_job *job);
 
 #endif
-- 
cgit v1.2.3


From a0f4bd7f2a5be485747aa438cea38f69e3ae8962 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 17 Nov 2016 10:31:24 +0100
Subject: scsi: fc: move FC transport's bsg code to bsg-lib

Now that all conversions are done, move the FibreChannel bsg code over
to the bsg library.

This patch is derived from work done by Mike Christie in 2011 [1] but
only the iscsi parts got merged back then.

[1] http://marc.info/?l=linux-scsi&m=131149780921009&w=2

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c                  |   3 +-
 drivers/scsi/scsi_transport_fc.c | 285 ++++++---------------------------------
 include/linux/bsg-lib.h          |   1 -
 3 files changed, 44 insertions(+), 245 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 803ec40ebd6d..2d1df5cc0507 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -96,13 +96,12 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
  * bsg_softirq_done - softirq done routine for destroying the bsg requests
  * @rq: BSG request that holds the job to be destroyed
  */
-void bsg_softirq_done(struct request *rq)
+static void bsg_softirq_done(struct request *rq)
 {
 	struct bsg_job *job = rq->special;
 
 	bsg_job_put(job);
 }
-EXPORT_SYMBOL_GPL(bsg_softirq_done);
 
 static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
 {
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 23e1eed932b5..03577bde6ac5 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3591,109 +3591,12 @@ fc_bsg_job_timeout(struct request *req)
 		return BLK_EH_HANDLED;
 }
 
-static int
-fc_bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
-{
-	size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
-
-	BUG_ON(!req->nr_phys_segments);
-
-	buf->sg_list = kzalloc(sz, GFP_KERNEL);
-	if (!buf->sg_list)
-		return -ENOMEM;
-	sg_init_table(buf->sg_list, req->nr_phys_segments);
-	buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
-	buf->payload_len = blk_rq_bytes(req);
-	return 0;
-}
-
-
-/**
- * fc_req_to_bsgjob - Allocate/create the fc_bsg_job structure for the
- *                   bsg request
- * @shost:	SCSI Host corresponding to the bsg object
- * @rport:	(optional) FC Remote Port corresponding to the bsg object
- * @req:	BSG request that needs a job structure
- */
-static int
-fc_req_to_bsgjob(struct Scsi_Host *shost, struct fc_rport *rport,
-	struct request *req)
-{
-	struct fc_internal *i = to_fc_internal(shost->transportt);
-	struct request *rsp = req->next_rq;
-	struct bsg_job *job;
-	int ret;
-
-	BUG_ON(req->special);
-
-	job = kzalloc(sizeof(struct bsg_job) + i->f->dd_bsg_size,
-			GFP_KERNEL);
-	if (!job)
-		return -ENOMEM;
-
-	/*
-	 * Note: this is a bit silly.
-	 * The request gets formatted as a SGIO v4 ioctl request, which
-	 * then gets reformatted as a blk request, which then gets
-	 * reformatted as a fc bsg request. And on completion, we have
-	 * to wrap return results such that SGIO v4 thinks it was a scsi
-	 * status.  I hope this was all worth it.
-	 */
-
-	req->special = job;
-	job->req = req;
-	if (i->f->dd_bsg_size)
-		job->dd_data = (void *)&job[1];
-	job->request = (struct fc_bsg_request *)req->cmd;
-	job->request_len = req->cmd_len;
-	job->reply = req->sense;
-	job->reply_len = SCSI_SENSE_BUFFERSIZE;	/* Size of sense buffer
-						 * allocated */
-	if (req->bio) {
-		ret = fc_bsg_map_buffer(&job->request_payload, req);
-		if (ret)
-			goto failjob_rls_job;
-	}
-	if (rsp && rsp->bio) {
-		ret = fc_bsg_map_buffer(&job->reply_payload, rsp);
-		if (ret)
-			goto failjob_rls_rqst_payload;
-	}
-	if (rport)
-		job->dev = &rport->dev;
-	else
-		job->dev = &shost->shost_gendev;
-	get_device(job->dev);		/* take a reference for the request */
-
-	kref_init(&job->kref);
-
-	return 0;
-
-
-failjob_rls_rqst_payload:
-	kfree(job->request_payload.sg_list);
-failjob_rls_job:
-	kfree(job);
-	return -ENOMEM;
-}
-
-
-enum fc_dispatch_result {
-	FC_DISPATCH_BREAK,	/* on return, q is locked, break from q loop */
-	FC_DISPATCH_LOCKED,	/* on return, q is locked, continue on */
-	FC_DISPATCH_UNLOCKED,	/* on return, q is unlocked, continue on */
-};
-
-
 /**
  * fc_bsg_host_dispatch - process fc host bsg requests and dispatch to LLDD
- * @q:		fc host request queue
  * @shost:	scsi host rport attached to
  * @job:	bsg job to be processed
  */
-static enum fc_dispatch_result
-fc_bsg_host_dispatch(struct request_queue *q, struct Scsi_Host *shost,
-			 struct bsg_job *job)
+static int fc_bsg_host_dispatch(struct Scsi_Host *shost, struct bsg_job *job)
 {
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 	struct fc_bsg_request *bsg_request = job->request;
@@ -3754,7 +3657,7 @@ fc_bsg_host_dispatch(struct request_queue *q, struct Scsi_Host *shost,
 
 	ret = i->f->bsg_request(job);
 	if (!ret)
-		return FC_DISPATCH_UNLOCKED;
+		return 0;
 
 fail_host_msg:
 	/* return the errno failure code as the only status */
@@ -3764,7 +3667,7 @@ fail_host_msg:
 	job->reply_len = sizeof(uint32_t);
 	bsg_job_done(job, bsg_reply->result,
 		       bsg_reply->reply_payload_rcv_len);
-	return FC_DISPATCH_UNLOCKED;
+	return 0;
 }
 
 
@@ -3788,14 +3691,10 @@ fc_bsg_goose_queue(struct fc_rport *rport)
 
 /**
  * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
- * @q:		rport request queue
  * @shost:	scsi host rport attached to
- * @rport:	rport request destined to
  * @job:	bsg job to be processed
  */
-static enum fc_dispatch_result
-fc_bsg_rport_dispatch(struct request_queue *q, struct Scsi_Host *shost,
-			 struct fc_rport *rport, struct bsg_job *job)
+static int fc_bsg_rport_dispatch(struct Scsi_Host *shost, struct bsg_job *job)
 {
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 	struct fc_bsg_request *bsg_request = job->request;
@@ -3832,7 +3731,7 @@ check_bidi:
 
 	ret = i->f->bsg_request(job);
 	if (!ret)
-		return FC_DISPATCH_UNLOCKED;
+		return 0;
 
 fail_rport_msg:
 	/* return the errno failure code as the only status */
@@ -3842,119 +3741,19 @@ fail_rport_msg:
 	job->reply_len = sizeof(uint32_t);
 	bsg_job_done(job, bsg_reply->result,
 		       bsg_reply->reply_payload_rcv_len);
-	return FC_DISPATCH_UNLOCKED;
-}
-
-
-/**
- * fc_bsg_request_handler - generic handler for bsg requests
- * @q:		request queue to manage
- * @shost:	Scsi_Host related to the bsg object
- * @rport:	FC remote port related to the bsg object (optional)
- * @dev:	device structure for bsg object
- */
-static void
-fc_bsg_request_handler(struct request_queue *q, struct Scsi_Host *shost,
-		       struct fc_rport *rport, struct device *dev)
-{
-	struct request *req;
-	struct bsg_job *job;
-	enum fc_dispatch_result ret;
-	struct fc_bsg_reply *bsg_reply;
-
-	if (!get_device(dev))
-		return;
-
-	while (1) {
-		if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED) &&
-		    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
-			break;
-
-		req = blk_fetch_request(q);
-		if (!req)
-			break;
-
-		if (rport && (rport->port_state != FC_PORTSTATE_ONLINE)) {
-			req->errors = -ENXIO;
-			spin_unlock_irq(q->queue_lock);
-			blk_end_request_all(req, -ENXIO);
-			spin_lock_irq(q->queue_lock);
-			continue;
-		}
-
-		spin_unlock_irq(q->queue_lock);
-
-		ret = fc_req_to_bsgjob(shost, rport, req);
-		if (ret) {
-			req->errors = ret;
-			blk_end_request_all(req, ret);
-			spin_lock_irq(q->queue_lock);
-			continue;
-		}
-
-		job = req->special;
-
-		/* check if we have the msgcode value at least */
-		if (job->request_len < sizeof(uint32_t)) {
-			BUG_ON(job->reply_len < sizeof(uint32_t));
-			bsg_reply = job->reply;
-			bsg_reply->reply_payload_rcv_len = 0;
-			bsg_reply->result = -ENOMSG;
-			job->reply_len = sizeof(uint32_t);
-			bsg_job_done(job, bsg_reply->result,
-				       bsg_reply->reply_payload_rcv_len);
-			spin_lock_irq(q->queue_lock);
-			continue;
-		}
-
-		/* the dispatch routines will unlock the queue_lock */
-		if (rport)
-			ret = fc_bsg_rport_dispatch(q, shost, rport, job);
-		else
-			ret = fc_bsg_host_dispatch(q, shost, job);
-
-		/* did dispatcher hit state that can't process any more */
-		if (ret == FC_DISPATCH_BREAK)
-			break;
-
-		/* did dispatcher had released the lock */
-		if (ret == FC_DISPATCH_UNLOCKED)
-			spin_lock_irq(q->queue_lock);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-	put_device(dev);
-	spin_lock_irq(q->queue_lock);
-}
-
-
-/**
- * fc_bsg_host_handler - handler for bsg requests for a fc host
- * @q:		fc host request queue
- */
-static void
-fc_bsg_host_handler(struct request_queue *q)
-{
-	struct Scsi_Host *shost = q->queuedata;
-
-	fc_bsg_request_handler(q, shost, NULL, &shost->shost_gendev);
+	return 0;
 }
 
-
-/**
- * fc_bsg_rport_handler - handler for bsg requests for a fc rport
- * @q:		rport request queue
- */
-static void
-fc_bsg_rport_handler(struct request_queue *q)
+static int fc_bsg_dispatch(struct bsg_job *job)
 {
-	struct fc_rport *rport = q->queuedata;
-	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct Scsi_Host *shost = fc_bsg_to_shost(job);
 
-	fc_bsg_request_handler(q, shost, rport, &rport->dev);
+	if (scsi_is_fc_rport(job->dev))
+		return fc_bsg_rport_dispatch(shost, job);
+	else
+		return fc_bsg_host_dispatch(shost, job);
 }
 
-
 /**
  * fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests
  * @shost:	shost for fc_host
@@ -3977,33 +3776,42 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 	snprintf(bsg_name, sizeof(bsg_name),
 		 "fc_host%d", shost->host_no);
 
-	q = __scsi_alloc_queue(shost, fc_bsg_host_handler);
+	q = __scsi_alloc_queue(shost, bsg_request_fn);
 	if (!q) {
-		printk(KERN_ERR "fc_host%d: bsg interface failed to "
-				"initialize - no request queue\n",
-				 shost->host_no);
+		dev_err(dev,
+			"fc_host%d: bsg interface failed to initialize - no request queue\n",
+			shost->host_no);
 		return -ENOMEM;
 	}
 
-	q->queuedata = shost;
-	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
-	blk_queue_softirq_done(q, bsg_softirq_done);
-	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
-	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
-
-	err = bsg_register_queue(q, dev, bsg_name, NULL);
+	err = bsg_setup_queue(dev, q, bsg_name, fc_bsg_dispatch,
+				 i->f->dd_bsg_size);
 	if (err) {
-		printk(KERN_ERR "fc_host%d: bsg interface failed to "
-				"initialize - register queue\n",
-				shost->host_no);
+		dev_err(dev,
+			"fc_host%d: bsg interface failed to initialize - setup queue\n",
+			shost->host_no);
 		blk_cleanup_queue(q);
 		return err;
 	}
-
+	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
+	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
 	fc_host->rqst_q = q;
 	return 0;
 }
 
+static int fc_bsg_rport_prep(struct request_queue *q, struct request *req)
+{
+	struct fc_rport *rport = dev_to_rport(q->queuedata);
+
+	if (rport->port_state == FC_PORTSTATE_BLOCKED &&
+	    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
+		return BLKPREP_DEFER;
+
+	if (rport->port_state != FC_PORTSTATE_ONLINE)
+		return BLKPREP_KILL;
+
+	return BLKPREP_OK;
+}
 
 /**
  * fc_bsg_rportadd - Create and add the bsg hooks so we can receive requests
@@ -4023,29 +3831,22 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 	if (!i->f->bsg_request)
 		return -ENOTSUPP;
 
-	q = __scsi_alloc_queue(shost, fc_bsg_rport_handler);
+	q = __scsi_alloc_queue(shost, bsg_request_fn);
 	if (!q) {
-		printk(KERN_ERR "%s: bsg interface failed to "
-				"initialize - no request queue\n",
-				 dev->kobj.name);
+		dev_err(dev, "bsg interface failed to initialize - no request queue\n");
 		return -ENOMEM;
 	}
 
-	q->queuedata = rport;
-	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
-	blk_queue_softirq_done(q, bsg_softirq_done);
-	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
-	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
-
-	err = bsg_register_queue(q, dev, NULL, NULL);
+	err = bsg_setup_queue(dev, q, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
 	if (err) {
-		printk(KERN_ERR "%s: bsg interface failed to "
-				"initialize - register queue\n",
-				 dev->kobj.name);
+		dev_err(dev, "failed to setup bsg queue\n");
 		blk_cleanup_queue(q);
 		return err;
 	}
 
+	blk_queue_prep_rq(q, fc_bsg_rport_prep);
+	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
+	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 	rport->rqst_q = q;
 	return 0;
 }
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index b708db91618f..657a718c27d2 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -69,7 +69,6 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_softirq_done(struct request *rq);
 void bsg_job_put(struct bsg_job *job);
 int __must_check bsg_job_get(struct bsg_job *job);
 
-- 
cgit v1.2.3


From 982555fc26f9d8bcdbd5f9db0378fe0682eb4188 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Tue, 8 Nov 2016 10:08:24 +0800
Subject: usb: gadget: fix request length error for isoc transfer

For isoc endpoint descriptor, the wMaxPacketSize is not real max packet
size (see Table 9-13. Standard Endpoint Descriptor, USB 2.0 specifcation),
it may contain the number of packet, so the real max packet should be
ep->desc->wMaxPacketSize && 0x7ff.

Cc: Felipe F. Tonello <eu@felipetonello.com>
Cc: Felipe Balbi <felipe.balbi@linux.intel.com>
Fixes: 16b114a6d797 ("usb: gadget: fix usb_ep_align_maybe
  endianness and new usb_ep_aligna")

Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/gadget.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 8e81f9eb95e4..e4516e9ded0f 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -429,7 +429,9 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev)
  */
 static inline size_t usb_ep_align(struct usb_ep *ep, size_t len)
 {
-	return round_up(len, (size_t)le16_to_cpu(ep->desc->wMaxPacketSize));
+	int max_packet_size = (size_t)usb_endpoint_maxp(ep->desc) & 0x7ff;
+
+	return round_up(len, max_packet_size);
 }
 
 /**
-- 
cgit v1.2.3


From 9a05e7541c39680d28ecf91892338e074738d5fd Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 18 Nov 2016 15:16:06 +0100
Subject: block: Change extern inline to static inline

With compilers which follow the C99 standard (like modern versions of
gcc and clang), "extern inline" does the opposite thing from older
versions of gcc (emits code for an externally linkable version of the
inline function).

"static inline" does the intended behavior in all cases instead.

Description taken from commit 6d91857d4826 ("staging, rtl8192e,
LLVMLinux: Change extern inline to static inline").

This also fixes the following GCC warning when building with CONFIG_PM
disabled:

  ./include/linux/blkdev.h:1143:20: warning: no previous prototype for 'blk_set_runtime_active' [-Wmissing-prototypes]

Fixes: d07ab6d11477 ("block: Add blk_set_runtime_active()")
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 85699bc90a51..541fdd8787a5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1174,7 +1174,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q)
 static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
 static inline void blk_pre_runtime_resume(struct request_queue *q) {}
 static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-extern inline void blk_set_runtime_active(struct request_queue *q) {}
+static inline void blk_set_runtime_active(struct request_queue *q) {}
 #endif
 
 /*
-- 
cgit v1.2.3


From 0ac3ea70897fb9f84b620aeda074ecccf481629d Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 17 Nov 2016 13:45:55 +0200
Subject: net/mlx5: Make the command interface cache more flexible

Add more cache command size sets and more entries for each set based on
the current commands set different sizes and commands frequency.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 145 ++++++++++++--------------
 include/linux/mlx5/driver.h                   |  14 +--
 2 files changed, 76 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 8561102f2563..0fe7a60bf66a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -53,14 +53,6 @@ enum {
 	CMD_MODE_EVENTS
 };
 
-enum {
-	NUM_LONG_LISTS	  = 2,
-	NUM_MED_LISTS	  = 64,
-	LONG_LIST_SIZE	  = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
-				MLX5_CMD_DATA_BLOCK_SIZE,
-	MED_LIST_SIZE	  = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
-};
-
 enum {
 	MLX5_CMD_DELIVERY_STAT_OK			= 0x0,
 	MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR		= 0x1,
@@ -1372,10 +1364,10 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 {
 	unsigned long flags;
 
-	if (msg->cache) {
-		spin_lock_irqsave(&msg->cache->lock, flags);
-		list_add_tail(&msg->list, &msg->cache->head);
-		spin_unlock_irqrestore(&msg->cache->lock, flags);
+	if (msg->parent) {
+		spin_lock_irqsave(&msg->parent->lock, flags);
+		list_add_tail(&msg->list, &msg->parent->head);
+		spin_unlock_irqrestore(&msg->parent->lock, flags);
 	} else {
 		mlx5_free_cmd_msg(dev, msg);
 	}
@@ -1472,30 +1464,37 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
 				      gfp_t gfp)
 {
 	struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+	struct cmd_msg_cache *ch = NULL;
 	struct mlx5_cmd *cmd = &dev->cmd;
-	struct cache_ent *ent = NULL;
-
-	if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
-		ent = &cmd->cache.large;
-	else if (in_size > 16 && in_size <= MED_LIST_SIZE)
-		ent = &cmd->cache.med;
-
-	if (ent) {
-		spin_lock_irq(&ent->lock);
-		if (!list_empty(&ent->head)) {
-			msg = list_entry(ent->head.next, typeof(*msg), list);
-			/* For cached lists, we must explicitly state what is
-			 * the real size
-			 */
-			msg->len = in_size;
-			list_del(&msg->list);
+	int i;
+
+	if (in_size <= 16)
+		goto cache_miss;
+
+	for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+		ch = &cmd->cache[i];
+		if (in_size > ch->max_inbox_size)
+			continue;
+		spin_lock_irq(&ch->lock);
+		if (list_empty(&ch->head)) {
+			spin_unlock_irq(&ch->lock);
+			continue;
 		}
-		spin_unlock_irq(&ent->lock);
+		msg = list_entry(ch->head.next, typeof(*msg), list);
+		/* For cached lists, we must explicitly state what is
+		 * the real size
+		 */
+		msg->len = in_size;
+		list_del(&msg->list);
+		spin_unlock_irq(&ch->lock);
+		break;
 	}
 
-	if (IS_ERR(msg))
-		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+	if (!IS_ERR(msg))
+		return msg;
 
+cache_miss:
+	msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 	return msg;
 }
 
@@ -1593,58 +1592,56 @@ EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd *cmd = &dev->cmd;
+	struct cmd_msg_cache *ch;
 	struct mlx5_cmd_msg *msg;
 	struct mlx5_cmd_msg *n;
+	int i;
 
-	list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) {
-		list_del(&msg->list);
-		mlx5_free_cmd_msg(dev, msg);
-	}
-
-	list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) {
-		list_del(&msg->list);
-		mlx5_free_cmd_msg(dev, msg);
+	for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+		ch = &dev->cmd.cache[i];
+		list_for_each_entry_safe(msg, n, &ch->head, list) {
+			list_del(&msg->list);
+			mlx5_free_cmd_msg(dev, msg);
+		}
 	}
 }
 
-static int create_msg_cache(struct mlx5_core_dev *dev)
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+	512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+	16 + MLX5_CMD_DATA_BLOCK_SIZE,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
+	struct cmd_msg_cache *ch;
 	struct mlx5_cmd_msg *msg;
-	int err;
 	int i;
-
-	spin_lock_init(&cmd->cache.large.lock);
-	INIT_LIST_HEAD(&cmd->cache.large.head);
-	spin_lock_init(&cmd->cache.med.lock);
-	INIT_LIST_HEAD(&cmd->cache.med.head);
-
-	for (i = 0; i < NUM_LONG_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
-		if (IS_ERR(msg)) {
-			err = PTR_ERR(msg);
-			goto ex_err;
-		}
-		msg->cache = &cmd->cache.large;
-		list_add_tail(&msg->list, &cmd->cache.large.head);
-	}
-
-	for (i = 0; i < NUM_MED_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
-		if (IS_ERR(msg)) {
-			err = PTR_ERR(msg);
-			goto ex_err;
+	int k;
+
+	/* Initialize and fill the caches with initial entries */
+	for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+		ch = &cmd->cache[k];
+		spin_lock_init(&ch->lock);
+		INIT_LIST_HEAD(&ch->head);
+		ch->num_ent = cmd_cache_num_ent[k];
+		ch->max_inbox_size = cmd_cache_ent_size[k];
+		for (i = 0; i < ch->num_ent; i++) {
+			msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+						 ch->max_inbox_size, 0);
+			if (IS_ERR(msg))
+				break;
+			msg->parent = ch;
+			list_add_tail(&msg->list, &ch->head);
 		}
-		msg->cache = &cmd->cache.med;
-		list_add_tail(&msg->list, &cmd->cache.med.head);
 	}
-
-	return 0;
-
-ex_err:
-	destroy_msg_cache(dev);
-	return err;
 }
 
 static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
@@ -1767,11 +1764,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
 	cmd->mode = CMD_MODE_POLLING;
 
-	err = create_msg_cache(dev);
-	if (err) {
-		dev_err(&dev->pdev->dev, "failed to create command cache\n");
-		goto err_free_page;
-	}
+	create_msg_cache(dev);
 
 	set_wqname(dev);
 	cmd->wq = create_singlethread_workqueue(cmd->wq_name);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ecc451d89ccd..5e7dbbcf47f0 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -208,7 +208,7 @@ struct mlx5_cmd_first {
 
 struct mlx5_cmd_msg {
 	struct list_head		list;
-	struct cache_ent	       *cache;
+	struct cmd_msg_cache	       *parent;
 	u32				len;
 	struct mlx5_cmd_first		first;
 	struct mlx5_cmd_mailbox	       *next;
@@ -228,17 +228,17 @@ struct mlx5_cmd_debug {
 	u16			outlen;
 };
 
-struct cache_ent {
+struct cmd_msg_cache {
 	/* protect block chain allocations
 	 */
 	spinlock_t		lock;
 	struct list_head	head;
+	unsigned int		max_inbox_size;
+	unsigned int		num_ent;
 };
 
-struct cmd_msg_cache {
-	struct cache_ent	large;
-	struct cache_ent	med;
-
+enum {
+	MLX5_NUM_COMMAND_CACHES = 5,
 };
 
 struct mlx5_cmd_stats {
@@ -281,7 +281,7 @@ struct mlx5_cmd {
 	struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
 	struct pci_pool *pool;
 	struct mlx5_cmd_debug dbg;
-	struct cmd_msg_cache cache;
+	struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
 	int checksum_disabled;
 	struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
 };
-- 
cgit v1.2.3


From 4ce3bf2fa8ba309b5ca19539fcc8671a0fc084f9 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 17 Nov 2016 13:45:56 +0200
Subject: net/mlx5: Port module event hardware structures

Add hardware structures and constants definitions needed for module
events support.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   | 11 +++++++++++
 include/linux/mlx5/mlx5_ifc.h |  3 ++-
 include/linux/mlx5/port.h     |  3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 58276144ba81..52b437431c6a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -277,6 +277,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_INTERNAL_ERROR	   = 0x08,
 	MLX5_EVENT_TYPE_PORT_CHANGE	   = 0x09,
 	MLX5_EVENT_TYPE_GPIO_EVENT	   = 0x15,
+	MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
 	MLX5_EVENT_TYPE_REMOTE_CONFIG	   = 0x19,
 
 	MLX5_EVENT_TYPE_DB_BF_CONGESTION   = 0x1a,
@@ -552,6 +553,15 @@ struct mlx5_eqe_vport_change {
 	__be32		rsvd1[6];
 } __packed;
 
+struct mlx5_eqe_port_module {
+	u8        reserved_at_0[1];
+	u8        module;
+	u8        reserved_at_2[1];
+	u8        module_status;
+	u8        reserved_at_4[2];
+	u8        error_type;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -565,6 +575,7 @@ union ev_data {
 	struct mlx5_eqe_page_req	req_pages;
 	struct mlx5_eqe_page_fault	page_fault;
 	struct mlx5_eqe_vport_change	vport_change;
+	struct mlx5_eqe_port_module	port_module;
 } __packed;
 
 struct mlx5_eqe {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2632cb2caf10..cd1d530ca368 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -824,7 +824,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   early_vf_enable[0x1];
 	u8         reserved_at_1a9[0x2];
 	u8         local_ca_ack_delay[0x5];
-	u8         reserved_at_1af[0x2];
+	u8         port_module_event[0x1];
+	u8         reserved_at_1b0[0x1];
 	u8         ports_check[0x1];
 	u8         reserved_at_1b2[0x1];
 	u8         disable_link_up[0x1];
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index b3065acd20b4..dde8c7ec5ff1 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -94,6 +94,9 @@ enum mlx5e_link_mode {
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK         0xF
+
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
 			 int ptys_size, int proto_mask, u8 local_port);
-- 
cgit v1.2.3


From d4eb4cd78b0774c7061db56844ed2ea7790cc77c Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 17 Nov 2016 13:45:57 +0200
Subject: net/mlx5: Add handling for port module event

For each asynchronous port module event:
  1. print with ratelimit to the dmesg log
  2. increment the corresponding event counter

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       | 12 +++++
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/port.c     | 57 ++++++++++++++++++++++
 include/linux/mlx5/driver.h                        | 27 ++++++++++
 4 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index e74a73be5e0a..8ffcc8808e50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -139,6 +139,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_PORT_CHANGE";
 	case MLX5_EVENT_TYPE_GPIO_EVENT:
 		return "MLX5_EVENT_TYPE_GPIO_EVENT";
+	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
 	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -285,6 +287,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
 			break;
 #endif
+
+		case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+			mlx5_port_module_event(dev, eqe);
+			break;
+
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -480,6 +487,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	    mlx5_core_is_pf(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
+	if (MLX5_CAP_GEN(dev, port_module_event))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+	else
+		mlx5_core_dbg(dev, "port_module_event is not set\n");
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 4762bb9d013c..7e635ebda199 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -81,6 +81,7 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 34e7184e23c9..b77928f5b46e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -746,3 +746,60 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 	*supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
 	*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
+
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+	"Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
+	"Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
+	"Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
+};
+
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+	"Power budget exceeded",
+	"Long Range for non MLNX cable",
+	"Bus stuck(I2C or data shorted)",
+	"No EEPROM/retry timeout",
+	"Enforce part number list",
+	"Unknown identifier",
+	"High Temperature",
+	"Bad or shorted cable/module",
+	"Unknown status",
+};
+
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	enum port_module_event_status_type module_status;
+	enum port_module_event_error_type error_type;
+	struct mlx5_eqe_port_module *module_event_eqe;
+	struct mlx5_priv *priv = &dev->priv;
+	u8 module_num;
+
+	module_event_eqe = &eqe->data.port_module;
+	module_num = module_event_eqe->module;
+	module_status = module_event_eqe->module_status &
+			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+	error_type = module_event_eqe->error_type &
+		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+	if (module_status < MLX5_MODULE_STATUS_ERROR) {
+		priv->pme_stats.status_counters[module_status - 1]++;
+	} else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+		if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+			/* Unknown error type */
+			error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+		priv->pme_stats.error_counters[error_type]++;
+	}
+
+	if (!printk_ratelimit())
+		return;
+
+	if (module_status < MLX5_MODULE_STATUS_ERROR)
+		mlx5_core_info(dev,
+			       "Port module event: module %u, %s\n",
+			       module_num, mlx5_pme_status[module_status - 1]);
+
+	else if (module_status == MLX5_MODULE_STATUS_ERROR)
+		mlx5_core_info(dev,
+			       "Port module event[error]: module %u, %s, %s\n",
+			       module_num, mlx5_pme_status[module_status - 1],
+			       mlx5_pme_error[error_type]);
+}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5e7dbbcf47f0..7336c8e529d7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -498,6 +498,31 @@ struct mlx5_rl_table {
 	struct mlx5_rl_entry   *rl_entry;
 };
 
+enum port_module_event_status_type {
+	MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+	MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+	MLX5_MODULE_STATUS_ERROR     = 0x3,
+	MLX5_MODULE_STATUS_NUM       = 0x3,
+};
+
+enum  port_module_event_error_type {
+	MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
+	MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
+	MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
+	MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
+	MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
+	MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
+	MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
+	MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
+	MLX5_MODULE_EVENT_ERROR_UNKNOWN,
+	MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_port_module_event_stats {
+	u64 status_counters[MLX5_MODULE_STATUS_NUM];
+	u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
 struct mlx5_priv {
 	char			name[MLX5_MAX_NAME_LEN];
 	struct mlx5_eq_table	eq_table;
@@ -559,6 +584,8 @@ struct mlx5_priv {
 	unsigned long		pci_dev_data;
 	struct mlx5_fc_stats		fc_stats;
 	struct mlx5_rl_table            rl_table;
+
+	struct mlx5_port_module_event_stats  pme_stats;
 };
 
 enum mlx5_device_state {
-- 
cgit v1.2.3


From 0dbc6fe09fbe5f5191bcc606f3bdc9a829f97066 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 17 Nov 2016 13:45:59 +0200
Subject: net/mlx5: Set driver version infrastructure

Add driver_version capability bit is enabled, and set driver
version command in mlx5_ifc firmware header.  The only purpose
of this command is to store a driver version/OS string in FW
to be reported and displayed in various management systems,
such as IPMI/BMC.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cd1d530ca368..f08a06247fba 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -83,6 +83,7 @@ enum {
 	MLX5_CMD_OP_SET_HCA_CAP                   = 0x109,
 	MLX5_CMD_OP_QUERY_ISSI                    = 0x10a,
 	MLX5_CMD_OP_SET_ISSI                      = 0x10b,
+	MLX5_CMD_OP_SET_DRIVER_VERSION            = 0x10d,
 	MLX5_CMD_OP_CREATE_MKEY                   = 0x200,
 	MLX5_CMD_OP_QUERY_MKEY                    = 0x201,
 	MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
@@ -909,7 +910,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_pg_sz[0x8];
 
 	u8         bf[0x1];
-	u8         reserved_at_261[0x1];
+	u8         driver_version[0x1];
 	u8         pad_tx_eth_packet[0x1];
 	u8         reserved_at_263[0x8];
 	u8         log_bf_reg_size[0x5];
@@ -4005,6 +4006,25 @@ struct mlx5_ifc_query_issi_in_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_set_driver_version_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_driver_version_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+	u8         driver_version[64][0x8];
+};
+
 struct mlx5_ifc_query_hca_vport_pkey_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
-- 
cgit v1.2.3


From 7f503169cabd70c1f13b9279c50eca7dfb9a7d51 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 17 Nov 2016 13:46:01 +0200
Subject: net/mlx5: Add MPCNT register infrastructure

Add the needed infrastructure for future use of MPCNT register.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  5 +++
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 93 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 52b437431c6a..9f489365b3d3 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,6 +1071,11 @@ enum {
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
+enum {
+	MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
+	MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
+};
+
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
 	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7336c8e529d7..ae1f451e8f89 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -121,6 +121,7 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MPCNT		 = 0x9051,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f08a06247fba..a5f0fbedf1e7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1757,6 +1757,80 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 	u8         reserved_at_4c0[0x300];
 };
 
+struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
+	u8         life_time_counter_high[0x20];
+
+	u8         life_time_counter_low[0x20];
+
+	u8         rx_errors[0x20];
+
+	u8         tx_errors[0x20];
+
+	u8         l0_to_recovery_eieos[0x20];
+
+	u8         l0_to_recovery_ts[0x20];
+
+	u8         l0_to_recovery_framing[0x20];
+
+	u8         l0_to_recovery_retrain[0x20];
+
+	u8         crc_error_dllp[0x20];
+
+	u8         crc_error_tlp[0x20];
+
+	u8         reserved_at_140[0x680];
+};
+
+struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits {
+	u8         life_time_counter_high[0x20];
+
+	u8         life_time_counter_low[0x20];
+
+	u8         time_to_boot_image_start[0x20];
+
+	u8         time_to_link_image[0x20];
+
+	u8         calibration_time[0x20];
+
+	u8         time_to_first_perst[0x20];
+
+	u8         time_to_detect_state[0x20];
+
+	u8         time_to_l0[0x20];
+
+	u8         time_to_crs_en[0x20];
+
+	u8         time_to_plastic_image_start[0x20];
+
+	u8         time_to_iron_image_start[0x20];
+
+	u8         perst_handler[0x20];
+
+	u8         times_in_l1[0x20];
+
+	u8         times_in_l23[0x20];
+
+	u8         dl_down[0x20];
+
+	u8         config_cycle1usec[0x20];
+
+	u8         config_cycle2to7usec[0x20];
+
+	u8         config_cycle_8to15usec[0x20];
+
+	u8         config_cycle_16_to_63usec[0x20];
+
+	u8         config_cycle_64usec[0x20];
+
+	u8         correctable_err_msg_sent[0x20];
+
+	u8         non_fatal_err_msg_sent[0x20];
+
+	u8         fatal_err_msg_sent[0x20];
+
+	u8         reserved_at_2e0[0x4e0];
+};
+
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
@@ -2921,6 +2995,12 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	u8         reserved_at_0[0x7c0];
 };
 
+union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
+	struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
+	struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout;
+	u8         reserved_at_0[0x7c0];
+};
+
 union mlx5_ifc_event_auto_bits {
 	struct mlx5_ifc_comp_event_bits comp_event;
 	struct mlx5_ifc_dct_events_bits dct_events;
@@ -7240,6 +7320,18 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
+struct mlx5_ifc_mpcnt_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         pcie_index[0x8];
+	u8         reserved_at_10[0xa];
+	u8         grp[0x6];
+
+	u8         clr[0x1];
+	u8         reserved_at_21[0x1f];
+
+	union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
+};
+
 struct mlx5_ifc_ppad_reg_bits {
 	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
@@ -7845,6 +7937,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+	struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
-- 
cgit v1.2.3


From 968ad9da7e0e333e25442950e10a1b631981ce84 Mon Sep 17 00:00:00 2001
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Date: Thu, 17 Nov 2016 13:07:21 +0100
Subject: ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE

Adding get_tunable/set_tunable function pointer to the phy_driver
structure, and uses these function pointers to implement the
ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE ioctls.

Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h |  7 +++++
 net/core/ethtool.c  | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index b9bd3b4f4ea1..edde28ce163a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -611,6 +611,13 @@ struct phy_driver {
 	void (*get_strings)(struct phy_device *dev, u8 *data);
 	void (*get_stats)(struct phy_device *dev,
 			  struct ethtool_stats *stats, u64 *data);
+
+	/* Get and Set PHY tunables */
+	int (*get_tunable)(struct phy_device *dev,
+			   struct ethtool_tunable *tuna, void *data);
+	int (*set_tunable)(struct phy_device *dev,
+			    struct ethtool_tunable *tuna,
+			    const void *data);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
 				      struct phy_driver, mdiodrv)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 977489820eb9..61aebdf9c61b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -119,6 +119,11 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_TX_COPYBREAK]	= "tx-copybreak",
 };
 
+static const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_ID_UNSPEC]     = "Unspec",
+};
+
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gfeatures cmd = {
@@ -227,6 +232,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 	if (sset == ETH_SS_TUNABLES)
 		return ARRAY_SIZE(tunable_strings);
 
+	if (sset == ETH_SS_PHY_TUNABLES)
+		return ARRAY_SIZE(phy_tunable_strings);
+
 	if (sset == ETH_SS_PHY_STATS) {
 		if (dev->phydev)
 			return phy_get_sset_count(dev->phydev);
@@ -253,6 +261,8 @@ static void __ethtool_get_strings(struct net_device *dev,
 		       sizeof(rss_hash_func_strings));
 	else if (stringset == ETH_SS_TUNABLES)
 		memcpy(data, tunable_strings, sizeof(tunable_strings));
+	else if (stringset == ETH_SS_PHY_TUNABLES)
+		memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
 	else if (stringset == ETH_SS_PHY_STATS) {
 		struct phy_device *phydev = dev->phydev;
 
@@ -2422,6 +2432,76 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
 	};
 }
 
+static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
+{
+	switch (tuna->id) {
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	struct phy_device *phydev = dev->phydev;
+	void *data;
+
+	if (!(phydev && phydev->drv && phydev->drv->get_tunable))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_phy_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	ret = phydev->drv->get_tunable(phydev, &tuna, data);
+	if (ret)
+		goto out;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, data, tuna.len))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	struct phy_device *phydev = dev->phydev;
+	void *data;
+
+	if (!(phydev && phydev->drv && phydev->drv->set_tunable))
+		return -EOPNOTSUPP;
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_phy_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr, tuna.len))
+		goto out;
+	ret = phydev->drv->set_tunable(phydev, &tuna, data);
+
+out:
+	kfree(data);
+	return ret;
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2479,6 +2559,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GET_TS_INFO:
 	case ETHTOOL_GEEE:
 	case ETHTOOL_GTUNABLE:
+	case ETHTOOL_PHY_GTUNABLE:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2684,6 +2765,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SLINKSETTINGS:
 		rc = ethtool_set_link_ksettings(dev, useraddr);
 		break;
+	case ETHTOOL_PHY_GTUNABLE:
+		rc = get_phy_tunable(dev, useraddr);
+		break;
+	case ETHTOOL_PHY_STUNABLE:
+		rc = set_phy_tunable(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 603ab57363a0ba66c77ca4b3027bc0b4505df504 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 17 Nov 2016 11:19:12 -0800
Subject: bus: mvebu-bus: Provide inline stub for mvebu_mbus_get_dram_win_info

In preparation for allowing CONFIG_MVNETA_BM to build with COMPILE_TEST,
provide an inline stub for mvebu_mbus_get_dram_win_info().

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mbus.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 2931aa43dab1..0d3f14fd2621 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -82,6 +82,7 @@ static inline int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size,
 }
 #endif
 
+#ifdef CONFIG_MVEBU_MBUS
 int mvebu_mbus_save_cpu_target(u32 __iomem *store_addr);
 void mvebu_mbus_get_pcie_mem_aperture(struct resource *res);
 void mvebu_mbus_get_pcie_io_aperture(struct resource *res);
@@ -97,5 +98,12 @@ int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base,
 		    size_t mbus_size, phys_addr_t sdram_phys_base,
 		    size_t sdram_size);
 int mvebu_mbus_dt_init(bool is_coherent);
+#else
+static inline int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target,
+					       u8 *attr)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_MVEBU_MBUS */
 
 #endif /* __LINUX_MBUS_H */
-- 
cgit v1.2.3


From 3371d663bb4579f1b2003a92162edd6d90edd089 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 15 Nov 2016 10:56:20 +0100
Subject: mtd: nand: Support controllers with custom page

If your controller already sends the required NAND commands when
reading or writing a page, then the framework is not supposed to
send READ0 and SEQIN/PAGEPROG respectively.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 34 +++++++++++++++++++++++++++++++---
 include/linux/mtd/nand.h     | 12 ++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 4d099d4f442c..96d242e2fe34 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1961,7 +1961,8 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 						 __func__, buf);
 
 read_retry:
-			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+			if (nand_standard_page_accessors(&chip->ecc))
+				chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
 
 			/*
 			 * Now read the page into the buffer.  Absent an error,
@@ -2649,7 +2650,8 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	else
 		subpage = 0;
 
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	if (nand_standard_page_accessors(&chip->ecc))
+		chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
 
 	if (unlikely(raw))
 		status = chip->ecc.write_page_raw(mtd, chip, buf,
@@ -2672,7 +2674,8 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	if (!cached || !NAND_HAS_CACHEPROG(chip)) {
 
-		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+		if (nand_standard_page_accessors(&chip->ecc))
+			chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(mtd, chip);
 		/*
 		 * See if operation failed and additional status checks are
@@ -4511,6 +4514,26 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd)
 	return corr >= ds_corr && ecc->strength >= chip->ecc_strength_ds;
 }
 
+static bool invalid_ecc_page_accessors(struct nand_chip *chip)
+{
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+
+	if (nand_standard_page_accessors(ecc))
+		return false;
+
+	/*
+	 * NAND_ECC_CUSTOM_PAGE_ACCESS flag is set, make sure the NAND
+	 * controller driver implements all the page accessors because
+	 * default helpers are not suitable when the core does not
+	 * send the READ0/PAGEPROG commands.
+	 */
+	return (!ecc->read_page || !ecc->write_page ||
+		!ecc->read_page_raw || !ecc->write_page_raw ||
+		(NAND_HAS_SUBPAGE_READ(chip) && !ecc->read_subpage) ||
+		(NAND_HAS_SUBPAGE_WRITE(chip) && !ecc->write_subpage &&
+		 ecc->hwctl && ecc->calculate));
+}
+
 /**
  * nand_scan_tail - [NAND Interface] Scan for the NAND device
  * @mtd: MTD device structure
@@ -4531,6 +4554,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 		   !(chip->bbt_options & NAND_BBT_USE_FLASH)))
 		return -EINVAL;
 
+	if (invalid_ecc_page_accessors(chip)) {
+		pr_err("Invalid ECC page accessors setup\n");
+		return -EINVAL;
+	}
+
 	if (!(chip->options & NAND_OWN_BUFFERS)) {
 		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize
 				+ mtd->oobsize * 3, GFP_KERNEL);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 970ceb948835..ed6fd1993be1 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -142,6 +142,12 @@ enum nand_ecc_algo {
  */
 #define NAND_ECC_GENERIC_ERASED_CHECK	BIT(0)
 #define NAND_ECC_MAXIMIZE		BIT(1)
+/*
+ * If your controller already sends the required NAND commands when
+ * reading or writing a page, then the framework is not supposed to
+ * send READ0 and SEQIN/PAGEPROG respectively.
+ */
+#define NAND_ECC_CUSTOM_PAGE_ACCESS	BIT(2)
 
 /* Bit mask for flags passed to do_nand_read_ecc */
 #define NAND_GET_DEVICE		0x80
@@ -186,6 +192,7 @@ enum nand_ecc_algo {
 /* Macros to identify the above */
 #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG))
 #define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ))
+#define NAND_HAS_SUBPAGE_WRITE(chip) !((chip)->options & NAND_NO_SUBPAGE_WRITE)
 
 /* Non chip related options */
 /* This option skips the bbt scan during initialization. */
@@ -568,6 +575,11 @@ struct nand_ecc_ctrl {
 			int page);
 };
 
+static inline int nand_standard_page_accessors(struct nand_ecc_ctrl *ecc)
+{
+	return !(ecc->options & NAND_ECC_CUSTOM_PAGE_ACCESS);
+}
+
 /**
  * struct nand_buffers - buffer structure for read/write
  * @ecccalc:	buffer pointer for calculated ECC, size is oobsize.
-- 
cgit v1.2.3


From d66016a77757b004b8637f44d87bedfc4a47b89c Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Fri, 18 Nov 2016 15:40:39 -0800
Subject: virtio_net.h: Fix comment.

Fix incorrent comment after the final #endif.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 1c912f85e041..74f1e3363506 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -98,4 +98,4 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 	return 0;
 }
 
-#endif /* _LINUX_VIRTIO_BYTEORDER */
+#endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From 9403cd7cbb08aa3709c632decafa2014c8ed96e6 Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Fri, 18 Nov 2016 15:40:40 -0800
Subject: virtio_net: Do not clear memory for struct virtio_net_hdr twice.

virtio_net_hdr_from_skb() clears the memory for the header, so there
is no point for the callers to do the same.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c          | 3 +--
 include/linux/virtio_net.h | 2 +-
 net/packet/af_packet.c     | 2 --
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3b8d8cc6d2ea..64e694c68d99 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1360,8 +1360,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 	}
 
 	if (vnet_hdr_sz) {
-		struct virtio_net_hdr gso = { 0 }; /* no info leak */
-		int ret;
+		struct virtio_net_hdr gso;
 
 		if (iov_iter_count(iter) < vnet_hdr_sz)
 			return -EINVAL;
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 74f1e3363506..66204007d7ac 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -58,7 +58,7 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
 					  bool little_endian)
 {
-	memset(hdr, 0, sizeof(*hdr));
+	memset(hdr, 0, sizeof(*hdr));   /* no info leak */
 
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *sinfo = skb_shinfo(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d2238b204691..abe6c0b6683c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1970,8 +1970,6 @@ static unsigned int run_filter(struct sk_buff *skb,
 static int __packet_rcv_vnet(const struct sk_buff *skb,
 			     struct virtio_net_hdr *vnet_hdr)
 {
-	*vnet_hdr = (const struct virtio_net_hdr) { 0 };
-
 	if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le()))
 		BUG();
 
-- 
cgit v1.2.3


From c72d8cdaa5dbd3baf918046ee5149ab69330923e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 19 Nov 2016 04:08:08 +0300
Subject: net: fix bogus cast in skb_pagelen() and use unsigned variables

1) cast to "int" is unnecessary:
   u8 will be promoted to int before decrementing,
   small positive numbers fit into "int", so their values won't be changed
   during promotion.

   Once everything is int including loop counters, signedness doesn't
   matter: 32-bit operations will stay 32-bit operations.

   But! Someone tried to make this loop smart by making everything of
   the same type apparently in an attempt to optimise it.
   Do the optimization, just differently.
   Do the cast where it matters. :^)

2) frag size is unsigned entity and sum of fragments sizes is also
   unsigned.

Make everything unsigned, leave no MOVSX instruction behind.

	add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-4 (-4)
	function                                     old     new   delta
	skb_cow_data                                 835     834      -1
	ip_do_fragment                              2549    2548      -1
	ip6_fragment                                3130    3128      -2
	Total: Before=154865032, After=154865028, chg -0.00%

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 +++---
 net/ipv4/ip_output.c   | 2 +-
 net/ipv6/ip6_output.c  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a4aeeca7e805..9c535fbccf2c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1799,11 +1799,11 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
 	return skb->len - skb->data_len;
 }
 
-static inline int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
 {
-	int i, len = 0;
+	unsigned int i, len = 0;
 
-	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+	for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
 		len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
 	return len + skb_headlen(skb);
 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eaf720b65d7e..358f2c82b030 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -581,7 +581,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	 */
 	if (skb_has_frag_list(skb)) {
 		struct sk_buff *frag, *frag2;
-		int first_len = skb_pagelen(skb);
+		unsigned int first_len = skb_pagelen(skb);
 
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b37054b1873d..312cbd0e5038 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -625,7 +625,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 	if (skb_has_frag_list(skb)) {
-		int first_len = skb_pagelen(skb);
+		unsigned int first_len = skb_pagelen(skb);
 		struct sk_buff *frag2;
 
 		if (first_len - hlen > mtu ||
-- 
cgit v1.2.3


From dd936e4313fa3f60abd6e67abb3cb66fc9a018d1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 20 Nov 2016 20:48:36 -0500
Subject: dax: rip out get_block based IO support

No one uses functions using the get_block callback anymore. Rip them
out and update documentation.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 Documentation/filesystems/dax.txt |  22 +--
 fs/dax.c                          | 315 --------------------------------------
 include/linux/dax.h               |  12 --
 3 files changed, 11 insertions(+), 338 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 23d18b8a49d5..a7e6e14aeb08 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -58,22 +58,22 @@ Implementation Tips for Filesystem Writers
 Filesystem support consists of
 - adding support to mark inodes as being DAX by setting the S_DAX flag in
   i_flags
-- implementing the direct_IO address space operation, and calling
-  dax_do_io() instead of blockdev_direct_IO() if S_DAX is set
+- implementing ->read_iter and ->write_iter operations which use dax_iomap_rw()
+  when inode has S_DAX flag set
 - implementing an mmap file operation for DAX files which sets the
   VM_MIXEDMAP and VM_HUGEPAGE flags on the VMA, and setting the vm_ops to
-  include handlers for fault, pmd_fault and page_mkwrite (which should
-  probably call dax_fault(), dax_pmd_fault() and dax_mkwrite(), passing the
-  appropriate get_block() callback)
-- calling dax_truncate_page() instead of block_truncate_page() for DAX files
-- calling dax_zero_page_range() instead of zero_user() for DAX files
+  include handlers for fault, pmd_fault, page_mkwrite, pfn_mkwrite. These
+  handlers should probably call dax_iomap_fault() (for fault and page_mkwrite
+  handlers), dax_iomap_pmd_fault(), dax_pfn_mkwrite() passing the appropriate
+  iomap operations.
+- calling iomap_zero_range() passing appropriate iomap operations instead of
+  block_truncate_page() for DAX files
 - ensuring that there is sufficient locking between reads, writes,
   truncates and page faults
 
-The get_block() callback passed to the DAX functions may return
-uninitialised extents.  If it does, it must ensure that simultaneous
-calls to get_block() (for example by a page-fault racing with a read()
-or a write()) work correctly.
+The iomap handlers for allocating blocks must make sure that allocated blocks
+are zeroed out and converted to written extents before being returned to avoid
+exposure of uninitialized data through mmap.
 
 These filesystems may be used for inspiration:
 - ext2: see Documentation/filesystems/ext2.txt
diff --git a/fs/dax.c b/fs/dax.c
index 28af41b9da3a..ad131cd2605d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -116,168 +116,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
 	return page;
 }
 
-static bool buffer_written(struct buffer_head *bh)
-{
-	return buffer_mapped(bh) && !buffer_unwritten(bh);
-}
-
-static sector_t to_sector(const struct buffer_head *bh,
-		const struct inode *inode)
-{
-	sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
-
-	return sector;
-}
-
-static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
-		      loff_t start, loff_t end, get_block_t get_block,
-		      struct buffer_head *bh)
-{
-	loff_t pos = start, max = start, bh_max = start;
-	bool hole = false;
-	struct block_device *bdev = NULL;
-	int rw = iov_iter_rw(iter), rc;
-	long map_len = 0;
-	struct blk_dax_ctl dax = {
-		.addr = ERR_PTR(-EIO),
-	};
-	unsigned blkbits = inode->i_blkbits;
-	sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
-								>> blkbits;
-
-	if (rw == READ)
-		end = min(end, i_size_read(inode));
-
-	while (pos < end) {
-		size_t len;
-		if (pos == max) {
-			long page = pos >> PAGE_SHIFT;
-			sector_t block = page << (PAGE_SHIFT - blkbits);
-			unsigned first = pos - (block << blkbits);
-			long size;
-
-			if (pos == bh_max) {
-				bh->b_size = PAGE_ALIGN(end - pos);
-				bh->b_state = 0;
-				rc = get_block(inode, block, bh, rw == WRITE);
-				if (rc)
-					break;
-				bh_max = pos - first + bh->b_size;
-				bdev = bh->b_bdev;
-				/*
-				 * We allow uninitialized buffers for writes
-				 * beyond EOF as those cannot race with faults
-				 */
-				WARN_ON_ONCE(
-					(buffer_new(bh) && block < file_blks) ||
-					(rw == WRITE && buffer_unwritten(bh)));
-			} else {
-				unsigned done = bh->b_size -
-						(bh_max - (pos - first));
-				bh->b_blocknr += done >> blkbits;
-				bh->b_size -= done;
-			}
-
-			hole = rw == READ && !buffer_written(bh);
-			if (hole) {
-				size = bh->b_size - first;
-			} else {
-				dax_unmap_atomic(bdev, &dax);
-				dax.sector = to_sector(bh, inode);
-				dax.size = bh->b_size;
-				map_len = dax_map_atomic(bdev, &dax);
-				if (map_len < 0) {
-					rc = map_len;
-					break;
-				}
-				dax.addr += first;
-				size = map_len - first;
-			}
-			/*
-			 * pos + size is one past the last offset for IO,
-			 * so pos + size can overflow loff_t at extreme offsets.
-			 * Cast to u64 to catch this and get the true minimum.
-			 */
-			max = min_t(u64, pos + size, end);
-		}
-
-		if (iov_iter_rw(iter) == WRITE) {
-			len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-		} else if (!hole)
-			len = copy_to_iter((void __force *) dax.addr, max - pos,
-					iter);
-		else
-			len = iov_iter_zero(max - pos, iter);
-
-		if (!len) {
-			rc = -EFAULT;
-			break;
-		}
-
-		pos += len;
-		if (!IS_ERR(dax.addr))
-			dax.addr += len;
-	}
-
-	dax_unmap_atomic(bdev, &dax);
-
-	return (pos == start) ? rc : pos - start;
-}
-
-/**
- * dax_do_io - Perform I/O to a DAX file
- * @iocb: The control block for this I/O
- * @inode: The file which the I/O is directed at
- * @iter: The addresses to do I/O from or to
- * @get_block: The filesystem method used to translate file offsets to blocks
- * @end_io: A filesystem callback for I/O completion
- * @flags: See below
- *
- * This function uses the same locking scheme as do_blockdev_direct_IO:
- * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
- * caller for writes.  For reads, we take and release the i_mutex ourselves.
- * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
- * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
- * is in progress.
- */
-ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
-		  struct iov_iter *iter, get_block_t get_block,
-		  dio_iodone_t end_io, int flags)
-{
-	struct buffer_head bh;
-	ssize_t retval = -EINVAL;
-	loff_t pos = iocb->ki_pos;
-	loff_t end = pos + iov_iter_count(iter);
-
-	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
-
-	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-		inode_lock(inode);
-
-	/* Protects against truncate */
-	if (!(flags & DIO_SKIP_DIO_COUNT))
-		inode_dio_begin(inode);
-
-	retval = dax_io(inode, iter, pos, end, get_block, &bh);
-
-	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-		inode_unlock(inode);
-
-	if (end_io) {
-		int err;
-
-		err = end_io(iocb, pos, retval, bh.b_private);
-		if (err)
-			retval = err;
-	}
-
-	if (!(flags & DIO_SKIP_DIO_COUNT))
-		inode_dio_end(inode);
-	return retval;
-}
-EXPORT_SYMBOL_GPL(dax_do_io);
-
 /*
  * DAX radix tree locking
  */
@@ -919,105 +757,6 @@ static int dax_insert_mapping(struct address_space *mapping,
 	return vm_insert_mixed(vma, vaddr, dax.pfn);
 }
 
-/**
- * dax_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * When a page fault occurs, filesystems may call this helper in their
- * fault handler for DAX files. dax_fault() assumes the caller has done all
- * the necessary locking for the page fault to proceed successfully.
- */
-int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			get_block_t get_block)
-{
-	struct file *file = vma->vm_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	void *entry;
-	struct buffer_head bh;
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	unsigned blkbits = inode->i_blkbits;
-	sector_t block;
-	pgoff_t size;
-	int error;
-	int major = 0;
-
-	/*
-	 * Check whether offset isn't beyond end of file now. Caller is supposed
-	 * to hold locks serializing us with truncate / punch hole so this is
-	 * a reliable test.
-	 */
-	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (vmf->pgoff >= size)
-		return VM_FAULT_SIGBUS;
-
-	memset(&bh, 0, sizeof(bh));
-	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
-	bh.b_bdev = inode->i_sb->s_bdev;
-	bh.b_size = PAGE_SIZE;
-
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		error = PTR_ERR(entry);
-		goto out;
-	}
-
-	error = get_block(inode, block, &bh, 0);
-	if (!error && (bh.b_size < PAGE_SIZE))
-		error = -EIO;		/* fs corruption? */
-	if (error)
-		goto unlock_entry;
-
-	if (vmf->cow_page) {
-		struct page *new_page = vmf->cow_page;
-		if (buffer_written(&bh))
-			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
-					bh.b_size, new_page, vaddr);
-		else
-			clear_user_highpage(new_page, vaddr);
-		if (error)
-			goto unlock_entry;
-		if (!radix_tree_exceptional_entry(entry)) {
-			vmf->page = entry;
-			return VM_FAULT_LOCKED;
-		}
-		vmf->entry = entry;
-		return VM_FAULT_DAX_LOCKED;
-	}
-
-	if (!buffer_mapped(&bh)) {
-		if (vmf->flags & FAULT_FLAG_WRITE) {
-			error = get_block(inode, block, &bh, 1);
-			count_vm_event(PGMAJFAULT);
-			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-			major = VM_FAULT_MAJOR;
-			if (!error && (bh.b_size < PAGE_SIZE))
-				error = -EIO;
-			if (error)
-				goto unlock_entry;
-		} else {
-			return dax_load_hole(mapping, entry, vmf);
-		}
-	}
-
-	/* Filesystem should not return unwritten buffers to us! */
-	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
-			bh.b_size, &entry, vma, vmf);
- unlock_entry:
-	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-	if (error == -ENOMEM)
-		return VM_FAULT_OOM | major;
-	/* -EBUSY is fine, somebody else faulted on the same PTE */
-	if ((error < 0) && (error != -EBUSY))
-		return VM_FAULT_SIGBUS | major;
-	return VM_FAULT_NOPAGE | major;
-}
-EXPORT_SYMBOL_GPL(dax_fault);
-
 /**
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
@@ -1078,60 +817,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
-/**
- * dax_zero_page_range - zero a range within a page of a DAX file
- * @inode: The file being truncated
- * @from: The file offset that is being truncated to
- * @length: The number of bytes to zero
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * This function can be called by a filesystem when it is zeroing part of a
- * page in a DAX file.  This is intended for hole-punch operations.  If
- * you are truncating a file, the helper function dax_truncate_page() may be
- * more convenient.
- */
-int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
-							get_block_t get_block)
-{
-	struct buffer_head bh;
-	pgoff_t index = from >> PAGE_SHIFT;
-	unsigned offset = from & (PAGE_SIZE-1);
-	int err;
-
-	/* Block boundary? Nothing to do */
-	if (!length)
-		return 0;
-	if (WARN_ON_ONCE((offset + length) > PAGE_SIZE))
-		return -EINVAL;
-
-	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
-	bh.b_size = PAGE_SIZE;
-	err = get_block(inode, index, &bh, 0);
-	if (err < 0 || !buffer_written(&bh))
-		return err;
-
-	return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode),
-			offset, length);
-}
-EXPORT_SYMBOL_GPL(dax_zero_page_range);
-
-/**
- * dax_truncate_page - handle a partial page being truncated in a DAX file
- * @inode: The file being truncated
- * @from: The file offset that is being truncated to
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * Similar to block_truncate_page(), this function can be called by a
- * filesystem when it is truncating a DAX file to handle the partial page.
- */
-int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
-{
-	unsigned length = PAGE_ALIGN(from) - from;
-	return dax_zero_page_range(inode, from, length, get_block);
-}
-EXPORT_SYMBOL_GPL(dax_truncate_page);
-
 #ifdef CONFIG_FS_IOMAP
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8d1a5c47945f..0afade8bd3d7 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,13 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
-ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
-		  get_block_t, dio_iodone_t, int flags);
-int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
-int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all);
@@ -73,12 +68,6 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif
 
-static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-				pmd_t *pmd, unsigned int flags, get_block_t gb)
-{
-	return VM_FAULT_FALLBACK;
-}
-
 #ifdef CONFIG_FS_DAX_PMD
 static inline unsigned int dax_radix_order(void *entry)
 {
@@ -101,7 +90,6 @@ static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
 }
 #endif
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
 
 static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
-- 
cgit v1.2.3


From 1e8096bb2031c53b6bf3adc7667b4b2bdf2a1ac6 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <Yazen.Ghannam@amd.com>
Date: Thu, 17 Nov 2016 17:57:28 -0500
Subject: EDAC: Add LRDDR4 DRAM type

AMD Fam17h systems can support Load-Reduced DDR4 DIMMs. So add this new
type to edac.h in preparation for the Fam17h EDAC update. Also, let's
fix a format issue with the LRDDR3 line while we're here.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1479423463-8536-3-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 include/linux/edac.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 9e0d78966552..bf2bf87bb2f9 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -192,10 +192,11 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_DDR3:		DDR3 RAM
  * @MEM_RDDR3:		Registered DDR3 RAM
  *			This is a variant of the DDR3 memories.
- * @MEM_LRDDR3		Load-Reduced DDR3 memory.
+ * @MEM_LRDDR3:		Load-Reduced DDR3 memory.
  * @MEM_DDR4:		Unbuffered DDR4 RAM
  * @MEM_RDDR4:		Registered DDR4 RAM
  *			This is a variant of the DDR4 memories.
+ * @MEM_LRDDR4:		Load-Reduced DDR4 memory.
  */
 enum mem_type {
 	MEM_EMPTY = 0,
@@ -218,6 +219,7 @@ enum mem_type {
 	MEM_LRDDR3,
 	MEM_DDR4,
 	MEM_RDDR4,
+	MEM_LRDDR4,
 };
 
 #define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
@@ -239,6 +241,7 @@ enum mem_type {
 #define MEM_FLAG_RDDR3          BIT(MEM_RDDR3)
 #define MEM_FLAG_DDR4           BIT(MEM_DDR4)
 #define MEM_FLAG_RDDR4          BIT(MEM_RDDR4)
+#define MEM_FLAG_LRDDR4         BIT(MEM_LRDDR4)
 
 /**
  * enum edac-type - Error Detection and Correction capabilities and mode
-- 
cgit v1.2.3


From 194a6b5b9cb6b91a5f7d86984165a3bc55188599 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 17 Nov 2016 11:46:38 -0500
Subject: sched/wake_q: Rename WAKE_Q to DEFINE_WAKE_Q

Currently the wake_q data structure is defined by the WAKE_Q() macro.
This macro, however, looks like a function doing something as "wake" is
a verb. Even checkpatch.pl was confused as it reported warnings like

  WARNING: Missing a blank line after declarations
  #548: FILE: kernel/futex.c:3665:
  +	int ret;
  +	WAKE_Q(wake_q);

This patch renames the WAKE_Q() macro to DEFINE_WAKE_Q() which clarifies
what the macro is doing and eliminates the checkpatch.pl warnings.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1479401198-1765-1-git-send-email-longman@redhat.com
[ Resolved conflict and added missing rename. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       |  4 ++--
 ipc/mqueue.c                |  4 ++--
 ipc/msg.c                   |  8 ++++----
 kernel/futex.c              |  8 ++++----
 kernel/locking/mutex.c      |  2 +-
 kernel/locking/rtmutex.c    |  2 +-
 kernel/locking/rwsem-xadd.c | 10 +++++-----
 7 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1aa3b02f6ac..dc37cbe2b13c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -989,7 +989,7 @@ enum cpu_idle_type {
  * already in a wake queue, the wakeup will happen soon and the second
  * waker can just skip it.
  *
- * The WAKE_Q macro declares and initializes the list head.
+ * The DEFINE_WAKE_Q macro declares and initializes the list head.
  * wake_up_q() does NOT reinitialize the list; it's expected to be
  * called near the end of a function, where the fact that the queue is
  * not used again will be easy to see by inspection.
@@ -1009,7 +1009,7 @@ struct wake_q_head {
 
 #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
 
-#define WAKE_Q(name)					\
+#define DEFINE_WAKE_Q(name)				\
 	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
 
 extern void wake_q_add(struct wake_q_head *head,
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8cbd6e6894d5..7a2d8f0c8ae5 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -967,7 +967,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
 	struct timespec ts;
 	struct posix_msg_tree_node *new_leaf = NULL;
 	int ret = 0;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	if (u_abs_timeout) {
 		int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1151,7 +1151,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
 			msg_ptr = wait.msg;
 		}
 	} else {
-		WAKE_Q(wake_q);
+		DEFINE_WAKE_Q(wake_q);
 
 		msg_ptr = msg_get(info);
 
diff --git a/ipc/msg.c b/ipc/msg.c
index e12307d0c920..32e9bd837cde 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -235,7 +235,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct msg_msg *msg, *t;
 	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	expunge_all(msq, -EIDRM, &wake_q);
 	ss_wakeup(msq, &wake_q, true);
@@ -397,7 +397,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		goto out_up;
 	case IPC_SET:
 	{
-		WAKE_Q(wake_q);
+		DEFINE_WAKE_Q(wake_q);
 
 		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
 		    !capable(CAP_SYS_RESOURCE)) {
@@ -634,7 +634,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 	struct msg_msg *msg;
 	int err;
 	struct ipc_namespace *ns;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -850,7 +850,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 	struct msg_queue *msq;
 	struct ipc_namespace *ns;
 	struct msg_msg *msg, *copy = NULL;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	ns = current->nsproxy->ipc_ns;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 2c4be467fecd..9246d9f593d1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1298,7 +1298,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
 	u32 uninitialized_var(curval), newval;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 	bool deboost;
 	int ret = 0;
 
@@ -1415,7 +1415,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 	struct futex_q *this, *next;
 	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	if (!bitset)
 		return -EINVAL;
@@ -1469,7 +1469,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
 	int ret, op_ret;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 retry:
 	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
@@ -1708,7 +1708,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	if (requeue_pi) {
 		/*
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a65e09a046ac..c0731685603f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -840,7 +840,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 {
 	struct task_struct *next = NULL;
 	unsigned long owner, flags;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	mutex_release(&lock->dep_map, 1, ip);
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 1ec0f48962b3..d8d7a153b711 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1382,7 +1382,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 		    bool (*slowfn)(struct rt_mutex *lock,
 				   struct wake_q_head *wqh))
 {
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
 		rt_mutex_deadlock_account_unlock(current);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2fa2e2e64950..263e7449282a 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -225,7 +225,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk = current;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
@@ -461,7 +461,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
 	struct rw_semaphore *ret = sem;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
@@ -495,7 +495,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		 * wake any read locks that were queued ahead of us.
 		 */
 		if (count > RWSEM_WAITING_BIAS) {
-			WAKE_Q(wake_q);
+			DEFINE_WAKE_Q(wake_q);
 
 			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
@@ -571,7 +571,7 @@ __visible
 struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	/*
 	 * If a spinner is present, it is not necessary to do the wakeup.
@@ -625,7 +625,7 @@ __visible
 struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
-	WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_q);
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-- 
cgit v1.2.3


From d12a969ebbfcfc25853c4147d42b388f758e8784 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <Yazen.Ghannam@amd.com>
Date: Thu, 17 Nov 2016 17:57:32 -0500
Subject: EDAC, amd64: Add Deferred Error type

Currently, deferred errors are classified as correctable in EDAC. Add a
new error type for deferred errors so that they are correctly reported
to the user.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1479423463-8536-7-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/amd64_edac.c | 2 ++
 include/linux/edac.h      | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index aaff0b9cdaa8..dd45cff02388 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2165,6 +2165,8 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
 		err_type = HW_EVENT_ERR_CORRECTED;
 	else if (ecc_type == 1)
 		err_type = HW_EVENT_ERR_UNCORRECTED;
+	else if (ecc_type == 3)
+		err_type = HW_EVENT_ERR_DEFERRED;
 	else {
 		WARN(1, "Something is rotten in the state of Denmark.\n");
 		return;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index bf2bf87bb2f9..cb56dcba68c6 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -134,6 +134,7 @@ enum dev_type {
 enum hw_event_mc_err_type {
 	HW_EVENT_ERR_CORRECTED,
 	HW_EVENT_ERR_UNCORRECTED,
+	HW_EVENT_ERR_DEFERRED,
 	HW_EVENT_ERR_FATAL,
 	HW_EVENT_ERR_INFO,
 };
@@ -145,6 +146,8 @@ static inline char *mc_event_error_type(const unsigned int err_type)
 		return "Corrected";
 	case HW_EVENT_ERR_UNCORRECTED:
 		return "Uncorrected";
+	case HW_EVENT_ERR_DEFERRED:
+		return "Deferred";
 	case HW_EVENT_ERR_FATAL:
 		return "Fatal";
 	default:
-- 
cgit v1.2.3


From 3f89586bc1ce1434b15f78e62b555c0619852295 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Fri, 1 Jul 2016 17:29:23 +0800
Subject: mfd: axp20x: Add adc volatile ranges for axp22x

AXP22x has also some different register map than axp20x, they're also
added here.

Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/axp20x.c       | 1 +
 include/linux/mfd/axp20x.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index ba130be32e61..98377d29b783 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -98,6 +98,7 @@ static const struct regmap_range axp22x_volatile_ranges[] = {
 	regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP20X_PWR_OP_MODE),
 	regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IRQ5_STATE),
 	regmap_reg_range(AXP22X_GPIO_STATE, AXP22X_GPIO_STATE),
+	regmap_reg_range(AXP22X_PMIC_ADC_H, AXP20X_IPSOUT_V_HIGH_L),
 	regmap_reg_range(AXP20X_FG_RES, AXP20X_FG_RES),
 };
 
diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index fec597fb34cb..6349496f09fc 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -226,6 +226,10 @@ enum {
 #define AXP20X_OCV_MAX			0xf
 
 /* AXP22X specific registers */
+#define AXP22X_PMIC_ADC_H		0x56
+#define AXP22X_PMIC_ADC_L		0x57
+#define AXP22X_TS_ADC_H			0x58
+#define AXP22X_TS_ADC_L			0x59
 #define AXP22X_BATLOW_THRES1		0xe6
 
 /* AXP288 specific registers */
-- 
cgit v1.2.3


From 30248feff5e5c6a01ade5e6126009e296ed8bd35 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 18 Nov 2016 13:59:21 +0100
Subject: cpufreq: Make cpufreq_update_policy() void

The return value of cpufreq_update_policy() is never used, so make
it void.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 21 +++++++--------------
 include/linux/cpufreq.h   |  2 +-
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ad3b319486bd..cc475eff90b3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2257,21 +2257,18 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
  *	Useful for policy notifiers which have different necessities
  *	at different times.
  */
-int cpufreq_update_policy(unsigned int cpu)
+void cpufreq_update_policy(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 	struct cpufreq_policy new_policy;
-	int ret;
 
 	if (!policy)
-		return -ENODEV;
+		return;
 
 	down_write(&policy->rwsem);
 
-	if (policy_is_inactive(policy)) {
-		ret = -ENODEV;
+	if (policy_is_inactive(policy))
 		goto unlock;
-	}
 
 	pr_debug("updating policy for CPU %u\n", cpu);
 	memcpy(&new_policy, policy, sizeof(*policy));
@@ -2283,24 +2280,20 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
-		if (cpufreq_suspended) {
-			ret = -EAGAIN;
+		if (cpufreq_suspended)
 			goto unlock;
-		}
+
 		new_policy.cur = cpufreq_update_current_freq(policy);
-		if (WARN_ON(!new_policy.cur)) {
-			ret = -EIO;
+		if (WARN_ON(!new_policy.cur))
 			goto unlock;
-		}
 	}
 
-	ret = cpufreq_set_policy(policy, &new_policy);
+	cpufreq_set_policy(policy, &new_policy);
 
 unlock:
 	up_write(&policy->rwsem);
 
 	cpufreq_cpu_put(policy);
-	return ret;
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 40dc2e29f480..7e05c5e4e45c 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -175,7 +175,7 @@ void disable_cpufreq(void);
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
-int cpufreq_update_policy(unsigned int cpu);
+void cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
-- 
cgit v1.2.3


From ed424bb3680b6e6167a44c5c195ec8f9aea3ab3b Mon Sep 17 00:00:00 2001
From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Date: Fri, 18 Nov 2016 23:00:10 +0530
Subject: hwrng: Make explicit that max >= 32 always

As hw_random core calls ->read with max > 32 or more, make it explicit.
Also remove checks involving 'max' being less than 8.

Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/msm-rng.c     | 4 ----
 drivers/char/hw_random/pic32-rng.c   | 3 ---
 drivers/char/hw_random/pseries-rng.c | 5 ++---
 include/linux/hw_random.h            | 3 +--
 4 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
index 96fb986402eb..841fee845ec9 100644
--- a/drivers/char/hw_random/msm-rng.c
+++ b/drivers/char/hw_random/msm-rng.c
@@ -90,10 +90,6 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
 	/* calculate max size bytes to transfer back to caller */
 	maxsize = min_t(size_t, MAX_HW_FIFO_SIZE, max);
 
-	/* no room for word data */
-	if (maxsize < WORD_SZ)
-		return 0;
-
 	ret = clk_prepare_enable(rng->clk);
 	if (ret)
 		return ret;
diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index 11dc9b7c09ce..9b5e68a71d01 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -62,9 +62,6 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 	u32 t;
 	unsigned int timeout = RNG_TIMEOUT;
 
-	if (max < 8)
-		return 0;
-
 	do {
 		t = readl(priv->base + RNGRCNT) & RCNT_MASK;
 		if (t == 64) {
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c
index 63ce51d09af1..d9f46b437cc2 100644
--- a/drivers/char/hw_random/pseries-rng.c
+++ b/drivers/char/hw_random/pseries-rng.c
@@ -28,7 +28,6 @@
 static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
 	u64 buffer[PLPAR_HCALL_BUFSIZE];
-	size_t size = max < 8 ? max : 8;
 	int rc;
 
 	rc = plpar_hcall(H_RANDOM, (unsigned long *)buffer);
@@ -36,10 +35,10 @@ static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait
 		pr_err_ratelimited("H_RANDOM call failed %d\n", rc);
 		return -EIO;
 	}
-	memcpy(data, buffer, size);
+	memcpy(data, buffer, 8);
 
 	/* The hypervisor interface returns 64 bits */
-	return size;
+	return 8;
 }
 
 /**
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 34a0dc18f327..bee0827766a3 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -30,8 +30,7 @@
  *			Must not be NULL.    *OBSOLETE*
  * @read:		New API. drivers can fill up to max bytes of data
  *			into the buffer. The buffer is aligned for any type
- *			and max is guaranteed to be >= to that alignment
- *			(either 4 or 8 depending on architecture).
+ *			and max is a multiple of 4 and >= 32 bytes.
  * @priv:		Private data, for use by the RNG driver.
  * @quality:		Estimation of true entropy in RNG's bitstream
  *			(per mill).
-- 
cgit v1.2.3


From 6d67942dd0ebc3dddc86edf9208169d064a9b3d7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 19 Nov 2016 01:45:03 +0100
Subject: bpf: add __must_check attributes to refcount manipulating helpers

Helpers like bpf_prog_add(), bpf_prog_inc(), bpf_map_inc() can fail
with an error, so make sure the caller properly checks their return
value and not just ignores it, which could worst-case lead to use
after free.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 01c1487277b2..69d0a7f12a3b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -233,14 +233,14 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
+struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
 void bpf_prog_sub(struct bpf_prog *prog, int i);
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
+struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
@@ -299,7 +299,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
+static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog,
+							  int i)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -311,7 +312,8 @@ static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
-static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+
+static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-- 
cgit v1.2.3


From 93c5bdf7ab71bbdae27f8f51fa175e06f000d69d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Nov 2016 16:18:18 +0100
Subject: block: clear all of bi_opf in bio_set_op_attrs

Since commit 87374179 ("block: add a proper block layer data direction
encoding") we only or the new op and flags into bi_opf in bio_set_op_attrs
instead of clearing the old value.  I've not seen any breakage with the
new behavior, but it seems dangerous.

Also convert it to an inline function to make the argument passing
safer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4d0044d09984..f57458a6a93b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -207,8 +207,11 @@ enum req_flag_bits {
 	((req)->cmd_flags & REQ_OP_MASK)
 
 /* obsolete, don't use in new code */
-#define bio_set_op_attrs(bio, op, op_flags) \
-	((bio)->bi_opf |= (op | op_flags))
+static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
+		unsigned op_flags)
+{
+	bio->bi_opf = op | op_flags;
+}
 
 static inline bool op_is_write(unsigned int op)
 {
-- 
cgit v1.2.3


From 08b98d3291652bdcd1029a059e39fbcae5ad93e2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 17 Nov 2016 03:28:53 +0100
Subject: PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag

Modify the ACPI system sleep support setup code to select
suspend-to-idle as the default system sleep state if the
ACPI_FADT_LOW_POWER_S0 flag is set in the FADT and the
default sleep state was not selected from the kernel command
line.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
---
 Documentation/power/states.txt | 4 +++-
 drivers/acpi/sleep.c           | 8 ++++++++
 include/linux/suspend.h        | 2 ++
 kernel/power/suspend.c         | 4 ++--
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt
index 008ecb588317..8a39ce45d8a0 100644
--- a/Documentation/power/states.txt
+++ b/Documentation/power/states.txt
@@ -35,7 +35,9 @@ only one way to cause the system to go into the Suspend-To-RAM state (write
 The default suspend mode (ie. the one to be used without writing anything into
 /sys/power/mem_sleep) is either "deep" (if Suspend-To-RAM is supported) or
 "s2idle", but it can be overridden by the value of the "mem_sleep_default"
-parameter in the kernel command line.
+parameter in the kernel command line.  On some ACPI-based systems, depending on
+the information in the FADT, the default may be "s2idle" even if Suspend-To-RAM
+is supported.
 
 The properties of all of the sleep states are described below.
 
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index deb0ff78eba8..ce1855fd584b 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -691,6 +691,14 @@ static void acpi_sleep_suspend_setup(void)
 		if (acpi_sleep_state_supported(i))
 			sleep_states[i] = 1;
 
+	/*
+	 * Use suspend-to-idle by default if ACPI_FADT_LOW_POWER_S0 is set and
+	 * the default suspend mode was not selected from the command line.
+	 */
+	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0 &&
+	    mem_sleep_default > PM_SUSPEND_MEM)
+		mem_sleep_default = PM_SUSPEND_FREEZE;
+
 	suspend_set_ops(old_suspend_ordering ?
 		&acpi_suspend_ops_old : &acpi_suspend_ops);
 	freeze_set_ops(&acpi_freeze_ops);
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d9718378a8be..0c729c3c8549 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -194,6 +194,8 @@ struct platform_freeze_ops {
 };
 
 #ifdef CONFIG_SUSPEND
+extern suspend_state_t mem_sleep_default;
+
 /**
  * suspend_set_ops - set platform dependent suspend operations
  * @ops: The new suspend operations to set.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 15e6baef5c73..f67ceb7768b8 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -46,7 +46,7 @@ static const char * const mem_sleep_labels[] = {
 const char *mem_sleep_states[PM_SUSPEND_MAX];
 
 suspend_state_t mem_sleep_current = PM_SUSPEND_FREEZE;
-static suspend_state_t mem_sleep_default = PM_SUSPEND_MEM;
+suspend_state_t mem_sleep_default = PM_SUSPEND_MAX;
 
 unsigned int pm_suspend_global_flags;
 EXPORT_SYMBOL_GPL(pm_suspend_global_flags);
@@ -168,7 +168,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops)
 	}
 	if (valid_state(PM_SUSPEND_MEM)) {
 		mem_sleep_states[PM_SUSPEND_MEM] = mem_sleep_labels[PM_SUSPEND_MEM];
-		if (mem_sleep_default == PM_SUSPEND_MEM)
+		if (mem_sleep_default >= PM_SUSPEND_MEM)
 			mem_sleep_current = PM_SUSPEND_MEM;
 	}
 
-- 
cgit v1.2.3


From a9c6ce57ec2f136d08141e8220a0ffaca216f7b0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 8 Nov 2016 14:38:46 +0100
Subject: led: core: Use atomic bit-field for the blink-flags

All the LED_BLINK* flags are accessed read-modify-write from e.g.
led_set_brightness and led_blink_set_oneshot while both
set_brightness_work and the blink_timer may be running.

If these race then the modify step done by one of them may be lost,
switch the LED_BLINK* flags to a new atomic work_flags bit-field
to avoid this race.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
---
 drivers/leds/led-class.c |  1 +
 drivers/leds/led-core.c  | 52 +++++++++++++++++++++++++-----------------------
 include/linux/leds.h     | 24 ++++++++++++----------
 3 files changed, 42 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index d946eb166781..326ee6e925a2 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -204,6 +204,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 		dev_warn(parent, "Led %s renamed to %s due to name collision",
 				led_cdev->name, dev_name(led_cdev->dev));
 
+	led_cdev->work_flags = 0;
 #ifdef CONFIG_LEDS_TRIGGERS
 	init_rwsem(&led_cdev->trigger_lock);
 #endif
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 3bce44893021..bd6bb4d44f05 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -53,12 +53,13 @@ static void led_timer_function(unsigned long data)
 
 	if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) {
 		led_set_brightness_nosleep(led_cdev, LED_OFF);
-		led_cdev->flags &= ~LED_BLINK_SW;
+		clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
 		return;
 	}
 
-	if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) {
-		led_cdev->flags &=  ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW);
+	if (test_and_clear_bit(LED_BLINK_ONESHOT_STOP,
+			       &led_cdev->work_flags)) {
+		clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
 		return;
 	}
 
@@ -73,10 +74,9 @@ static void led_timer_function(unsigned long data)
 		 * Do it only if there is no pending blink brightness
 		 * change, to avoid overwriting the new value.
 		 */
-		if (!(led_cdev->flags & LED_BLINK_BRIGHTNESS_CHANGE))
+		if (!test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE,
+					&led_cdev->work_flags))
 			led_cdev->blink_brightness = brightness;
-		else
-			led_cdev->flags &= ~LED_BLINK_BRIGHTNESS_CHANGE;
 		brightness = LED_OFF;
 		delay = led_cdev->blink_delay_off;
 	}
@@ -87,13 +87,15 @@ static void led_timer_function(unsigned long data)
 	 * the final blink state so that the led is toggled each delay_on +
 	 * delay_off milliseconds in worst case.
 	 */
-	if (led_cdev->flags & LED_BLINK_ONESHOT) {
-		if (led_cdev->flags & LED_BLINK_INVERT) {
+	if (test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags)) {
+		if (test_bit(LED_BLINK_INVERT, &led_cdev->work_flags)) {
 			if (brightness)
-				led_cdev->flags |= LED_BLINK_ONESHOT_STOP;
+				set_bit(LED_BLINK_ONESHOT_STOP,
+					&led_cdev->work_flags);
 		} else {
 			if (!brightness)
-				led_cdev->flags |= LED_BLINK_ONESHOT_STOP;
+				set_bit(LED_BLINK_ONESHOT_STOP,
+					&led_cdev->work_flags);
 		}
 	}
 
@@ -106,10 +108,9 @@ static void set_brightness_delayed(struct work_struct *ws)
 		container_of(ws, struct led_classdev, set_brightness_work);
 	int ret = 0;
 
-	if (led_cdev->flags & LED_BLINK_DISABLE) {
+	if (test_and_clear_bit(LED_BLINK_DISABLE, &led_cdev->work_flags)) {
 		led_cdev->delayed_set_value = LED_OFF;
 		led_stop_software_blink(led_cdev);
-		led_cdev->flags &= ~LED_BLINK_DISABLE;
 	}
 
 	ret = __led_set_brightness(led_cdev, led_cdev->delayed_set_value);
@@ -152,7 +153,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
-	led_cdev->flags |= LED_BLINK_SW;
+	set_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	mod_timer(&led_cdev->blink_timer, jiffies + 1);
 }
 
@@ -161,7 +162,7 @@ static void led_blink_setup(struct led_classdev *led_cdev,
 		     unsigned long *delay_on,
 		     unsigned long *delay_off)
 {
-	if (!(led_cdev->flags & LED_BLINK_ONESHOT) &&
+	if (!test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags) &&
 	    led_cdev->blink_set &&
 	    !led_cdev->blink_set(led_cdev, delay_on, delay_off))
 		return;
@@ -188,8 +189,8 @@ void led_blink_set(struct led_classdev *led_cdev,
 {
 	del_timer_sync(&led_cdev->blink_timer);
 
-	led_cdev->flags &= ~LED_BLINK_ONESHOT;
-	led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
+	clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
+	clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
 
 	led_blink_setup(led_cdev, delay_on, delay_off);
 }
@@ -200,17 +201,17 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
 			   unsigned long *delay_off,
 			   int invert)
 {
-	if ((led_cdev->flags & LED_BLINK_ONESHOT) &&
+	if (test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags) &&
 	     timer_pending(&led_cdev->blink_timer))
 		return;
 
-	led_cdev->flags |= LED_BLINK_ONESHOT;
-	led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
+	set_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
+	clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
 
 	if (invert)
-		led_cdev->flags |= LED_BLINK_INVERT;
+		set_bit(LED_BLINK_INVERT, &led_cdev->work_flags);
 	else
-		led_cdev->flags &= ~LED_BLINK_INVERT;
+		clear_bit(LED_BLINK_INVERT, &led_cdev->work_flags);
 
 	led_blink_setup(led_cdev, delay_on, delay_off);
 }
@@ -221,7 +222,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 	del_timer_sync(&led_cdev->blink_timer);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
-	led_cdev->flags &= ~LED_BLINK_SW;
+	clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
@@ -232,17 +233,18 @@ void led_set_brightness(struct led_classdev *led_cdev,
 	 * If software blink is active, delay brightness setting
 	 * until the next timer tick.
 	 */
-	if (led_cdev->flags & LED_BLINK_SW) {
+	if (test_bit(LED_BLINK_SW, &led_cdev->work_flags)) {
 		/*
 		 * If we need to disable soft blinking delegate this to the
 		 * work queue task to avoid problems in case we are called
 		 * from hard irq context.
 		 */
 		if (brightness == LED_OFF) {
-			led_cdev->flags |= LED_BLINK_DISABLE;
+			set_bit(LED_BLINK_DISABLE, &led_cdev->work_flags);
 			schedule_work(&led_cdev->set_brightness_work);
 		} else {
-			led_cdev->flags |= LED_BLINK_BRIGHTNESS_CHANGE;
+			set_bit(LED_BLINK_BRIGHTNESS_CHANGE,
+				&led_cdev->work_flags);
 			led_cdev->blink_brightness = brightness;
 		}
 		return;
diff --git a/include/linux/leds.h b/include/linux/leds.h
index ddfcb2df3656..21c598b366f8 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -42,16 +42,20 @@ struct led_classdev {
 #define LED_UNREGISTERING	(1 << 1)
 	/* Upper 16 bits reflect control information */
 #define LED_CORE_SUSPENDRESUME	(1 << 16)
-#define LED_BLINK_SW		(1 << 17)
-#define LED_BLINK_ONESHOT	(1 << 18)
-#define LED_BLINK_ONESHOT_STOP	(1 << 19)
-#define LED_BLINK_INVERT	(1 << 20)
-#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 21)
-#define LED_BLINK_DISABLE	(1 << 22)
-#define LED_SYSFS_DISABLE	(1 << 23)
-#define LED_DEV_CAP_FLASH	(1 << 24)
-#define LED_HW_PLUGGABLE	(1 << 25)
-#define LED_PANIC_INDICATOR	(1 << 26)
+#define LED_SYSFS_DISABLE	(1 << 17)
+#define LED_DEV_CAP_FLASH	(1 << 18)
+#define LED_HW_PLUGGABLE	(1 << 19)
+#define LED_PANIC_INDICATOR	(1 << 20)
+
+	/* set_brightness_work / blink_timer flags, atomic, private. */
+	unsigned long		work_flags;
+
+#define LED_BLINK_SW			0
+#define LED_BLINK_ONESHOT		1
+#define LED_BLINK_ONESHOT_STOP		2
+#define LED_BLINK_INVERT		3
+#define LED_BLINK_BRIGHTNESS_CHANGE 	4
+#define LED_BLINK_DISABLE		5
 
 	/* Set LED brightness level
 	 * Must not sleep. Use brightness_set_blocking for drivers
-- 
cgit v1.2.3


From eb1610b4c273370f491c5e194e5a56e3470d81e8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 23 Oct 2016 21:47:26 +0200
Subject: led: core: Fix blink_brightness setting race

All 3 of led_timer_func, led_set_brightness and led_set_software_blink
set blink_brightness. If led_timer_func or led_set_software_blink race
with led_set_brightness they may end up overwriting the new
blink_brightness. The new atomic work_flags does not protect against
this as it just protects the flags and not blink_brightness.

This commit introduces a new new_blink_brightness value which gets
set by led_set_brightness and read by led_timer_func on LED on, fixing
this.

Dealing with the new brightness at LED on time, makes the new
brightness apply sooner, which also fixes a led_set_brightness which
happens while a oneshot blink which ends in LED on is running not
getting applied.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
---
 drivers/leds/led-core.c | 14 +++++++-------
 include/linux/leds.h    |  1 +
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index bd6bb4d44f05..ef1360445413 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -66,17 +66,17 @@ static void led_timer_function(unsigned long data)
 	brightness = led_get_brightness(led_cdev);
 	if (!brightness) {
 		/* Time to switch the LED on. */
-		brightness = led_cdev->blink_brightness;
+		if (test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE,
+					&led_cdev->work_flags))
+			brightness = led_cdev->new_blink_brightness;
+		else
+			brightness = led_cdev->blink_brightness;
 		delay = led_cdev->blink_delay_on;
 	} else {
 		/* Store the current brightness value to be able
 		 * to restore it when the delay_off period is over.
-		 * Do it only if there is no pending blink brightness
-		 * change, to avoid overwriting the new value.
 		 */
-		if (!test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE,
-					&led_cdev->work_flags))
-			led_cdev->blink_brightness = brightness;
+		led_cdev->blink_brightness = brightness;
 		brightness = LED_OFF;
 		delay = led_cdev->blink_delay_off;
 	}
@@ -245,7 +245,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
 		} else {
 			set_bit(LED_BLINK_BRIGHTNESS_CHANGE,
 				&led_cdev->work_flags);
-			led_cdev->blink_brightness = brightness;
+			led_cdev->new_blink_brightness = brightness;
 		}
 		return;
 	}
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 21c598b366f8..569cb531094c 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -93,6 +93,7 @@ struct led_classdev {
 	unsigned long		 blink_delay_on, blink_delay_off;
 	struct timer_list	 blink_timer;
 	int			 blink_brightness;
+	int			 new_blink_brightness;
 	void			(*flash_resume)(struct led_classdev *led_cdev);
 
 	struct work_struct	set_brightness_work;
-- 
cgit v1.2.3


From d9345c65eb7930ac6755cf593ee7686f4029ccf4 Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Date: Wed, 2 Nov 2016 05:08:28 -0400
Subject: sched/core: Introduce the vcpu_is_preempted(cpu) interface

This patch is the first step to add support to improve lock holder
preemption beaviour.

vcpu_is_preempted(cpu) does the obvious thing: it tells us whether a
vCPU is preempted or not.

Defaults to false on architectures that don't support it.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Translated the changelog to English. ]
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: boqun.feng@gmail.com
Cc: bsingharora@gmail.com
Cc: dave@stgolabs.net
Cc: kernellwp@gmail.com
Cc: konrad.wilk@oracle.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: mpe@ellerman.id.au
Cc: paulmck@linux.vnet.ibm.com
Cc: paulus@samba.org
Cc: rkrcmar@redhat.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: xen-devel-request@lists.xenproject.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1478077718-37424-2-git-send-email-xinhui.pan@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc37cbe2b13c..37261afbf16a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3510,6 +3510,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
+/*
+ * In order to reduce various lock holder preemption latencies provide an
+ * interface to see if a vCPU is currently running or not.
+ *
+ * This allows us to terminate optimistic spin loops and block, analogous to
+ * the native optimistic spin heuristic of testing if the lock owner task is
+ * running or not.
+ */
+#ifndef vcpu_is_preempted
+# define vcpu_is_preempted(cpu)	false
+#endif
+
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-- 
cgit v1.2.3


From 4ec6e863625625a54f527464ab91ce1a1cb16c42 Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Date: Wed, 2 Nov 2016 05:08:34 -0400
Subject: kvm: Introduce kvm_write_guest_offset_cached()

It allows us to update some status or field of a structure partially.

We can also save a kvm_read_guest_cached() call if we just update one
fild of the struct regardless of its current value.

Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: David.Laight@ACULAB.COM
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: boqun.feng@gmail.com
Cc: borntraeger@de.ibm.com
Cc: bsingharora@gmail.com
Cc: dave@stgolabs.net
Cc: jgross@suse.com
Cc: kernellwp@gmail.com
Cc: konrad.wilk@oracle.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: mpe@ellerman.id.au
Cc: paulmck@linux.vnet.ibm.com
Cc: paulus@samba.org
Cc: rkrcmar@redhat.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: xen-devel-request@lists.xenproject.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1478077718-37424-8-git-send-email-xinhui.pan@linux.vnet.ibm.com
[ Typo fixes. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 20 ++++++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..6f0023797b33 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -645,6 +645,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 		    unsigned long len);
 int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			   void *data, unsigned long len);
+int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			   void *data, int offset, unsigned long len);
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			      gpa_t gpa, unsigned long len);
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5c360347a1e9..2f38ce55016f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1972,30 +1972,38 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
 
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-			   void *data, unsigned long len)
+int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			   void *data, int offset, unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	int r;
+	gpa_t gpa = ghc->gpa + offset;
 
-	BUG_ON(len > ghc->len);
+	BUG_ON(len + offset > ghc->len);
 
 	if (slots->generation != ghc->generation)
 		kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
 
 	if (unlikely(!ghc->memslot))
-		return kvm_write_guest(kvm, ghc->gpa, data, len);
+		return kvm_write_guest(kvm, gpa, data, len);
 
 	if (kvm_is_error_hva(ghc->hva))
 		return -EFAULT;
 
-	r = __copy_to_user((void __user *)ghc->hva, data, len);
+	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
+
+int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			   void *data, unsigned long len)
+{
+	return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
+}
 EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
 
 int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-- 
cgit v1.2.3


From 3a83f4677539bce8eaa2bca9ee9c20e172d7ab04 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Tue, 22 Nov 2016 08:57:21 -0700
Subject: block: bio: pass bvec table to bio_init()

Some drivers often use external bvec table, so introduce
this helper for this case. It is always safe to access the
bio->bi_io_vec in this way for this case.

After converting to this usage, it will becomes a bit easier
to evaluate the remaining direct access to bio->bi_io_vec,
so it can help to prepare for the following multipage bvec
support.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>

Fixed up the new O_DIRECT cases.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c                   |  8 ++++++--
 drivers/block/floppy.c        |  3 +--
 drivers/md/bcache/io.c        |  4 +---
 drivers/md/bcache/journal.c   |  4 +---
 drivers/md/bcache/movinggc.c  |  6 ++----
 drivers/md/bcache/request.c   |  2 +-
 drivers/md/bcache/super.c     | 12 +++---------
 drivers/md/bcache/writeback.c |  5 ++---
 drivers/md/dm-bufio.c         |  4 +---
 drivers/md/dm.c               |  2 +-
 drivers/md/multipath.c        |  2 +-
 drivers/md/raid5-cache.c      |  2 +-
 drivers/md/raid5.c            |  9 ++-------
 drivers/nvme/target/io-cmd.c  |  4 +---
 fs/block_dev.c                |  4 +---
 fs/logfs/dev_bdev.c           |  4 +---
 include/linux/bio.h           |  3 ++-
 17 files changed, 28 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 2cf6ebabc68c..de257ced69b1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -270,11 +270,15 @@ static void bio_free(struct bio *bio)
 	}
 }
 
-void bio_init(struct bio *bio)
+void bio_init(struct bio *bio, struct bio_vec *table,
+	      unsigned short max_vecs)
 {
 	memset(bio, 0, sizeof(*bio));
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
+
+	bio->bi_io_vec = table;
+	bio->bi_max_vecs = max_vecs;
 }
 EXPORT_SYMBOL(bio_init);
 
@@ -480,7 +484,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		return NULL;
 
 	bio = p + front_pad;
-	bio_init(bio);
+	bio_init(bio, NULL, 0);
 
 	if (nr_iovecs > inline_vecs) {
 		unsigned long idx = 0;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e3d8e4ced4a2..6a3ff2b2e3ae 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3806,8 +3806,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
 
 	cbdata.drive = drive;
 
-	bio_init(&bio);
-	bio.bi_io_vec = &bio_vec;
+	bio_init(&bio, &bio_vec, 1);
 	bio_vec.bv_page = page;
 	bio_vec.bv_len = size;
 	bio_vec.bv_offset = 0;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index e97b0acf7b8d..db45a88c0ce9 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -24,9 +24,7 @@ struct bio *bch_bbio_alloc(struct cache_set *c)
 	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
 	struct bio *bio = &b->bio;
 
-	bio_init(bio);
-	bio->bi_max_vecs	 = bucket_pages(c);
-	bio->bi_io_vec		 = bio->bi_inline_vecs;
+	bio_init(bio, bio->bi_inline_vecs, bucket_pages(c));
 
 	return bio;
 }
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 6925023e12d4..1198e53d5670 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -448,13 +448,11 @@ static void do_journal_discard(struct cache *ca)
 
 		atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
 
-		bio_init(bio);
+		bio_init(bio, bio->bi_inline_vecs, 1);
 		bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
 		bio->bi_iter.bi_sector	= bucket_to_sector(ca->set,
 						ca->sb.d[ja->discard_idx]);
 		bio->bi_bdev		= ca->bdev;
-		bio->bi_max_vecs	= 1;
-		bio->bi_io_vec		= bio->bi_inline_vecs;
 		bio->bi_iter.bi_size	= bucket_bytes(ca);
 		bio->bi_end_io		= journal_discard_endio;
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 5c4bddecfaf0..13b8a907006d 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -77,15 +77,13 @@ static void moving_init(struct moving_io *io)
 {
 	struct bio *bio = &io->bio.bio;
 
-	bio_init(bio);
+	bio_init(bio, bio->bi_inline_vecs,
+		 DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS));
 	bio_get(bio);
 	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
 	bio->bi_iter.bi_size	= KEY_SIZE(&io->w->key) << 9;
-	bio->bi_max_vecs	= DIV_ROUND_UP(KEY_SIZE(&io->w->key),
-					       PAGE_SECTORS);
 	bio->bi_private		= &io->cl;
-	bio->bi_io_vec		= bio->bi_inline_vecs;
 	bch_bio_map(bio, NULL);
 }
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 0d99b5f4b3e6..f49c5417527d 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -623,7 +623,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
 {
 	struct bio *bio = &s->bio.bio;
 
-	bio_init(bio);
+	bio_init(bio, NULL, 0);
 	__bio_clone_fast(bio, orig_bio);
 	bio->bi_end_io		= request_endio;
 	bio->bi_private		= &s->cl;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 988edf928466..2fb5bfeb43e2 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1152,9 +1152,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 	dc->bdev = bdev;
 	dc->bdev->bd_holder = dc;
 
-	bio_init(&dc->sb_bio);
-	dc->sb_bio.bi_max_vecs	= 1;
-	dc->sb_bio.bi_io_vec	= dc->sb_bio.bi_inline_vecs;
+	bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
 	dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
 	get_page(sb_page);
 
@@ -1814,9 +1812,7 @@ static int cache_alloc(struct cache *ca)
 	__module_get(THIS_MODULE);
 	kobject_init(&ca->kobj, &bch_cache_ktype);
 
-	bio_init(&ca->journal.bio);
-	ca->journal.bio.bi_max_vecs = 8;
-	ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
+	bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
 
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
@@ -1852,9 +1848,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
 	ca->bdev = bdev;
 	ca->bdev->bd_holder = ca;
 
-	bio_init(&ca->sb_bio);
-	ca->sb_bio.bi_max_vecs	= 1;
-	ca->sb_bio.bi_io_vec	= ca->sb_bio.bi_inline_vecs;
+	bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
 	ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
 	get_page(sb_page);
 
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index e51644e503a5..69e1ae59cab8 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -106,14 +106,13 @@ static void dirty_init(struct keybuf_key *w)
 	struct dirty_io *io = w->private;
 	struct bio *bio = &io->bio;
 
-	bio_init(bio);
+	bio_init(bio, bio->bi_inline_vecs,
+		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
 	if (!io->dc->writeback_percent)
 		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
 	bio->bi_iter.bi_size	= KEY_SIZE(&w->key) << 9;
-	bio->bi_max_vecs	= DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS);
 	bio->bi_private		= w;
-	bio->bi_io_vec		= bio->bi_inline_vecs;
 	bch_bio_map(bio, NULL);
 }
 
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index b3ba142e59a4..262e75365cc0 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -611,9 +611,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
 	char *ptr;
 	int len;
 
-	bio_init(&b->bio);
-	b->bio.bi_io_vec = b->bio_vec;
-	b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
+	bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
 	b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
 	b->bio.bi_bdev = b->c->bdev;
 	b->bio.bi_end_io = inline_endio;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b2abfa41af3e..7915467d3726 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1525,7 +1525,7 @@ static struct mapped_device *alloc_dev(int minor)
 	if (!md->bdev)
 		goto bad;
 
-	bio_init(&md->flush_bio);
+	bio_init(&md->flush_bio, NULL, 0);
 	md->flush_bio.bi_bdev = md->bdev;
 	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 673efbd6fc47..4da06d813b8f 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -130,7 +130,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	bio_init(&mp_bh->bio);
+	bio_init(&mp_bh->bio, NULL, 0);
 	__bio_clone_fast(&mp_bh->bio, bio);
 
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 28d015c6fffe..25e9622d7d80 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1201,7 +1201,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	INIT_LIST_HEAD(&log->io_end_ios);
 	INIT_LIST_HEAD(&log->flushing_ios);
 	INIT_LIST_HEAD(&log->finished_ios);
-	bio_init(&log->flush_bio);
+	bio_init(&log->flush_bio, NULL, 0);
 
 	log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
 	if (!log->io_kc)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 70acdd379e44..5f9e28443c8a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2004,13 +2004,8 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
 
-			bio_init(&dev->req);
-			dev->req.bi_io_vec = &dev->vec;
-			dev->req.bi_max_vecs = 1;
-
-			bio_init(&dev->rreq);
-			dev->rreq.bi_io_vec = &dev->rvec;
-			dev->rreq.bi_max_vecs = 1;
+			bio_init(&dev->req, &dev->vec, 1);
+			bio_init(&dev->rreq, &dev->rvec, 1);
 		}
 	}
 	return sh;
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index ef52b1e70144..c4dc9ea8ade0 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct nvmet_req *req)
 {
 	struct bio *bio = &req->inline_bio;
 
-	bio_init(bio);
-	bio->bi_max_vecs = NVMET_MAX_INLINE_BIOVEC;
-	bio->bi_io_vec = req->inline_bvec;
+	bio_init(bio, req->inline_bvec, NVMET_MAX_INLINE_BIOVEC);
 }
 
 static void nvmet_execute_rw(struct nvmet_req *req)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b0c790a19db9..7022ddc55b12 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -222,9 +222,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 			return -ENOMEM;
 	}
 
-	bio_init(&bio);
-	bio.bi_max_vecs = nr_pages;
-	bio.bi_io_vec = vecs;
+	bio_init(&bio, vecs, nr_pages);
 	bio.bi_bdev = bdev;
 	bio.bi_iter.bi_sector = pos >> 9;
 	bio.bi_private = current;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a8329cc47dec..dc8cafeee038 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -19,9 +19,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int op)
 	struct bio bio;
 	struct bio_vec bio_vec;
 
-	bio_init(&bio);
-	bio.bi_max_vecs = 1;
-	bio.bi_io_vec = &bio_vec;
+	bio_init(&bio, &bio_vec, 1);
 	bio_vec.bv_page = page;
 	bio_vec.bv_len = PAGE_SIZE;
 	bio_vec.bv_offset = 0;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d367cd37a7f7..70a7244f08a7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -420,7 +420,8 @@ extern int bio_phys_segments(struct request_queue *, struct bio *);
 extern int submit_bio_wait(struct bio *bio);
 extern void bio_advance(struct bio *, unsigned);
 
-extern void bio_init(struct bio *);
+extern void bio_init(struct bio *bio, struct bio_vec *table,
+		     unsigned short max_vecs);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
-- 
cgit v1.2.3


From ae0f5499511e5b1723792c848e44d661d0d4e22f Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 15 Nov 2016 01:36:18 -0500
Subject: kvm: x86: don't print warning messages for unimplemented msrs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change unimplemented msrs messages to use pr_debug.
If CONFIG_DYNAMIC_DEBUG is set, then these messages can be
enabled at run time or else -DDEBUG can be used at compile
time to enable them. These messages will still be printed if
ignore_msrs=1.

Signed-off-by: Bandan Das <bsd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/mmu.c       | 2 +-
 arch/x86/kvm/x86.c       | 5 +++--
 include/linux/kvm_host.h | 6 ++++++
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1d2f350dcf6c..2a5ccec8b2a8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4959,7 +4959,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
 	 * zap all shadow pages.
 	 */
 	if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
-		printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
+		kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
 		kvm_mmu_invalidate_zap_all_pages(kvm);
 	}
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 792980f6e123..6f9c9ad13f88 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2277,7 +2277,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
 			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
-			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
+			vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
 				    msr, data);
 			return 1;
 		} else {
@@ -2489,7 +2489,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
 		if (!ignore_msrs) {
-			vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
+			vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
+					       msr_info->index);
 			return 1;
 		} else {
 			vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..274bf343cbd0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -439,6 +439,9 @@ struct kvm {
 	pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
 #define kvm_debug(fmt, ...) \
 	pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
+#define kvm_debug_ratelimited(fmt, ...) \
+	pr_debug_ratelimited("kvm [%i]: " fmt, task_pid_nr(current), \
+			     ## __VA_ARGS__)
 #define kvm_pr_unimpl(fmt, ...) \
 	pr_err_ratelimited("kvm [%i]: " fmt, \
 			   task_tgid_nr(current), ## __VA_ARGS__)
@@ -450,6 +453,9 @@ struct kvm {
 
 #define vcpu_debug(vcpu, fmt, ...)					\
 	kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+#define vcpu_debug_ratelimited(vcpu, fmt, ...)				\
+	kvm_debug_ratelimited("vcpu%i " fmt, (vcpu)->vcpu_id,           \
+			      ## __VA_ARGS__)
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
-- 
cgit v1.2.3


From bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Oct 2016 21:23:16 -0500
Subject: mm: Add a user_ns owner to mm_struct and fix ptrace permission checks

During exec dumpable is cleared if the file that is being executed is
not readable by the user executing the file.  A bug in
ptrace_may_access allows reading the file if the executable happens to
enter into a subordinate user namespace (aka clone(CLONE_NEWUSER),
unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER).

This problem is fixed with only necessary userspace breakage by adding
a user namespace owner to mm_struct, captured at the time of exec, so
it is clear in which user namespace CAP_SYS_PTRACE must be present in
to be able to safely give read permission to the executable.

The function ptrace_may_access is modified to verify that the ptracer
has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns.
This ensures that if the task changes it's cred into a subordinate
user namespace it does not become ptraceable.

The function ptrace_attach is modified to only set PT_PTRACE_CAP when
CAP_SYS_PTRACE is held over task->mm->user_ns.  The intent of
PT_PTRACE_CAP is to be a flag to note that whatever permission changes
the task might go through the tracer has sufficient permissions for
it not to be an issue.  task->cred->user_ns is always the same
as or descendent of mm->user_ns.  Which guarantees that having
CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks
credentials.

To prevent regressions mm->dumpable and mm->user_ns are not considered
when a task has no mm.  As simply failing ptrace_may_attach causes
regressions in privileged applications attempting to read things
such as /proc/<pid>/stat

Cc: stable@vger.kernel.org
Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Cyrill Gorcunov <gorcunov@openvz.org>
Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/mm_types.h |  1 +
 kernel/fork.c            |  9 ++++++---
 kernel/ptrace.c          | 26 +++++++++++---------------
 mm/init-mm.c             |  2 ++
 4 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4a8acedf4b7d..08d947fc4c59 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -473,6 +473,7 @@ struct mm_struct {
 	 */
 	struct task_struct __rcu *owner;
 #endif
+	struct user_namespace *user_ns;
 
 	/* store ref to file /proc/<pid>/exe symlink points to */
 	struct file __rcu *exe_file;
diff --git a/kernel/fork.c b/kernel/fork.c
index 997ac1d584f7..ba8a01564985 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+	struct user_namespace *user_ns)
 {
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
@@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	if (init_new_context(p, mm))
 		goto fail_nocontext;
 
+	mm->user_ns = get_user_ns(user_ns);
 	return mm;
 
 fail_nocontext:
@@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	return mm_init(mm, current);
+	return mm_init(mm, current, current_user_ns());
 }
 
 /*
@@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm)
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
 	check_mm(mm);
+	put_user_ns(mm->user_ns);
 	free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1126,7 +1129,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 
 	memcpy(mm, oldmm, sizeof(*mm));
 
-	if (!mm_init(mm, tsk))
+	if (!mm_init(mm, tsk, mm->user_ns))
 		goto fail_nomem;
 
 	err = dup_mmap(mm, oldmm);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6474f7272ec..282821557183 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -220,7 +220,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	const struct cred *cred = current_cred(), *tcred;
-	int dumpable = 0;
+	struct mm_struct *mm;
 	kuid_t caller_uid;
 	kgid_t caller_gid;
 
@@ -271,16 +271,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return -EPERM;
 ok:
 	rcu_read_unlock();
-	smp_rmb();
-	if (task->mm)
-		dumpable = get_dumpable(task->mm);
-	rcu_read_lock();
-	if (dumpable != SUID_DUMP_USER &&
-	    !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
-		rcu_read_unlock();
-		return -EPERM;
-	}
-	rcu_read_unlock();
+	mm = task->mm;
+	if (mm &&
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptrace_has_cap(mm->user_ns, mode)))
+	    return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
 }
@@ -331,6 +326,11 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	task_lock(task);
 	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+	if (!retval) {
+		struct mm_struct *mm = task->mm;
+		if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE))
+			flags |= PT_PTRACE_CAP;
+	}
 	task_unlock(task);
 	if (retval)
 		goto unlock_creds;
@@ -344,10 +344,6 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	if (seize)
 		flags |= PT_SEIZED;
-	rcu_read_lock();
-	if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
-		flags |= PT_PTRACE_CAP;
-	rcu_read_unlock();
 	task->ptrace = flags;
 
 	__ptrace_link(task, current);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a56a851908d2..975e49f00f34 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -6,6 +6,7 @@
 #include <linux/cpumask.h>
 
 #include <linux/atomic.h>
+#include <linux/user_namespace.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 
@@ -21,5 +22,6 @@ struct mm_struct init_mm = {
 	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
+	.user_ns	= &init_user_ns,
 	INIT_MM_CONTEXT(init_mm)
 };
-- 
cgit v1.2.3


From 64b875f7ac8a5d60a4e191479299e931ee949b67 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 14 Nov 2016 18:48:07 -0600
Subject: ptrace: Capture the ptracer's creds not PT_PTRACE_CAP

When the flag PT_PTRACE_CAP was added the PTRACE_TRACEME path was
overlooked.  This can result in incorrect behavior when an application
like strace traces an exec of a setuid executable.

Further PT_PTRACE_CAP does not have enough information for making good
security decisions as it does not report which user namespace the
capability is in.  This has already allowed one mistake through
insufficient granulariy.

I found this issue when I was testing another corner case of exec and
discovered that I could not get strace to set PT_PTRACE_CAP even when
running strace as root with a full set of caps.

This change fixes the above issue with strace allowing stracing as
root a setuid executable without disabling setuid.  More fundamentaly
this change allows what is allowable at all times, by using the correct
information in it's decision.

Cc: stable@vger.kernel.org
Fixes: 4214e42f96d4 ("v2.4.9.11 -> v2.4.9.12")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c                  |  2 +-
 include/linux/capability.h |  1 +
 include/linux/ptrace.h     |  1 -
 include/linux/sched.h      |  1 +
 kernel/capability.c        | 20 ++++++++++++++++++++
 kernel/ptrace.c            | 12 +++++++-----
 6 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 4e497b9ee71e..3cf2cfced97a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1406,7 +1406,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 	unsigned n_fs;
 
 	if (p->ptrace) {
-		if (p->ptrace & PT_PTRACE_CAP)
+		if (ptracer_capable(p, current_user_ns()))
 			bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
 		else
 			bprm->unsafe |= LSM_UNSAFE_PTRACE;
diff --git a/include/linux/capability.h b/include/linux/capability.h
index dbc21c719ce6..d6088e2a7668 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -242,6 +242,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
 #endif /* CONFIG_MULTIUSER */
 extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
+extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
 
 /* audit system wants to get cap info from files as well */
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 504c98a278d4..e13bfdf7f314 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -19,7 +19,6 @@
 #define PT_SEIZED	0x00010000	/* SEIZE used, enable new behavior */
 #define PT_PTRACED	0x00000001
 #define PT_DTRACE	0x00000002	/* delayed trace (used on m68k, i386) */
-#define PT_PTRACE_CAP	0x00000004	/* ptracer can follow suid-exec */
 
 #define PT_OPT_FLAG_SHIFT	3
 /* PT_TRACE_* event enable flags */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..e9f693598e15 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1656,6 +1656,7 @@ struct task_struct {
 	struct list_head cpu_timers[3];
 
 /* process credentials */
+	const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
 	const struct cred __rcu *real_cred; /* objective and real subjective task
 					 * credentials (COW) */
 	const struct cred __rcu *cred;	/* effective (overridable) subjective task
diff --git a/kernel/capability.c b/kernel/capability.c
index 00411c82dac5..dfa0e4528b0b 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -473,3 +473,23 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
 		kgid_has_mapping(ns, inode->i_gid);
 }
 EXPORT_SYMBOL(capable_wrt_inode_uidgid);
+
+/**
+ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
+ * @tsk: The task that may be ptraced
+ * @ns: The user namespace to search for CAP_SYS_PTRACE in
+ *
+ * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
+ * in the specified user namespace.
+ */
+bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
+{
+	int ret = 0;  /* An absent tracer adds no restrictions */
+	const struct cred *cred;
+	rcu_read_lock();
+	cred = rcu_dereference(tsk->ptracer_cred);
+	if (cred)
+		ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+	rcu_read_unlock();
+	return (ret == 0);
+}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 282821557183..e82c15cadd6d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 	BUG_ON(!list_empty(&child->ptrace_entry));
 	list_add(&child->ptrace_entry, &new_parent->ptraced);
 	child->parent = new_parent;
+	rcu_read_lock();
+	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	rcu_read_unlock();
 }
 
 /**
@@ -71,12 +74,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  */
 void __ptrace_unlink(struct task_struct *child)
 {
+	const struct cred *old_cred;
 	BUG_ON(!child->ptrace);
 
 	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
+	old_cred = child->ptracer_cred;
+	child->ptracer_cred = NULL;
+	put_cred(old_cred);
 
 	spin_lock(&child->sighand->siglock);
 	child->ptrace = 0;
@@ -326,11 +333,6 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	task_lock(task);
 	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
-	if (!retval) {
-		struct mm_struct *mm = task->mm;
-		if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE))
-			flags |= PT_PTRACE_CAP;
-	}
 	task_unlock(task);
 	if (retval)
 		goto unlock_creds;
-- 
cgit v1.2.3


From 84d77d3f06e7e8dea057d10e8ec77ad71f721be3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 22 Nov 2016 12:06:50 -0600
Subject: ptrace: Don't allow accessing an undumpable mm

It is the reasonable expectation that if an executable file is not
readable there will be no way for a user without special privileges to
read the file.  This is enforced in ptrace_attach but if ptrace
is already attached before exec there is no enforcement for read-only
executables.

As the only way to read such an mm is through access_process_vm
spin a variant called ptrace_access_vm that will fail if the
target process is not being ptraced by the current process, or
the current process did not have sufficient privileges when ptracing
began to read the target processes mm.

In the ptrace implementations replace access_process_vm by
ptrace_access_vm.  There remain several ptrace sites that still use
access_process_vm as they are reading the target executables
instructions (for kernel consumption) or register stacks.  As such it
does not appear necessary to add a permission check to those calls.

This bug has always existed in Linux.

Fixes: v1.0
Cc: stable@vger.kernel.org
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/alpha/kernel/ptrace.c         |  2 +-
 arch/blackfin/kernel/ptrace.c      |  4 ++--
 arch/cris/arch-v32/kernel/ptrace.c |  2 +-
 arch/ia64/kernel/ptrace.c          |  2 +-
 arch/mips/kernel/ptrace32.c        |  4 ++--
 arch/powerpc/kernel/ptrace32.c     |  4 ++--
 include/linux/mm.h                 |  2 ++
 include/linux/ptrace.h             |  3 +++
 kernel/ptrace.c                    | 42 ++++++++++++++++++++++++++++++++------
 mm/memory.c                        |  2 +-
 mm/nommu.c                         |  2 +-
 11 files changed, 52 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 940dfb406591..04abdec7f496 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request,
 	/* When I and D space are separate, these will need to be fixed.  */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp),
+		copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp),
 				FOLL_FORCE);
 		ret = -EIO;
 		if (copied != sizeof(tmp))
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index 8d79286ee4e8..360d99645163 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			switch (bfin_mem_access_type(addr, to_copy)) {
 			case BFIN_MEM_ACCESS_CORE:
 			case BFIN_MEM_ACCESS_CORE_ONLY:
-				copied = access_process_vm(child, addr, &tmp,
+				copied = ptrace_access_vm(child, addr, &tmp,
 							   to_copy, FOLL_FORCE);
 				if (copied)
 					break;
@@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			switch (bfin_mem_access_type(addr, to_copy)) {
 			case BFIN_MEM_ACCESS_CORE:
 			case BFIN_MEM_ACCESS_CORE_ONLY:
-				copied = access_process_vm(child, addr, &data,
+				copied = ptrace_access_vm(child, addr, &data,
 				                           to_copy,
 							   FOLL_FORCE | FOLL_WRITE);
 				break;
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index f0df654ac6fc..fe1f9cf7b391 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				/* The trampoline page is globally mapped, no page table to traverse.*/
 				tmp = *(unsigned long*)addr;
 			} else {
-				copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+				copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 
 				if (copied != sizeof(tmp))
 					break;
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 31aa8c0f68e1..36f660da8124 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request,
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
 		/* read word at location addr */
-		if (access_process_vm(child, addr, &data, sizeof(data),
+		if (ptrace_access_vm(child, addr, &data, sizeof(data),
 				FOLL_FORCE)
 		    != sizeof(data))
 			return -EIO;
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 7e71a4e0281b..5fcbdcd7abd0 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
 			break;
 
-		copied = access_process_vm(child, (u64)addrOthers, &tmp,
+		copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
 				sizeof(tmp), FOLL_FORCE);
 		if (copied != sizeof(tmp))
 			break;
@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
 			break;
 		ret = 0;
-		if (access_process_vm(child, (u64)addrOthers, &data,
+		if (ptrace_access_vm(child, (u64)addrOthers, &data,
 					sizeof(data),
 					FOLL_FORCE | FOLL_WRITE) == sizeof(data))
 			break;
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 010b7b310237..1e887f3a61a6 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
 			break;
 
-		copied = access_process_vm(child, (u64)addrOthers, &tmp,
+		copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
 				sizeof(tmp), FOLL_FORCE);
 		if (copied != sizeof(tmp))
 			break;
@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
 			break;
 		ret = 0;
-		if (access_process_vm(child, (u64)addrOthers, &tmp,
+		if (ptrace_access_vm(child, (u64)addrOthers, &tmp,
 					sizeof(tmp),
 					FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
 			break;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d73aeaf..0b5b2e4df14e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
 		unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);
+extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long addr, void *buf, int len, unsigned int gup_flags);
 
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e13bfdf7f314..e0e539321ab9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -8,6 +8,9 @@
 #include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
 #include <uapi/linux/ptrace.h>
 
+extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+			    void *buf, int len, unsigned int gup_flags);
+
 /*
  * Ptrace flags
  *
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e82c15cadd6d..49ba7c1ade9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -27,6 +27,35 @@
 #include <linux/cn_proc.h>
 #include <linux/compat.h>
 
+/*
+ * Access another process' address space via ptrace.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+		     void *buf, int len, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	if (!tsk->ptrace ||
+	    (current != tsk->parent) ||
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptracer_capable(tsk, mm->user_ns))) {
+		mmput(mm);
+		return 0;
+	}
+
+	ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+	mmput(mm);
+
+	return ret;
+}
+
 
 /*
  * ptrace a task: make the debugger its new parent and
@@ -535,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
 		int this_len, retval;
 
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-		retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
+		retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
+
 		if (!retval) {
 			if (copied)
 				break;
@@ -562,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
 		if (copy_from_user(buf, src, this_len))
 			return -EFAULT;
-		retval = access_process_vm(tsk, dst, buf, this_len,
+		retval = ptrace_access_vm(tsk, dst, buf, this_len,
 				FOLL_FORCE | FOLL_WRITE);
 		if (!retval) {
 			if (copied)
@@ -1126,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
 	unsigned long tmp;
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+	copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 	if (copied != sizeof(tmp))
 		return -EIO;
 	return put_user(tmp, (unsigned long __user *)data);
@@ -1137,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
 {
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &data, sizeof(data),
+	copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
 			FOLL_FORCE | FOLL_WRITE);
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
@@ -1155,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 	switch (request) {
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
-		ret = access_process_vm(child, addr, &word, sizeof(word),
+		ret = ptrace_access_vm(child, addr, &word, sizeof(word),
 				FOLL_FORCE);
 		if (ret != sizeof(word))
 			ret = -EIO;
@@ -1165,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
-		ret = access_process_vm(child, addr, &data, sizeof(data),
+		ret = ptrace_access_vm(child, addr, &data, sizeof(data),
 				FOLL_FORCE | FOLL_WRITE);
 		ret = (ret != sizeof(data) ? -EIO : 0);
 		break;
diff --git a/mm/memory.c b/mm/memory.c
index e18c57bdc75c..cbb1e5e5f791 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
  * Access another process' address space as given in mm.  If non-NULL, use the
  * given task for page fault accounting.
  */
-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
 	struct vm_area_struct *vma;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8b8faaf2a9e9..44265e00b701 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe,
 }
 EXPORT_SYMBOL(filemap_map_pages);
 
-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
 	struct vm_area_struct *vma;
-- 
cgit v1.2.3


From f84df2a6f268de584a201e8911384a2d244876e3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 16 Nov 2016 22:06:51 -0600
Subject: exec: Ensure mm->user_ns contains the execed files

When the user namespace support was merged the need to prevent
ptrace from revealing the contents of an unreadable executable
was overlooked.

Correct this oversight by ensuring that the executed file
or files are in mm->user_ns, by adjusting mm->user_ns.

Use the new function privileged_wrt_inode_uidgid to see if
the executable is a member of the user namespace, and as such
if having CAP_SYS_PTRACE in the user namespace should allow
tracing the executable.  If not update mm->user_ns to
the parent user namespace until an appropriate parent is found.

Cc: stable@vger.kernel.org
Reported-by: Jann Horn <jann@thejh.net>
Fixes: 9e4a36ece652 ("userns: Fail exec for suid and sgid binaries with ids outside our user namespace.")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c                  | 19 +++++++++++++++++--
 include/linux/capability.h |  1 +
 kernel/capability.c        | 16 ++++++++++++++--
 3 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 3cf2cfced97a..fdf53f0c421b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1275,8 +1275,22 @@ EXPORT_SYMBOL(flush_old_exec);
 
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
-	if (inode_permission(file_inode(file), MAY_READ) < 0)
+	struct inode *inode = file_inode(file);
+	if (inode_permission(inode, MAY_READ) < 0) {
+		struct user_namespace *old, *user_ns;
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
+		/* Ensure mm->user_ns contains the executable */
+		user_ns = old = bprm->mm->user_ns;
+		while ((user_ns != &init_user_ns) &&
+		       !privileged_wrt_inode_uidgid(user_ns, inode))
+			user_ns = user_ns->parent;
+
+		if (old != user_ns) {
+			bprm->mm->user_ns = get_user_ns(user_ns);
+			put_user_ns(old);
+		}
+	}
 }
 EXPORT_SYMBOL(would_dump);
 
@@ -1306,7 +1320,6 @@ void setup_new_exec(struct linux_binprm * bprm)
 	    !gid_eq(bprm->cred->gid, current_egid())) {
 		current->pdeath_signal = 0;
 	} else {
-		would_dump(bprm, bprm->file);
 		if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
 			set_dumpable(current->mm, suid_dumpable);
 	}
@@ -1741,6 +1754,8 @@ static int do_execveat_common(int fd, struct filename *filename,
 	if (retval < 0)
 		goto out;
 
+	would_dump(bprm, bprm->file);
+
 	retval = exec_binprm(bprm);
 	if (retval < 0)
 		goto out;
diff --git a/include/linux/capability.h b/include/linux/capability.h
index d6088e2a7668..6ffb67e10c06 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -240,6 +240,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
 	return true;
 }
 #endif /* CONFIG_MULTIUSER */
+extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
 extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
diff --git a/kernel/capability.c b/kernel/capability.c
index dfa0e4528b0b..4984e1f552eb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
 }
 EXPORT_SYMBOL(file_ns_capable);
 
+/**
+ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
+ * @ns: The user namespace in question
+ * @inode: The inode in question
+ *
+ * Return true if the inode uid and gid are within the namespace.
+ */
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
+{
+	return kuid_has_mapping(ns, inode->i_uid) &&
+		kgid_has_mapping(ns, inode->i_gid);
+}
+
 /**
  * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
  * @inode: The inode in question
@@ -469,8 +482,7 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
 {
 	struct user_namespace *ns = current_user_ns();
 
-	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
-		kgid_has_mapping(ns, inode->i_gid);
+	return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
 }
 EXPORT_SYMBOL(capable_wrt_inode_uidgid);
 
-- 
cgit v1.2.3


From d6526e73dbbbc4c382c1b16942413eab77ed5e1a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Nov 2016 19:35:22 +0100
Subject: x86/mce/therm_throt: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: rt@linuxtronix.de
Cc: Borislav Petkov <bp@alien8.de>
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161117183541.8588-2-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 55 ++++++++------------------------
 include/linux/cpuhotplug.h               |  1 +
 2 files changed, 14 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6b9dc4d18ccc..7f56620735ca 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -271,58 +271,29 @@ static void thermal_throttle_remove_dev(struct device *dev)
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-thermal_throttle_cpu_callback(struct notifier_block *nfb,
-			      unsigned long action,
-			      void *hcpu)
+static int thermal_throttle_prepare(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
-	struct device *dev;
-	int err = 0;
-
-	dev = get_cpu_device(cpu);
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		err = thermal_throttle_add_dev(dev, cpu);
-		WARN_ON(err);
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		thermal_throttle_remove_dev(dev);
-		break;
-	}
-	return notifier_from_errno(err);
+	struct device *dev = get_cpu_device(cpu);
+
+	return thermal_throttle_add_dev(dev, cpu);
 }
 
-static struct notifier_block thermal_throttle_cpu_notifier =
+static int thermal_throttle_dead(unsigned int cpu)
 {
-	.notifier_call = thermal_throttle_cpu_callback,
-};
+	struct device *dev = get_cpu_device(cpu);
+
+	thermal_throttle_remove_dev(dev);
+	return 0;
+}
 
 static __init int thermal_throttle_init_device(void)
 {
-	unsigned int cpu = 0;
-	int err;
-
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
-	cpu_notifier_register_begin();
-
-	/* connect live CPUs to sysfs */
-	for_each_online_cpu(cpu) {
-		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
-		WARN_ON(err);
-	}
-
-	__register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
-	cpu_notifier_register_done();
-
-	return 0;
+	return cpuhp_setup_state(CPUHP_X86_THERM_PREPARE, "x86/therm:prepare",
+				 thermal_throttle_prepare,
+				 thermal_throttle_dead);
 }
 device_initcall(thermal_throttle_init_device);
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 79b96f647d64..aea6c6a63139 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -59,6 +59,7 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
+	CPUHP_X86_THERM_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 33d97302eb502b72b76107d3122afbf18b09b3ec Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 21 Nov 2016 13:15:32 +0100
Subject: x86/mce/therm_throt: Move hotplug callbacks to online

No point to have the sysfs files around before the cpu is online and no
point to have them around until the cpu is dead. Get rid of the explicit
state.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 13 ++++++++-----
 include/linux/cpuhotplug.h               |  1 -
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7f56620735ca..e1d74fd79d5f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -271,14 +271,14 @@ static void thermal_throttle_remove_dev(struct device *dev)
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int thermal_throttle_prepare(unsigned int cpu)
+static int thermal_throttle_online(unsigned int cpu)
 {
 	struct device *dev = get_cpu_device(cpu);
 
 	return thermal_throttle_add_dev(dev, cpu);
 }
 
-static int thermal_throttle_dead(unsigned int cpu)
+static int thermal_throttle_offline(unsigned int cpu)
 {
 	struct device *dev = get_cpu_device(cpu);
 
@@ -288,12 +288,15 @@ static int thermal_throttle_dead(unsigned int cpu)
 
 static __init int thermal_throttle_init_device(void)
 {
+	int ret;
+
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
-	return cpuhp_setup_state(CPUHP_X86_THERM_PREPARE, "x86/therm:prepare",
-				 thermal_throttle_prepare,
-				 thermal_throttle_dead);
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
+				thermal_throttle_online,
+				thermal_throttle_offline);
+	return ret < 0 ? ret : 0;
 }
 device_initcall(thermal_throttle_init_device);
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index aea6c6a63139..79b96f647d64 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -59,7 +59,6 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
-	CPUHP_X86_THERM_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 8c07b494ab2859bc7efb27c40d6faff255f2d2ae Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Nov 2016 19:35:23 +0100
Subject: x86/cpuid: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: rt@linuxtronix.de
Link: http://lkml.kernel.org/r/20161117183541.8588-3-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpuid.c    | 64 ++++++++--------------------------------------
 include/linux/cpuhotplug.h |  1 +
 2 files changed, 12 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2836de390f95..fd85e93e0691 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -115,7 +115,7 @@ static const struct file_operations cpuid_fops = {
 	.open = cpuid_open,
 };
 
-static int cpuid_device_create(int cpu)
+static int cpuid_device_create(unsigned int cpu)
 {
 	struct device *dev;
 
@@ -124,35 +124,12 @@ static int cpuid_device_create(int cpu)
 	return PTR_ERR_OR_ZERO(dev);
 }
 
-static void cpuid_device_destroy(int cpu)
+static int cpuid_device_destroy(unsigned int cpu)
 {
 	device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+	return 0;
 }
 
-static int cpuid_class_cpu_callback(struct notifier_block *nfb,
-				    unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	int err = 0;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-		err = cpuid_device_create(cpu);
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-		cpuid_device_destroy(cpu);
-		break;
-	}
-	return notifier_from_errno(err);
-}
-
-static struct notifier_block cpuid_class_cpu_notifier =
-{
-	.notifier_call = cpuid_class_cpu_callback,
-};
-
 static char *cpuid_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
@@ -160,15 +137,13 @@ static char *cpuid_devnode(struct device *dev, umode_t *mode)
 
 static int __init cpuid_init(void)
 {
-	int i, err = 0;
-	i = 0;
+	int err;
 
 	if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS,
 			      "cpu/cpuid", &cpuid_fops)) {
 		printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
 		       CPUID_MAJOR);
-		err = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 	cpuid_class = class_create(THIS_MODULE, "cpuid");
 	if (IS_ERR(cpuid_class)) {
@@ -177,42 +152,25 @@ static int __init cpuid_init(void)
 	}
 	cpuid_class->devnode = cpuid_devnode;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(i) {
-		err = cpuid_device_create(i);
-		if (err != 0)
-			goto out_class;
-	}
-	__register_hotcpu_notifier(&cpuid_class_cpu_notifier);
-	cpu_notifier_register_done();
+	err = cpuhp_setup_state(CPUHP_X86_CPUID_PREPARE, "x86/cpuid:prepare",
+				cpuid_device_create, cpuid_device_destroy);
+	if (err)
+		goto out_class;
 
-	err = 0;
-	goto out;
+	return 0;
 
 out_class:
-	i = 0;
-	for_each_online_cpu(i) {
-		cpuid_device_destroy(i);
-	}
-	cpu_notifier_register_done();
 	class_destroy(cpuid_class);
 out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
-out:
 	return err;
 }
 
 static void __exit cpuid_exit(void)
 {
-	int cpu = 0;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		cpuid_device_destroy(cpu);
+	cpuhp_remove_state(CPUHP_X86_CPUID_PREPARE);
 	class_destroy(cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
-	__unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
-	cpu_notifier_register_done();
 }
 
 module_init(cpuid_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 79b96f647d64..bc340ef2f200 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -59,6 +59,7 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
+	CPUHP_X86_CPUID_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From ee92be9b0d7ad34cb58b61d5c0933d2e5ff7c31d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 21 Nov 2016 13:07:34 +0100
Subject: x86/cpuid: Move the hotplug callbacks to online

No point to have this file around before the cpu is online and no point to
have it around until the cpu is dead. Get rid of the explicit state.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
---
 arch/x86/kernel/cpuid.c    | 11 ++++++-----
 include/linux/cpuhotplug.h |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index fd85e93e0691..373e372a0e1c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -45,6 +45,7 @@
 #include <asm/msr.h>
 
 static struct class *cpuid_class;
+static enum cpuhp_state cpuhp_cpuid_state;
 
 struct cpuid_regs {
 	u32 eax, ebx, ecx, edx;
@@ -152,11 +153,12 @@ static int __init cpuid_init(void)
 	}
 	cpuid_class->devnode = cpuid_devnode;
 
-	err = cpuhp_setup_state(CPUHP_X86_CPUID_PREPARE, "x86/cpuid:prepare",
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/cpuid:online",
 				cpuid_device_create, cpuid_device_destroy);
-	if (err)
+	if (err < 0)
 		goto out_class;
 
+	cpuhp_cpuid_state = err;
 	return 0;
 
 out_class:
@@ -165,15 +167,14 @@ out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 	return err;
 }
+module_init(cpuid_init);
 
 static void __exit cpuid_exit(void)
 {
-	cpuhp_remove_state(CPUHP_X86_CPUID_PREPARE);
+	cpuhp_remove_state(cpuhp_cpuid_state);
 	class_destroy(cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 }
-
-module_init(cpuid_init);
 module_exit(cpuid_exit);
 
 MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index bc340ef2f200..79b96f647d64 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -59,7 +59,6 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
-	CPUHP_X86_CPUID_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 9c248f8896e6bf0c77abb98bfea8d69b5a7cd11d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Nov 2016 19:35:28 +0100
Subject: PCI/xgene-msi: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-pci@vger.kernel.org
Cc: Duc Dang <dhdang@apm.com>
Cc: rt@linuxtronix.de
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/20161117183541.8588-8-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/host/pci-xgene-msi.c | 69 +++++++++++-----------------------------
 include/linux/cpuhotplug.h       |  1 +
 2 files changed, 20 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
index a6456b578269..1f38d0836751 100644
--- a/drivers/pci/host/pci-xgene-msi.c
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -360,16 +360,16 @@ static void xgene_msi_isr(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+static enum cpuhp_state pci_xgene_online;
+
 static int xgene_msi_remove(struct platform_device *pdev)
 {
-	int virq, i;
 	struct xgene_msi *msi = platform_get_drvdata(pdev);
 
-	for (i = 0; i < NR_HW_IRQS; i++) {
-		virq = msi->msi_groups[i].gic_irq;
-		if (virq != 0)
-			irq_set_chained_handler_and_data(virq, NULL, NULL);
-	}
+	if (pci_xgene_online)
+		cpuhp_remove_state(pci_xgene_online);
+	cpuhp_remove_state(CPUHP_PCI_XGENE_DEAD);
+
 	kfree(msi->msi_groups);
 
 	kfree(msi->bitmap);
@@ -427,7 +427,7 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu)
 	return 0;
 }
 
-static void xgene_msi_hwirq_free(unsigned int cpu)
+static int xgene_msi_hwirq_free(unsigned int cpu)
 {
 	struct xgene_msi *msi = &xgene_msi_ctrl;
 	struct xgene_msi_group *msi_group;
@@ -441,33 +441,9 @@ static void xgene_msi_hwirq_free(unsigned int cpu)
 		irq_set_chained_handler_and_data(msi_group->gic_irq, NULL,
 						 NULL);
 	}
+	return 0;
 }
 
-static int xgene_msi_cpu_callback(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned cpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		xgene_msi_hwirq_alloc(cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		xgene_msi_hwirq_free(cpu);
-		break;
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block xgene_msi_cpu_notifier = {
-	.notifier_call = xgene_msi_cpu_callback,
-};
-
 static const struct of_device_id xgene_msi_match_table[] = {
 	{.compatible = "apm,xgene1-msi"},
 	{},
@@ -478,7 +454,6 @@ static int xgene_msi_probe(struct platform_device *pdev)
 	struct resource *res;
 	int rc, irq_index;
 	struct xgene_msi *xgene_msi;
-	unsigned int cpu;
 	int virt_msir;
 	u32 msi_val, msi_idx;
 
@@ -540,28 +515,22 @@ static int xgene_msi_probe(struct platform_device *pdev)
 		}
 	}
 
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		if (xgene_msi_hwirq_alloc(cpu)) {
-			dev_err(&pdev->dev, "failed to register MSI handlers\n");
-			cpu_notifier_register_done();
-			goto error;
-		}
-
-	rc = __register_hotcpu_notifier(&xgene_msi_cpu_notifier);
-	if (rc) {
-		dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
-		cpu_notifier_register_done();
-		goto error;
-	}
-
-	cpu_notifier_register_done();
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pci/xgene:online",
+			       xgene_msi_hwirq_alloc, NULL);
+	if (rc)
+		goto err_cpuhp;
+	pci_xgene_online = rc;
+	rc = cpuhp_setup_state(CPUHP_PCI_XGENE_DEAD, "pci/xgene:dead", NULL,
+			       xgene_msi_hwirq_free);
+	if (rc)
+		goto err_cpuhp;
 
 	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
 
 	return 0;
 
+err_cpuhp:
+	dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
 error:
 	xgene_msi_remove(pdev);
 	return rc;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 79b96f647d64..94c6a189421f 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -38,6 +38,7 @@ enum cpuhp_state {
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC_DEAD,
 	CPUHP_NET_DEV_DEAD,
+	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From 38b482929e8f96dfe459d2ef757d0a5c3a74cea3 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Nov 2016 19:35:33 +0100
Subject: net/iucv: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke the
callbacks on the already online CPUs. The smp function calls in the
online/downprep callbacks are not required as the callback is guaranteed to
be invoked on the upcoming/outgoing cpu.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-s390@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Cc: rt@linuxtronix.de
Link: http://lkml.kernel.org/r/20161117183541.8588-13-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |   1 +
 net/iucv/iucv.c            | 118 +++++++++++++++++----------------------------
 2 files changed, 45 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 94c6a189421f..12bbcf3ded70 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -60,6 +60,7 @@ enum cpuhp_state {
 	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
+	CPUHP_NET_IUCV_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 88a2a3ba4212..f0d6afc5d4a9 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -639,7 +639,7 @@ static void iucv_disable(void)
 	put_online_cpus();
 }
 
-static void free_iucv_data(int cpu)
+static int iucv_cpu_dead(unsigned int cpu)
 {
 	kfree(iucv_param_irq[cpu]);
 	iucv_param_irq[cpu] = NULL;
@@ -647,9 +647,10 @@ static void free_iucv_data(int cpu)
 	iucv_param[cpu] = NULL;
 	kfree(iucv_irq_data[cpu]);
 	iucv_irq_data[cpu] = NULL;
+	return 0;
 }
 
-static int alloc_iucv_data(int cpu)
+static int iucv_cpu_prepare(unsigned int cpu)
 {
 	/* Note: GFP_DMA used to get memory below 2G */
 	iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
@@ -671,58 +672,38 @@ static int alloc_iucv_data(int cpu)
 	return 0;
 
 out_free:
-	free_iucv_data(cpu);
+	iucv_cpu_dead(cpu);
 	return -ENOMEM;
 }
 
-static int iucv_cpu_notify(struct notifier_block *self,
-				     unsigned long action, void *hcpu)
+static int iucv_cpu_online(unsigned int cpu)
 {
-	cpumask_t cpumask;
-	long cpu = (long) hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (alloc_iucv_data(cpu))
-			return notifier_from_errno(-ENOMEM);
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_iucv_data(cpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!iucv_path_table)
-			break;
-		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		if (!iucv_path_table)
-			break;
-		cpumask_copy(&cpumask, &iucv_buffer_cpumask);
-		cpumask_clear_cpu(cpu, &cpumask);
-		if (cpumask_empty(&cpumask))
-			/* Can't offline last IUCV enabled cpu. */
-			return notifier_from_errno(-EINVAL);
-		smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
-		if (cpumask_empty(&iucv_irq_cpumask))
-			smp_call_function_single(
-				cpumask_first(&iucv_buffer_cpumask),
-				iucv_allow_cpu, NULL, 1);
-		break;
-	}
-	return NOTIFY_OK;
+	if (!iucv_path_table)
+		return 0;
+	iucv_declare_cpu(NULL);
+	return 0;
 }
 
-static struct notifier_block __refdata iucv_cpu_notifier = {
-	.notifier_call = iucv_cpu_notify,
-};
+static int iucv_cpu_down_prep(unsigned int cpu)
+{
+	cpumask_t cpumask;
+
+	if (!iucv_path_table)
+		return 0;
+
+	cpumask_copy(&cpumask, &iucv_buffer_cpumask);
+	cpumask_clear_cpu(cpu, &cpumask);
+	if (cpumask_empty(&cpumask))
+		/* Can't offline last IUCV enabled cpu. */
+		return -EINVAL;
+
+	iucv_retrieve_cpu(NULL);
+	if (!cpumask_empty(&iucv_irq_cpumask))
+		return 0;
+	smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
+				 iucv_allow_cpu, NULL, 1);
+	return 0;
+}
 
 /**
  * iucv_sever_pathid
@@ -2027,6 +2008,7 @@ struct iucv_interface iucv_if = {
 };
 EXPORT_SYMBOL(iucv_if);
 
+static enum cpuhp_state iucv_online;
 /**
  * iucv_init
  *
@@ -2035,7 +2017,6 @@ EXPORT_SYMBOL(iucv_if);
 static int __init iucv_init(void)
 {
 	int rc;
-	int cpu;
 
 	if (!MACHINE_IS_VM) {
 		rc = -EPROTONOSUPPORT;
@@ -2054,23 +2035,19 @@ static int __init iucv_init(void)
 		goto out_int;
 	}
 
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu) {
-		if (alloc_iucv_data(cpu)) {
-			rc = -ENOMEM;
-			goto out_free;
-		}
-	}
-	rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
+	rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
+			       iucv_cpu_prepare, iucv_cpu_dead);
 	if (rc)
 		goto out_free;
-
-	cpu_notifier_register_done();
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
+			       iucv_cpu_online, iucv_cpu_down_prep);
+	if (rc < 0)
+		goto out_free;
+	iucv_online = rc;
 
 	rc = register_reboot_notifier(&iucv_reboot_notifier);
 	if (rc)
-		goto out_cpu;
+		goto out_free;
 	ASCEBC(iucv_error_no_listener, 16);
 	ASCEBC(iucv_error_no_memory, 16);
 	ASCEBC(iucv_error_pathid, 16);
@@ -2084,14 +2061,10 @@ static int __init iucv_init(void)
 
 out_reboot:
 	unregister_reboot_notifier(&iucv_reboot_notifier);
-out_cpu:
-	cpu_notifier_register_begin();
-	__unregister_hotcpu_notifier(&iucv_cpu_notifier);
 out_free:
-	for_each_possible_cpu(cpu)
-		free_iucv_data(cpu);
-
-	cpu_notifier_register_done();
+	if (iucv_online)
+		cpuhp_remove_state(iucv_online);
+	cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
 
 	root_device_unregister(iucv_root);
 out_int:
@@ -2110,7 +2083,6 @@ out:
 static void __exit iucv_exit(void)
 {
 	struct iucv_irq_list *p, *n;
-	int cpu;
 
 	spin_lock_irq(&iucv_queue_lock);
 	list_for_each_entry_safe(p, n, &iucv_task_queue, list)
@@ -2119,11 +2091,9 @@ static void __exit iucv_exit(void)
 		kfree(p);
 	spin_unlock_irq(&iucv_queue_lock);
 	unregister_reboot_notifier(&iucv_reboot_notifier);
-	cpu_notifier_register_begin();
-	__unregister_hotcpu_notifier(&iucv_cpu_notifier);
-	for_each_possible_cpu(cpu)
-		free_iucv_data(cpu);
-	cpu_notifier_register_done();
+
+	cpuhp_remove_state_nocalls(iucv_online);
+	cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
 	root_device_unregister(iucv_root);
 	bus_unregister(&iucv_bus);
 	unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
-- 
cgit v1.2.3


From a3c9b14f6f151ee4c2a119fab14f9a60d1684d60 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Nov 2016 19:35:35 +0100
Subject: arm/bL_switcher: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: rt@linuxtronix.de
Cc: linux-arm-kernel@lists.infradead.org
Cc: Russell King <linux@armlinux.org.uk>
Link: http://lkml.kernel.org/r/20161117183541.8588-15-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/common/bL_switcher.c | 34 ++++++++++++++++++++--------------
 include/linux/cpuhotplug.h    |  1 +
 2 files changed, 21 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 37dc0fe1093f..46730017b3c5 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -757,19 +757,18 @@ EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
  * while the switcher is active.
  * We're just not ready to deal with that given the trickery involved.
  */
-static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
+static int bL_switcher_cpu_pre(unsigned int cpu)
 {
-	if (bL_switcher_active) {
-		int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu];
-		switch (action & 0xf) {
-		case CPU_UP_PREPARE:
-		case CPU_DOWN_PREPARE:
-			if (pairing == -1)
-				return NOTIFY_BAD;
-		}
-	}
-	return NOTIFY_DONE;
+	int pairing;
+
+	if (!bL_switcher_active)
+		return 0;
+
+	pairing = bL_switcher_cpu_pairing[cpu];
+
+	if (pairing == -1)
+		return -EINVAL;
+	return 0;
 }
 
 static bool no_bL_switcher;
@@ -782,8 +781,15 @@ static int __init bL_switcher_init(void)
 	if (!mcpm_is_available())
 		return -ENODEV;
 
-	cpu_notifier(bL_switcher_hotplug_callback, 0);
-
+	cpuhp_setup_state_nocalls(CPUHP_ARM_BL_PREPARE, "arm/bl:prepare",
+				  bL_switcher_cpu_pre, NULL);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "arm/bl:predown",
+					NULL, bL_switcher_cpu_pre);
+	if (ret < 0) {
+		cpuhp_remove_state_nocalls(CPUHP_ARM_BL_PREPARE);
+		pr_err("bL_switcher: Failed to allocate a hotplug state\n");
+		return ret;
+	}
 	if (!no_bL_switcher) {
 		ret = bL_switcher_enable();
 		if (ret)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 12bbcf3ded70..e3771fb959c0 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -61,6 +61,7 @@ enum cpuhp_state {
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,
+	CPUHP_ARM_BL_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 478409dd683db76cbcfe7bf8332a37f01deb0a2d Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <zhang.chunyan@linaro.org>
Date: Mon, 21 Nov 2016 15:57:18 +0800
Subject: tracing: Add hook to function tracing for other subsystems to use

Currently Function traces can be only exported to the ring buffer. This
adds a trace_export concept which can process traces and export
them to a registered destination as an addition to the current
one that outputs to Ftrace - i.e. ring buffer.

In this way, if we want function traces to be sent to other destinations
rather than only to the ring buffer, we just need to register a new
trace_export and implement its own .write() function for writing traces to
storage.

With this patch, only function tracing (trace type is TRACE_FN)
is supported.

Link: http://lkml.kernel.org/r/1479715043-6534-2-git-send-email-zhang.chunyan@linaro.org

Signed-off-by: Chunyan Zhang <zhang.chunyan@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace.h |  28 +++++++++++
 kernel/trace/trace.c  | 129 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/trace.h

(limited to 'include/linux')

diff --git a/include/linux/trace.h b/include/linux/trace.h
new file mode 100644
index 000000000000..9330a58e2651
--- /dev/null
+++ b/include/linux/trace.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_TRACE_H
+#define _LINUX_TRACE_H
+
+#ifdef CONFIG_TRACING
+/*
+ * The trace export - an export of Ftrace output. The trace_export
+ * can process traces and export them to a registered destination as
+ * an addition to the current only output of Ftrace - i.e. ring buffer.
+ *
+ * If you want traces to be sent to some other place rather than ring
+ * buffer only, just need to register a new trace_export and implement
+ * its own .write() function for writing traces to the storage.
+ *
+ * next		- pointer to the next trace_export
+ * write	- copy traces which have been delt with ->commit() to
+ *		  the destination
+ */
+struct trace_export {
+	struct trace_export __rcu	*next;
+	void (*write)(const void *, unsigned int);
+};
+
+int register_ftrace_export(struct trace_export *export);
+int unregister_ftrace_export(struct trace_export *export);
+
+#endif	/* CONFIG_TRACING */
+
+#endif	/* _LINUX_TRACE_H */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 57069e7f369c..edccdff8a36d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,6 +40,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/trace.h>
 #include <linux/sched/rt.h>
 
 #include "trace.h"
@@ -2128,6 +2129,129 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 
+static void
+trace_process_export(struct trace_export *export,
+	       struct ring_buffer_event *event)
+{
+	struct trace_entry *entry;
+	unsigned int size = 0;
+
+	entry = ring_buffer_event_data(event);
+	size = ring_buffer_event_length(event);
+	export->write(entry, size);
+}
+
+static DEFINE_MUTEX(ftrace_export_lock);
+
+static struct trace_export __rcu *ftrace_exports_list __read_mostly;
+
+static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
+
+static inline void ftrace_exports_enable(void)
+{
+	static_branch_enable(&ftrace_exports_enabled);
+}
+
+static inline void ftrace_exports_disable(void)
+{
+	static_branch_disable(&ftrace_exports_enabled);
+}
+
+void ftrace_exports(struct ring_buffer_event *event)
+{
+	struct trace_export *export;
+
+	preempt_disable_notrace();
+
+	export = rcu_dereference_raw_notrace(ftrace_exports_list);
+	while (export) {
+		trace_process_export(export, event);
+		export = rcu_dereference_raw_notrace(export->next);
+	}
+
+	preempt_enable_notrace();
+}
+
+static inline void
+add_trace_export(struct trace_export **list, struct trace_export *export)
+{
+	rcu_assign_pointer(export->next, *list);
+	/*
+	 * We are entering export into the list but another
+	 * CPU might be walking that list. We need to make sure
+	 * the export->next pointer is valid before another CPU sees
+	 * the export pointer included into the list.
+	 */
+	rcu_assign_pointer(*list, export);
+}
+
+static inline int
+rm_trace_export(struct trace_export **list, struct trace_export *export)
+{
+	struct trace_export **p;
+
+	for (p = list; *p != NULL; p = &(*p)->next)
+		if (*p == export)
+			break;
+
+	if (*p != export)
+		return -1;
+
+	rcu_assign_pointer(*p, (*p)->next);
+
+	return 0;
+}
+
+static inline void
+add_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+	if (*list == NULL)
+		ftrace_exports_enable();
+
+	add_trace_export(list, export);
+}
+
+static inline int
+rm_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+	int ret;
+
+	ret = rm_trace_export(list, export);
+	if (*list == NULL)
+		ftrace_exports_disable();
+
+	return ret;
+}
+
+int register_ftrace_export(struct trace_export *export)
+{
+	if (WARN_ON_ONCE(!export->write))
+		return -1;
+
+	mutex_lock(&ftrace_export_lock);
+
+	add_ftrace_export(&ftrace_exports_list, export);
+
+	mutex_unlock(&ftrace_export_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_ftrace_export);
+
+int unregister_ftrace_export(struct trace_export *export)
+{
+	int ret;
+
+	mutex_lock(&ftrace_export_lock);
+
+	ret = rm_ftrace_export(&ftrace_exports_list, export);
+
+	mutex_unlock(&ftrace_export_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(unregister_ftrace_export);
+
 void
 trace_function(struct trace_array *tr,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -2146,8 +2270,11 @@ trace_function(struct trace_array *tr,
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	if (!call_filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event)) {
+		if (static_branch_unlikely(&ftrace_exports_enabled))
+			ftrace_exports(event);
 		__buffer_unlock_commit(buffer, event);
+	}
 }
 
 #ifdef CONFIG_STACKTRACE
-- 
cgit v1.2.3


From 9dfed80d87ca2c365cd1004a91ef4ed716c8e44e Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <zhang.chunyan@linaro.org>
Date: Mon, 21 Nov 2016 15:57:23 +0800
Subject: stm: Mark the functions of writing STM with notrace

If CONFIG_STM_SOURCE_FTRACE is selected, Function trace data can be
writen to sink via STM, all functions that related to writing data
packets to STM should be marked 'notrace' to avoid being traced by
Ftrace, otherwise the program would stall into an endless loop.

Link: http://lkml.kernel.org/r/1479715043-6534-7-git-send-email-zhang.chunyan@linaro.org

Signed-off-by: Chunyan Zhang <zhang.chunyan@linaro.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 drivers/hwtracing/stm/core.c | 7 ++++---
 include/linux/stm.h          | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 51f81d64ca37..37d3bcbd2534 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -425,7 +425,7 @@ static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width)
 	return ret;
 }
 
-static ssize_t stm_write(struct stm_data *data, unsigned int master,
+static ssize_t notrace stm_write(struct stm_data *data, unsigned int master,
 			  unsigned int channel, const char *buf, size_t count)
 {
 	unsigned int flags = STP_PACKET_TIMESTAMPED;
@@ -1121,8 +1121,9 @@ void stm_source_unregister_device(struct stm_source_data *data)
 }
 EXPORT_SYMBOL_GPL(stm_source_unregister_device);
 
-int stm_source_write(struct stm_source_data *data, unsigned int chan,
-		     const char *buf, size_t count)
+int notrace stm_source_write(struct stm_source_data *data,
+			     unsigned int chan,
+			     const char *buf, size_t count)
 {
 	struct stm_source_device *src = data->src;
 	struct stm_device *stm;
diff --git a/include/linux/stm.h b/include/linux/stm.h
index 8369d8a8cabd..210ff2292361 100644
--- a/include/linux/stm.h
+++ b/include/linux/stm.h
@@ -133,7 +133,7 @@ int stm_source_register_device(struct device *parent,
 			       struct stm_source_data *data);
 void stm_source_unregister_device(struct stm_source_data *data);
 
-int stm_source_write(struct stm_source_data *data, unsigned int chan,
-		     const char *buf, size_t count);
+int notrace stm_source_write(struct stm_source_data *data, unsigned int chan,
+			     const char *buf, size_t count);
 
 #endif /* _STM_H_ */
-- 
cgit v1.2.3


From 29fd0ec2bdbef6734fd4c39c23f61d9f030a66a0 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick@shmanahar.org>
Date: Tue, 22 Nov 2016 17:44:12 -0800
Subject: Input: synaptics-rmi4 - add support for F34 device reflash

Add support for updating firmware, triggered by a sysfs attribute.

This patch has been tested on Synaptics S7300.

Signed-off-by: Nick Dyer <nick@shmanahar.org>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Kconfig      |  11 +
 drivers/input/rmi4/Makefile     |   1 +
 drivers/input/rmi4/rmi_bus.c    |   3 +
 drivers/input/rmi4/rmi_driver.c | 105 ++++++---
 drivers/input/rmi4/rmi_driver.h |  24 ++
 drivers/input/rmi4/rmi_f01.c    |   6 +
 drivers/input/rmi4/rmi_f34.c    | 481 ++++++++++++++++++++++++++++++++++++++++
 drivers/input/rmi4/rmi_f34.h    |  68 ++++++
 include/linux/rmi.h             |   2 +
 9 files changed, 670 insertions(+), 31 deletions(-)
 create mode 100644 drivers/input/rmi4/rmi_f34.c
 create mode 100644 drivers/input/rmi4/rmi_f34.h

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig
index 8cbd362ae8a7..9a24867ae7a5 100644
--- a/drivers/input/rmi4/Kconfig
+++ b/drivers/input/rmi4/Kconfig
@@ -74,6 +74,17 @@ config RMI4_F30
 	  Function 30 provides GPIO and LED support for RMI4 devices. This
 	  includes support for buttons on TouchPads and ClickPads.
 
+config RMI4_F34
+	bool "RMI4 Function 34 (Device reflash)"
+	depends on RMI4_CORE
+	select FW_LOADER
+	help
+	  Say Y here if you want to add support for RMI4 function 34.
+
+	  Function 34 provides support for upgrading the firmware on the RMI4
+	  device via the firmware loader interface. This is triggered using a
+	  sysfs attribute.
+
 config RMI4_F54
 	bool "RMI4 Function 54 (Analog diagnostics)"
 	depends on RMI4_CORE
diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile
index a6e27527d514..0250abf38ad9 100644
--- a/drivers/input/rmi4/Makefile
+++ b/drivers/input/rmi4/Makefile
@@ -7,6 +7,7 @@ rmi_core-$(CONFIG_RMI4_2D_SENSOR) += rmi_2d_sensor.o
 rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o
 rmi_core-$(CONFIG_RMI4_F12) += rmi_f12.o
 rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o
+rmi_core-$(CONFIG_RMI4_F34) += rmi_f34.o
 rmi_core-$(CONFIG_RMI4_F54) += rmi_f54.o
 
 # Transports
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index 84b321262d74..ef7a66230d83 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -315,6 +315,9 @@ static struct rmi_function_handler *fn_handlers[] = {
 #ifdef CONFIG_RMI4_F30
 	&rmi_f30_handler,
 #endif
+#ifdef CONFIG_RMI4_F34
+	&rmi_f34_handler,
+#endif
 #ifdef CONFIG_RMI4_F54
 	&rmi_f54_handler,
 #endif
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 4f8d19794b01..2b17d8cb3d10 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -35,14 +35,24 @@
 #define RMI_DEVICE_RESET_CMD	0x01
 #define DEFAULT_RESET_DELAY_MS	100
 
-static void rmi_free_function_list(struct rmi_device *rmi_dev)
+void rmi_free_function_list(struct rmi_device *rmi_dev)
 {
 	struct rmi_function *fn, *tmp;
 	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
 
 	rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, "Freeing function list\n");
 
+	mutex_lock(&data->irq_mutex);
+
+	devm_kfree(&rmi_dev->dev, data->irq_memory);
+	data->irq_memory = NULL;
+	data->irq_status = NULL;
+	data->fn_irq_bits = NULL;
+	data->current_irq_mask = NULL;
+	data->new_irq_mask = NULL;
+
 	data->f01_container = NULL;
+	data->f34_container = NULL;
 
 	/* Doing it in the reverse order so F01 will be removed last */
 	list_for_each_entry_safe_reverse(fn, tmp,
@@ -50,7 +60,10 @@ static void rmi_free_function_list(struct rmi_device *rmi_dev)
 		list_del(&fn->node);
 		rmi_unregister_function(fn);
 	}
+
+	mutex_unlock(&data->irq_mutex);
 }
+EXPORT_SYMBOL_GPL(rmi_free_function_list);
 
 static int reset_one_function(struct rmi_function *fn)
 {
@@ -147,24 +160,25 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 	if (!data)
 		return 0;
 
+	mutex_lock(&data->irq_mutex);
+	if (!data->irq_status || !data->f01_container) {
+		mutex_unlock(&data->irq_mutex);
+		return 0;
+	}
+
 	if (!rmi_dev->xport->attn_data) {
 		error = rmi_read_block(rmi_dev,
 				data->f01_container->fd.data_base_addr + 1,
 				data->irq_status, data->num_of_irq_regs);
 		if (error < 0) {
 			dev_err(dev, "Failed to read irqs, code=%d\n", error);
+			mutex_unlock(&data->irq_mutex);
 			return error;
 		}
 	}
 
-	mutex_lock(&data->irq_mutex);
 	bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask,
 	       data->irq_count);
-	/*
-	 * At this point, irq_status has all bits that are set in the
-	 * interrupt status register and are enabled.
-	 */
-	mutex_unlock(&data->irq_mutex);
 
 	/*
 	 * It would be nice to be able to use irq_chip to handle these
@@ -180,6 +194,8 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 	if (data->input)
 		input_sync(data->input);
 
+	mutex_unlock(&data->irq_mutex);
+
 	return 0;
 }
 
@@ -244,12 +260,18 @@ static int rmi_suspend_functions(struct rmi_device *rmi_dev)
 	struct rmi_function *entry;
 	int retval;
 
+	mutex_lock(&data->irq_mutex);
+
 	list_for_each_entry(entry, &data->function_list, node) {
 		retval = suspend_one_function(entry);
-		if (retval < 0)
+		if (retval < 0) {
+			mutex_unlock(&data->irq_mutex);
 			return retval;
+		}
 	}
 
+	mutex_unlock(&data->irq_mutex);
+
 	return 0;
 }
 
@@ -278,16 +300,22 @@ static int rmi_resume_functions(struct rmi_device *rmi_dev)
 	struct rmi_function *entry;
 	int retval;
 
+	mutex_lock(&data->irq_mutex);
+
 	list_for_each_entry(entry, &data->function_list, node) {
 		retval = resume_one_function(entry);
-		if (retval < 0)
+		if (retval < 0) {
+			mutex_unlock(&data->irq_mutex);
 			return retval;
+		}
 	}
 
+	mutex_unlock(&data->irq_mutex);
+
 	return 0;
 }
 
-static int enable_sensor(struct rmi_device *rmi_dev)
+int rmi_enable_sensor(struct rmi_device *rmi_dev)
 {
 	int retval = 0;
 
@@ -297,6 +325,7 @@ static int enable_sensor(struct rmi_device *rmi_dev)
 
 	return rmi_process_interrupt_requests(rmi_dev);
 }
+EXPORT_SYMBOL_GPL(rmi_enable_sensor);
 
 /**
  * rmi_driver_set_input_params - set input device id and other data.
@@ -502,10 +531,9 @@ static int rmi_scan_pdt_page(struct rmi_device *rmi_dev,
 					RMI_SCAN_DONE : RMI_SCAN_CONTINUE;
 }
 
-static int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx,
-			int (*callback)(struct rmi_device *rmi_dev,
-					void *ctx,
-					const struct pdt_entry *entry))
+int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx,
+		 int (*callback)(struct rmi_device *rmi_dev,
+		 void *ctx, const struct pdt_entry *entry))
 {
 	int page;
 	int empty_pages = 0;
@@ -520,6 +548,7 @@ static int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx,
 
 	return retval < 0 ? retval : 0;
 }
+EXPORT_SYMBOL_GPL(rmi_scan_pdt);
 
 int rmi_read_register_desc(struct rmi_device *d, u16 addr,
 				struct rmi_register_descriptor *rdesc)
@@ -740,19 +769,15 @@ static int rmi_count_irqs(struct rmi_device *rmi_dev,
 	int *irq_count = ctx;
 
 	*irq_count += pdt->interrupt_source_count;
-	if (pdt->function_number == 0x01) {
+	if (pdt->function_number == 0x01)
 		data->f01_bootloader_mode =
 			rmi_check_bootloader_mode(rmi_dev, pdt);
-		if (data->f01_bootloader_mode)
-			dev_warn(&rmi_dev->dev,
-				"WARNING: RMI4 device is in bootloader mode!\n");
-	}
 
 	return RMI_SCAN_CONTINUE;
 }
 
-static int rmi_initial_reset(struct rmi_device *rmi_dev,
-			     void *ctx, const struct pdt_entry *pdt)
+int rmi_initial_reset(struct rmi_device *rmi_dev, void *ctx,
+		      const struct pdt_entry *pdt)
 {
 	int error;
 
@@ -787,6 +812,7 @@ static int rmi_initial_reset(struct rmi_device *rmi_dev,
 	/* F01 should always be on page 0. If we don't find it there, fail. */
 	return pdt->page_start == 0 ? RMI_SCAN_CONTINUE : -ENODEV;
 }
+EXPORT_SYMBOL_GPL(rmi_initial_reset);
 
 static int rmi_create_function(struct rmi_device *rmi_dev,
 			       void *ctx, const struct pdt_entry *pdt)
@@ -828,6 +854,8 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
 
 	if (pdt->function_number == 0x01)
 		data->f01_container = fn;
+	else if (pdt->function_number == 0x34)
+		data->f34_container = fn;
 
 	list_add_tail(&fn->node, &data->function_list);
 
@@ -893,6 +921,7 @@ static int rmi_driver_remove(struct device *dev)
 
 	disable_irq(irq);
 
+	rmi_f34_remove_sysfs(rmi_dev);
 	rmi_free_function_list(rmi_dev);
 
 	return 0;
@@ -919,13 +948,12 @@ static inline int rmi_driver_of_probe(struct device *dev,
 }
 #endif
 
-static int rmi_probe_interrupts(struct rmi_driver_data *data)
+int rmi_probe_interrupts(struct rmi_driver_data *data)
 {
 	struct rmi_device *rmi_dev = data->rmi_dev;
 	struct device *dev = &rmi_dev->dev;
 	int irq_count;
 	size_t size;
-	void *irq_memory;
 	int retval;
 
 	/*
@@ -941,31 +969,38 @@ static int rmi_probe_interrupts(struct rmi_driver_data *data)
 		dev_err(dev, "IRQ counting failed with code %d.\n", retval);
 		return retval;
 	}
+
+	if (data->f01_bootloader_mode)
+		dev_warn(&rmi_dev->dev, "Device in bootloader mode.\n");
+
 	data->irq_count = irq_count;
 	data->num_of_irq_regs = (data->irq_count + 7) / 8;
 
 	size = BITS_TO_LONGS(data->irq_count) * sizeof(unsigned long);
-	irq_memory = devm_kzalloc(dev, size * 4, GFP_KERNEL);
-	if (!irq_memory) {
+	data->irq_memory = devm_kzalloc(dev, size * 4, GFP_KERNEL);
+	if (!data->irq_memory) {
 		dev_err(dev, "Failed to allocate memory for irq masks.\n");
 		return retval;
 	}
 
-	data->irq_status	= irq_memory + size * 0;
-	data->fn_irq_bits	= irq_memory + size * 1;
-	data->current_irq_mask	= irq_memory + size * 2;
-	data->new_irq_mask	= irq_memory + size * 3;
+	data->irq_status	= data->irq_memory + size * 0;
+	data->fn_irq_bits	= data->irq_memory + size * 1;
+	data->current_irq_mask	= data->irq_memory + size * 2;
+	data->new_irq_mask	= data->irq_memory + size * 3;
 
 	return retval;
 }
+EXPORT_SYMBOL_GPL(rmi_probe_interrupts);
 
-static int rmi_init_functions(struct rmi_driver_data *data)
+int rmi_init_functions(struct rmi_driver_data *data)
 {
 	struct rmi_device *rmi_dev = data->rmi_dev;
 	struct device *dev = &rmi_dev->dev;
 	int irq_count;
 	int retval;
 
+	mutex_lock(&data->irq_mutex);
+
 	irq_count = 0;
 	rmi_dbg(RMI_DEBUG_CORE, dev, "%s: Creating functions.\n", __func__);
 	retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_create_function);
@@ -990,12 +1025,16 @@ static int rmi_init_functions(struct rmi_driver_data *data)
 		goto err_destroy_functions;
 	}
 
+	mutex_unlock(&data->irq_mutex);
+
 	return 0;
 
 err_destroy_functions:
 	rmi_free_function_list(rmi_dev);
+	mutex_unlock(&data->irq_mutex);
 	return retval;
 }
+EXPORT_SYMBOL_GPL(rmi_init_functions);
 
 static int rmi_driver_probe(struct device *dev)
 {
@@ -1100,6 +1139,10 @@ static int rmi_driver_probe(struct device *dev)
 	if (retval)
 		goto err;
 
+	retval = rmi_f34_create_sysfs(rmi_dev);
+	if (retval)
+		goto err;
+
 	if (data->input) {
 		rmi_driver_set_input_name(rmi_dev, data->input);
 		if (!rmi_dev->xport->input) {
@@ -1117,7 +1160,7 @@ static int rmi_driver_probe(struct device *dev)
 
 	if (data->f01_container->dev.driver)
 		/* Driver already bound, so enable ATTN now. */
-		return enable_sensor(rmi_dev);
+		return rmi_enable_sensor(rmi_dev);
 
 	return 0;
 
diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h
index 8dfbebe9bf86..e627a3a8aced 100644
--- a/drivers/input/rmi4/rmi_driver.h
+++ b/drivers/input/rmi4/rmi_driver.h
@@ -95,12 +95,36 @@ bool rmi_register_desc_has_subpacket(const struct rmi_register_desc_item *item,
 bool rmi_is_physical_driver(struct device_driver *);
 int rmi_register_physical_driver(void);
 void rmi_unregister_physical_driver(void);
+void rmi_free_function_list(struct rmi_device *rmi_dev);
+int rmi_enable_sensor(struct rmi_device *rmi_dev);
+int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx,
+		 int (*callback)(struct rmi_device *rmi_dev, void *ctx,
+		 const struct pdt_entry *entry));
+int rmi_probe_interrupts(struct rmi_driver_data *data);
+int rmi_init_functions(struct rmi_driver_data *data);
+int rmi_initial_reset(struct rmi_device *rmi_dev, void *ctx,
+		      const struct pdt_entry *pdt);
 
 char *rmi_f01_get_product_ID(struct rmi_function *fn);
 
+#ifdef CONFIG_RMI4_F34
+int rmi_f34_create_sysfs(struct rmi_device *rmi_dev);
+void rmi_f34_remove_sysfs(struct rmi_device *rmi_dev);
+#else
+static inline int rmi_f34_create_sysfs(struct rmi_device *rmi_dev)
+{
+	return 0;
+}
+
+static inline void rmi_f34_remove_sysfs(struct rmi_device *rmi_dev)
+{
+}
+#endif /* CONFIG_RMI_F34 */
+
 extern struct rmi_function_handler rmi_f01_handler;
 extern struct rmi_function_handler rmi_f11_handler;
 extern struct rmi_function_handler rmi_f12_handler;
 extern struct rmi_function_handler rmi_f30_handler;
+extern struct rmi_function_handler rmi_f34_handler;
 extern struct rmi_function_handler rmi_f54_handler;
 #endif
diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c
index 2cfa9f64acfb..cae35c6cde31 100644
--- a/drivers/input/rmi4/rmi_f01.c
+++ b/drivers/input/rmi4/rmi_f01.c
@@ -63,6 +63,8 @@ struct f01_basic_properties {
 #define RMI_F01_STATUS_CODE(status)		((status) & 0x0f)
 /* The device has lost its configuration for some reason. */
 #define RMI_F01_STATUS_UNCONFIGURED(status)	(!!((status) & 0x80))
+/* The device is in bootloader mode */
+#define RMI_F01_STATUS_BOOTLOADER(status)	((status) & 0x40)
 
 /* Control register bits */
 
@@ -594,6 +596,10 @@ static int rmi_f01_attention(struct rmi_function *fn,
 		return error;
 	}
 
+	if (RMI_F01_STATUS_BOOTLOADER(device_status))
+		dev_warn(&fn->dev,
+			 "Device in bootloader mode, please update firmware\n");
+
 	if (RMI_F01_STATUS_UNCONFIGURED(device_status)) {
 		dev_warn(&fn->dev, "Device reset detected.\n");
 		error = rmi_dev->driver->reset_handler(rmi_dev);
diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c
new file mode 100644
index 000000000000..03df85ac91a5
--- /dev/null
+++ b/drivers/input/rmi4/rmi_f34.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2007-2016, Synaptics Incorporated
+ * Copyright (C) 2016 Zodiac Inflight Innovations
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/rmi.h>
+#include <linux/firmware.h>
+#include <asm/unaligned.h>
+#include <asm/unaligned.h>
+
+#include "rmi_driver.h"
+#include "rmi_f34.h"
+
+static int rmi_f34_write_bootloader_id(struct f34_data *f34)
+{
+	struct rmi_function *fn = f34->fn;
+	struct rmi_device *rmi_dev = fn->rmi_dev;
+	u8 bootloader_id[F34_BOOTLOADER_ID_LEN];
+	int ret;
+
+	ret = rmi_read_block(rmi_dev, fn->fd.query_base_addr,
+			     bootloader_id, sizeof(bootloader_id));
+	if (ret) {
+		dev_err(&fn->dev, "%s: Reading bootloader ID failed: %d\n",
+				__func__, ret);
+		return ret;
+	}
+
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s: writing bootloader id '%c%c'\n",
+			__func__, bootloader_id[0], bootloader_id[1]);
+
+	ret = rmi_write_block(rmi_dev,
+			      fn->fd.data_base_addr + F34_BLOCK_DATA_OFFSET,
+			      bootloader_id, sizeof(bootloader_id));
+	if (ret) {
+		dev_err(&fn->dev, "Failed to write bootloader ID: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_f34_command(struct f34_data *f34, u8 command,
+			   unsigned int timeout, bool write_bl_id)
+{
+	struct rmi_function *fn = f34->fn;
+	struct rmi_device *rmi_dev = fn->rmi_dev;
+	int ret;
+
+	if (write_bl_id) {
+		ret = rmi_f34_write_bootloader_id(f34);
+		if (ret)
+			return ret;
+	}
+
+	init_completion(&f34->v5.cmd_done);
+
+	ret = rmi_read(rmi_dev, f34->v5.ctrl_address, &f34->v5.status);
+	if (ret) {
+		dev_err(&f34->fn->dev,
+			"%s: Failed to read cmd register: %d (command %#02x)\n",
+			__func__, ret, command);
+		return ret;
+	}
+
+	f34->v5.status |= command & 0x0f;
+
+	ret = rmi_write(rmi_dev, f34->v5.ctrl_address, f34->v5.status);
+	if (ret < 0) {
+		dev_err(&f34->fn->dev,
+			"Failed to write F34 command %#02x: %d\n",
+			command, ret);
+		return ret;
+	}
+
+	if (!wait_for_completion_timeout(&f34->v5.cmd_done,
+				msecs_to_jiffies(timeout))) {
+
+		ret = rmi_read(rmi_dev, f34->v5.ctrl_address, &f34->v5.status);
+		if (ret) {
+			dev_err(&f34->fn->dev,
+				"%s: cmd %#02x timed out: %d\n",
+				__func__, command, ret);
+			return ret;
+		}
+
+		if (f34->v5.status & 0x7f) {
+			dev_err(&f34->fn->dev,
+				"%s: cmd %#02x timed out, status: %#02x\n",
+				__func__, command, f34->v5.status);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+static int rmi_f34_attention(struct rmi_function *fn, unsigned long *irq_bits)
+{
+	struct f34_data *f34 = dev_get_drvdata(&fn->dev);
+	int ret;
+
+	ret = rmi_read(f34->fn->rmi_dev, f34->v5.ctrl_address, &f34->v5.status);
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s: status: %#02x, ret: %d\n",
+		__func__, f34->v5.status, ret);
+
+	if (!ret && !(f34->v5.status & 0x7f))
+		complete(&f34->v5.cmd_done);
+
+	return 0;
+}
+
+static int rmi_f34_write_blocks(struct f34_data *f34, const void *data,
+				int block_count, u8 command)
+{
+	struct rmi_function *fn = f34->fn;
+	struct rmi_device *rmi_dev = fn->rmi_dev;
+	u16 address = fn->fd.data_base_addr + F34_BLOCK_DATA_OFFSET;
+	u8 start_address[] = { 0, 0 };
+	int i;
+	int ret;
+
+	ret = rmi_write_block(rmi_dev, fn->fd.data_base_addr,
+			      start_address, sizeof(start_address));
+	if (ret) {
+		dev_err(&fn->dev, "Failed to write initial zeros: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < block_count; i++) {
+		ret = rmi_write_block(rmi_dev, address,
+				      data, f34->v5.block_size);
+		if (ret) {
+			dev_err(&fn->dev,
+				"failed to write block #%d: %d\n", i, ret);
+			return ret;
+		}
+
+		ret = rmi_f34_command(f34, command, F34_IDLE_WAIT_MS, false);
+		if (ret) {
+			dev_err(&fn->dev,
+				"Failed to write command for block #%d: %d\n",
+				i, ret);
+			return ret;
+		}
+
+		rmi_dbg(RMI_DEBUG_FN, &fn->dev, "wrote block %d of %d\n",
+			i + 1, block_count);
+
+		data += f34->v5.block_size;
+	}
+
+	return 0;
+}
+
+static int rmi_f34_write_firmware(struct f34_data *f34, const void *data)
+{
+	return rmi_f34_write_blocks(f34, data, f34->v5.fw_blocks,
+				    F34_WRITE_FW_BLOCK);
+}
+
+static int rmi_f34_write_config(struct f34_data *f34, const void *data)
+{
+	return rmi_f34_write_blocks(f34, data, f34->v5.config_blocks,
+				    F34_WRITE_CONFIG_BLOCK);
+}
+
+int rmi_f34_enable_flash(struct f34_data *f34)
+{
+	return rmi_f34_command(f34, F34_ENABLE_FLASH_PROG,
+			       F34_ENABLE_WAIT_MS, true);
+}
+
+static int rmi_f34_flash_firmware(struct f34_data *f34,
+				  const struct rmi_f34_firmware *syn_fw)
+{
+	struct rmi_function *fn = f34->fn;
+	int ret;
+
+	if (syn_fw->image_size) {
+		dev_info(&fn->dev, "Erasing firmware...\n");
+		ret = rmi_f34_command(f34, F34_ERASE_ALL,
+				      F34_ERASE_WAIT_MS, true);
+		if (ret)
+			return ret;
+
+		dev_info(&fn->dev, "Writing firmware (%d bytes)...\n",
+			 syn_fw->image_size);
+		ret = rmi_f34_write_firmware(f34, syn_fw->data);
+		if (ret)
+			return ret;
+	}
+
+	if (syn_fw->config_size) {
+		/*
+		 * We only need to erase config if we haven't updated
+		 * firmware.
+		 */
+		if (!syn_fw->image_size) {
+			dev_info(&fn->dev, "Erasing config...\n");
+			ret = rmi_f34_command(f34, F34_ERASE_CONFIG,
+					      F34_ERASE_WAIT_MS, true);
+			if (ret)
+				return ret;
+		}
+
+		dev_info(&fn->dev, "Writing config (%d bytes)...\n",
+			 syn_fw->config_size);
+		ret = rmi_f34_write_config(f34,
+				&syn_fw->data[syn_fw->image_size]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int rmi_f34_update_firmware(struct f34_data *f34, const struct firmware *fw)
+{
+	const struct rmi_f34_firmware *syn_fw;
+	int ret;
+
+	syn_fw = (const struct rmi_f34_firmware *)fw->data;
+	BUILD_BUG_ON(offsetof(struct rmi_f34_firmware, data) !=
+			F34_FW_IMAGE_OFFSET);
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+		"FW size:%d, checksum:%08x, image_size:%d, config_size:%d\n",
+		(int)fw->size,
+		le32_to_cpu(syn_fw->checksum),
+		le32_to_cpu(syn_fw->image_size),
+		le32_to_cpu(syn_fw->config_size));
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+		"FW bootloader_id:%02x, product_id:%.*s, info: %02x%02x\n",
+		syn_fw->bootloader_version,
+		(int)sizeof(syn_fw->product_id), syn_fw->product_id,
+		syn_fw->product_info[0], syn_fw->product_info[1]);
+
+	if (syn_fw->image_size &&
+	    syn_fw->image_size != f34->v5.fw_blocks * f34->v5.block_size) {
+		dev_err(&f34->fn->dev,
+			"Bad firmware image: fw size %d, expected %d\n",
+			syn_fw->image_size,
+			f34->v5.fw_blocks * f34->v5.block_size);
+		ret = -EILSEQ;
+		goto out;
+	}
+
+	if (syn_fw->config_size &&
+	    syn_fw->config_size != f34->v5.config_blocks * f34->v5.block_size) {
+		dev_err(&f34->fn->dev,
+			"Bad firmware image: config size %d, expected %d\n",
+			syn_fw->config_size,
+			f34->v5.config_blocks * f34->v5.block_size);
+		ret = -EILSEQ;
+		goto out;
+	}
+
+	if (syn_fw->image_size && !syn_fw->config_size) {
+		dev_err(&f34->fn->dev, "Bad firmware image: no config data\n");
+		ret = -EILSEQ;
+		goto out;
+	}
+
+	dev_info(&f34->fn->dev, "Firmware image OK\n");
+	mutex_lock(&f34->v5.flash_mutex);
+
+	ret = rmi_f34_flash_firmware(f34, syn_fw);
+
+	mutex_unlock(&f34->v5.flash_mutex);
+
+out:
+	return ret;
+}
+
+static int rmi_firmware_update(struct rmi_driver_data *data,
+			       const struct firmware *fw)
+{
+	struct device *dev = &data->rmi_dev->dev;
+	struct f34_data *f34;
+	int ret;
+
+	if (!data->f34_container) {
+		dev_warn(dev, "%s: No F34 present!\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Only version 0 currently supported */
+	if (data->f34_container->fd.function_version != 0) {
+		dev_warn(dev, "F34 V%d not supported!\n",
+			 data->f34_container->fd.function_version);
+		return -ENODEV;
+	}
+
+	f34 = dev_get_drvdata(&data->f34_container->dev);
+
+	/* Enter flash mode */
+	ret = rmi_f34_enable_flash(f34);
+	if (ret)
+		return ret;
+
+	/* Tear down functions and re-probe */
+	rmi_free_function_list(data->rmi_dev);
+
+	ret = rmi_probe_interrupts(data);
+	if (ret)
+		return ret;
+
+	ret = rmi_init_functions(data);
+	if (ret)
+		return ret;
+
+	if (!data->f01_bootloader_mode || !data->f34_container) {
+		dev_warn(dev, "%s: No F34 present or not in bootloader!\n",
+				__func__);
+		return -EINVAL;
+	}
+
+	f34 = dev_get_drvdata(&data->f34_container->dev);
+
+	/* Perform firmware update */
+	ret = rmi_f34_update_firmware(f34, fw);
+
+	dev_info(&f34->fn->dev, "Firmware update complete, status:%d\n", ret);
+
+	/* Re-probe */
+	rmi_dbg(RMI_DEBUG_FN, dev, "Re-probing device\n");
+	rmi_free_function_list(data->rmi_dev);
+
+	ret = rmi_scan_pdt(data->rmi_dev, NULL, rmi_initial_reset);
+	if (ret < 0)
+		dev_warn(dev, "RMI reset failed!\n");
+
+	ret = rmi_probe_interrupts(data);
+	if (ret)
+		return ret;
+
+	ret = rmi_init_functions(data);
+	if (ret)
+		return ret;
+
+	if (data->f01_container->dev.driver)
+		/* Driver already bound, so enable ATTN now. */
+		return rmi_enable_sensor(data->rmi_dev);
+
+	rmi_dbg(RMI_DEBUG_FN, dev, "%s complete\n", __func__);
+
+	return ret;
+}
+
+static ssize_t rmi_driver_update_fw_store(struct device *dev,
+					  struct device_attribute *dattr,
+					  const char *buf, size_t count)
+{
+	struct rmi_driver_data *data = dev_get_drvdata(dev);
+	char fw_name[NAME_MAX];
+	const struct firmware *fw;
+	size_t copy_count = count;
+	int ret;
+
+	if (count == 0 || count >= NAME_MAX)
+		return -EINVAL;
+
+	if (buf[count - 1] == '\0' || buf[count - 1] == '\n')
+		copy_count -= 1;
+
+	strncpy(fw_name, buf, copy_count);
+	fw_name[copy_count] = '\0';
+
+	ret = request_firmware(&fw, fw_name, dev);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "Flashing %s\n", fw_name);
+
+	ret = rmi_firmware_update(data, fw);
+
+	release_firmware(fw);
+
+	return ret ?: count;
+}
+
+static DEVICE_ATTR(update_fw, 0200, NULL, rmi_driver_update_fw_store);
+
+static struct attribute *rmi_firmware_attrs[] = {
+	&dev_attr_update_fw.attr,
+	NULL
+};
+
+static struct attribute_group rmi_firmware_attr_group = {
+	.attrs = rmi_firmware_attrs,
+};
+
+static int rmi_f34_probe(struct rmi_function *fn)
+{
+	struct f34_data *f34;
+	unsigned char f34_queries[9];
+	bool has_config_id;
+	int ret;
+
+	f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL);
+	if (!f34)
+		return -ENOMEM;
+
+	f34->fn = fn;
+	dev_set_drvdata(&fn->dev, f34);
+
+	ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
+			     f34_queries, sizeof(f34_queries));
+	if (ret) {
+		dev_err(&fn->dev, "%s: Failed to query properties\n",
+			__func__);
+		return ret;
+	}
+
+	snprintf(f34->bootloader_id, sizeof(f34->bootloader_id),
+		 "%c%c", f34_queries[0], f34_queries[1]);
+
+	mutex_init(&f34->v5.flash_mutex);
+	init_completion(&f34->v5.cmd_done);
+
+	f34->v5.block_size = get_unaligned_le16(&f34_queries[3]);
+	f34->v5.fw_blocks = get_unaligned_le16(&f34_queries[5]);
+	f34->v5.config_blocks = get_unaligned_le16(&f34_queries[7]);
+	f34->v5.ctrl_address = fn->fd.data_base_addr + F34_BLOCK_DATA_OFFSET +
+		f34->v5.block_size;
+	has_config_id = f34_queries[2] & (1 << 2);
+
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Bootloader ID: %s\n",
+		f34->bootloader_id);
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Block size: %d\n",
+		f34->v5.block_size);
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "FW blocks: %d\n",
+		f34->v5.fw_blocks);
+	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "CFG blocks: %d\n",
+		f34->v5.config_blocks);
+
+	if (has_config_id) {
+		ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr,
+				     f34_queries, sizeof(f34_queries));
+		if (ret) {
+			dev_err(&fn->dev, "Failed to read F34 config ID\n");
+			return ret;
+		}
+
+		snprintf(f34->configuration_id, sizeof(f34->configuration_id),
+			 "%02x%02x%02x%02x",
+			 f34_queries[0], f34_queries[1],
+			 f34_queries[2], f34_queries[3]);
+
+		rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n",
+			 f34->configuration_id);
+	}
+
+	return 0;
+}
+
+int rmi_f34_create_sysfs(struct rmi_device *rmi_dev)
+{
+	return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group);
+}
+
+void rmi_f34_remove_sysfs(struct rmi_device *rmi_dev)
+{
+	sysfs_remove_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group);
+}
+
+struct rmi_function_handler rmi_f34_handler = {
+	.driver = {
+		.name = "rmi4_f34",
+	},
+	.func = 0x34,
+	.probe = rmi_f34_probe,
+	.attention = rmi_f34_attention,
+};
diff --git a/drivers/input/rmi4/rmi_f34.h b/drivers/input/rmi4/rmi_f34.h
new file mode 100644
index 000000000000..6cee5282fbb4
--- /dev/null
+++ b/drivers/input/rmi4/rmi_f34.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2007-2016, Synaptics Incorporated
+ * Copyright (C) 2016 Zodiac Inflight Innovations
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _RMI_F34_H
+#define _RMI_F34_H
+
+/* F34 image file offsets. */
+#define F34_FW_IMAGE_OFFSET	0x100
+
+/* F34 register offsets. */
+#define F34_BLOCK_DATA_OFFSET	2
+
+/* F34 commands */
+#define F34_WRITE_FW_BLOCK	0x2
+#define F34_ERASE_ALL		0x3
+#define F34_READ_CONFIG_BLOCK	0x5
+#define F34_WRITE_CONFIG_BLOCK	0x6
+#define F34_ERASE_CONFIG	0x7
+#define F34_ENABLE_FLASH_PROG	0xf
+
+#define F34_STATUS_IN_PROGRESS	0xff
+#define F34_STATUS_IDLE		0x80
+
+#define F34_IDLE_WAIT_MS	500
+#define F34_ENABLE_WAIT_MS	300
+#define F34_ERASE_WAIT_MS	5000
+
+#define F34_BOOTLOADER_ID_LEN	2
+
+struct rmi_f34_firmware {
+	__le32 checksum;
+	u8 pad1[3];
+	u8 bootloader_version;
+	__le32 image_size;
+	__le32 config_size;
+	u8 product_id[10];
+	u8 product_info[2];
+	u8 pad2[228];
+	u8 data[];
+};
+
+struct f34v5_data {
+	u16 block_size;
+	u16 fw_blocks;
+	u16 config_blocks;
+	u16 ctrl_address;
+	u8 status;
+
+	struct completion cmd_done;
+	struct mutex flash_mutex;
+};
+
+struct f34_data {
+	struct rmi_function *fn;
+
+	unsigned char bootloader_id[5];
+	unsigned char configuration_id[9];
+
+	struct f34v5_data v5;
+};
+
+#endif /* _RMI_F34_H */
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index ac904bb439a5..4096b0246c23 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -337,11 +337,13 @@ struct rmi_driver_data {
 	struct rmi_device *rmi_dev;
 
 	struct rmi_function *f01_container;
+	struct rmi_function *f34_container;
 	bool f01_bootloader_mode;
 
 	u32 attn_count;
 	int num_of_irq_regs;
 	int irq_count;
+	void *irq_memory;
 	unsigned long *irq_status;
 	unsigned long *fn_irq_bits;
 	unsigned long *current_irq_mask;
-- 
cgit v1.2.3


From c762cc68b6a12eedebefc156ea4838e54804e2eb Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 22 Nov 2016 17:57:02 -0800
Subject: Input: synaptics-rmi4 - propagate correct number of rx and tx
 electrodes to F54

F54 diagnostics report functions provide data based on the number of
enabled rx and tx electrodes, which is not identical to the number of
electrodes reported with F54:Query0 and F54:Query1. Those values report
the number of supported electrodes, not the number of enabled electrodes.
The number of enabled electrodes can be determined by analyzing F55:Ctrl1
(sensor receiver assignment) and F55:Ctrl2 (sensor transmitter assignment).

Propagate the number of enabled electrodes from F55 to F54 to avoid
corrupted output if not all electrodes are enabled.

Fixes: 3a762dbd5347 ("[media] Input: synaptics-rmi4 - add support for F54 ...")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Nick Dyer <nick@shmanahar.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Kconfig   |  1 +
 drivers/input/rmi4/rmi_f54.c | 14 ++++++++++----
 drivers/input/rmi4/rmi_f55.c |  7 +++++++
 include/linux/rmi.h          |  3 +++
 4 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig
index f4460f3ebac5..a9c36a5fe708 100644
--- a/drivers/input/rmi4/Kconfig
+++ b/drivers/input/rmi4/Kconfig
@@ -90,6 +90,7 @@ config RMI4_F54
 	depends on RMI4_CORE
 	depends on VIDEO_V4L2=y || (RMI4_CORE=m && VIDEO_V4L2=m)
 	select VIDEOBUF2_VMALLOC
+	select RMI4_F55
 	help
 	  Say Y here if you want to add support for RMI4 function 54
 
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index 2e934aef3d2a..dea63e2db3e6 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -217,8 +217,10 @@ unlock:
 
 static size_t rmi_f54_get_report_size(struct f54_data *f54)
 {
-	u8 rx = f54->num_rx_electrodes ? : f54->num_rx_electrodes;
-	u8 tx = f54->num_tx_electrodes ? : f54->num_tx_electrodes;
+	struct rmi_device *rmi_dev = f54->fn->rmi_dev;
+	struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev);
+	u8 rx = drv_data->num_rx_electrodes ? : f54->num_rx_electrodes;
+	u8 tx = drv_data->num_tx_electrodes ? : f54->num_tx_electrodes;
 	size_t size;
 
 	switch (rmi_f54_get_reptype(f54, f54->input)) {
@@ -402,6 +404,10 @@ static int rmi_f54_vidioc_enum_input(struct file *file, void *priv,
 
 static int rmi_f54_set_input(struct f54_data *f54, unsigned int i)
 {
+	struct rmi_device *rmi_dev = f54->fn->rmi_dev;
+	struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev);
+	u8 rx = drv_data->num_rx_electrodes ? : f54->num_rx_electrodes;
+	u8 tx = drv_data->num_tx_electrodes ? : f54->num_tx_electrodes;
 	struct v4l2_pix_format *f = &f54->format;
 	enum rmi_f54_report_type reptype;
 	int ret;
@@ -416,8 +422,8 @@ static int rmi_f54_set_input(struct f54_data *f54, unsigned int i)
 
 	f54->input = i;
 
-	f->width = f54->num_rx_electrodes;
-	f->height = f54->num_tx_electrodes;
+	f->width = rx;
+	f->height = tx;
 	f->field = V4L2_FIELD_NONE;
 	f->colorspace = V4L2_COLORSPACE_RAW;
 	f->bytesperline = f->width * sizeof(u16);
diff --git a/drivers/input/rmi4/rmi_f55.c b/drivers/input/rmi4/rmi_f55.c
index 2d221cc97391..37390ca6a924 100644
--- a/drivers/input/rmi4/rmi_f55.c
+++ b/drivers/input/rmi4/rmi_f55.c
@@ -38,6 +38,8 @@ struct f55_data {
 
 static int rmi_f55_detect(struct rmi_function *fn)
 {
+	struct rmi_device *rmi_dev = fn->rmi_dev;
+	struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev);
 	struct f55_data *f55;
 	int error;
 
@@ -57,6 +59,9 @@ static int rmi_f55_detect(struct rmi_function *fn)
 	f55->cfg_num_rx_electrodes = f55->num_rx_electrodes;
 	f55->cfg_num_tx_electrodes = f55->num_rx_electrodes;
 
+	drv_data->num_rx_electrodes = f55->cfg_num_rx_electrodes;
+	drv_data->num_tx_electrodes = f55->cfg_num_rx_electrodes;
+
 	if (f55->qry[F55_PHYS_CHAR_OFFSET] & F55_CAP_SENSOR_ASSIGN) {
 		int i, total;
 		u8 buf[256];
@@ -78,6 +83,7 @@ static int rmi_f55_detect(struct rmi_function *fn)
 					total++;
 			}
 			f55->cfg_num_rx_electrodes = total;
+			drv_data->num_rx_electrodes = total;
 		}
 
 		error = rmi_read_block(fn->rmi_dev,
@@ -90,6 +96,7 @@ static int rmi_f55_detect(struct rmi_function *fn)
 					total++;
 			}
 			f55->cfg_num_tx_electrodes = total;
+			drv_data->num_tx_electrodes = total;
 		}
 	}
 
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 4096b0246c23..8499b6aa2221 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -354,6 +354,9 @@ struct rmi_driver_data {
 	u8 pdt_props;
 	u8 bsr;
 
+	u8 num_rx_electrodes;
+	u8 num_tx_electrodes;
+
 	bool enabled;
 
 	void *data;
-- 
cgit v1.2.3


From 4d0fe7490d7f4d61b582acbae718328284f151b9 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <edubezval@gmail.com>
Date: Mon, 7 Nov 2016 21:08:52 -0800
Subject: thermal: core: move trips attributes to tz->device.groups

Finally, move the last thermal zone sysfs attributes to
tz->device.groups: trips attributes. This requires adding a
attribute_group to thermal_zone_device, creating it dynamically, and
then setting all trips attributes in it. The trips attribute is then
added to the tz->device.groups.

As the removal of all attributes are handled by device core, the device
remove calls are not needed anymore.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_core.c | 86 +++++++++++++++++++++++-------------------
 include/linux/thermal.h        |  2 +
 2 files changed, 50 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 971f033368b2..b1d6de9185f4 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1251,8 +1251,13 @@ static const struct attribute_group *thermal_zone_attribute_groups[] = {
  */
 static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 {
-	int indx;
 	int size = sizeof(struct thermal_attr) * tz->trips;
+	struct attribute **attrs;
+	int indx;
+
+	/* This function works only for zones with at least one trip */
+	if (tz->trips <= 0)
+		return -EINVAL;
 
 	tz->trip_type_attrs = kzalloc(size, GFP_KERNEL);
 	if (!tz->trip_type_attrs)
@@ -1273,6 +1278,15 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 		}
 	}
 
+	attrs = kzalloc(sizeof(*attrs) * (tz->trips * 3 + 1), GFP_KERNEL);
+	if (!attrs) {
+		kfree(tz->trip_type_attrs);
+		kfree(tz->trip_temp_attrs);
+		if (tz->ops->get_trip_hyst)
+			kfree(tz->trip_hyst_attrs);
+		return -ENOMEM;
+	}
+
 	for (indx = 0; indx < tz->trips; indx++) {
 		/* create trip type attribute */
 		snprintf(tz->trip_type_attrs[indx].name, THERMAL_NAME_LENGTH,
@@ -1283,9 +1297,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 						tz->trip_type_attrs[indx].name;
 		tz->trip_type_attrs[indx].attr.attr.mode = S_IRUGO;
 		tz->trip_type_attrs[indx].attr.show = trip_point_type_show;
-
-		device_create_file(&tz->device,
-				   &tz->trip_type_attrs[indx].attr);
+		attrs[indx] = &tz->trip_type_attrs[indx].attr.attr;
 
 		/* create trip temp attribute */
 		snprintf(tz->trip_temp_attrs[indx].name, THERMAL_NAME_LENGTH,
@@ -1302,9 +1314,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 			tz->trip_temp_attrs[indx].attr.store =
 							trip_point_temp_store;
 		}
-
-		device_create_file(&tz->device,
-				   &tz->trip_temp_attrs[indx].attr);
+		attrs[indx + tz->trips] = &tz->trip_temp_attrs[indx].attr.attr;
 
 		/* create Optional trip hyst attribute */
 		if (!tz->ops->get_trip_hyst)
@@ -1322,45 +1332,43 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 			tz->trip_hyst_attrs[indx].attr.store =
 					trip_point_hyst_store;
 		}
-
-		device_create_file(&tz->device,
-				   &tz->trip_hyst_attrs[indx].attr);
+		attrs[indx + tz->trips * 2] =
+					&tz->trip_hyst_attrs[indx].attr.attr;
 	}
-	return 0;
-}
+	attrs[tz->trips * 3] = NULL;
 
-static void remove_trip_attrs(struct thermal_zone_device *tz)
-{
-	int indx;
+	tz->trips_attribute_group.attrs = attrs;
 
-	for (indx = 0; indx < tz->trips; indx++) {
-		device_remove_file(&tz->device,
-				   &tz->trip_type_attrs[indx].attr);
-		device_remove_file(&tz->device,
-				   &tz->trip_temp_attrs[indx].attr);
-		if (tz->ops->get_trip_hyst)
-			device_remove_file(&tz->device,
-					   &tz->trip_hyst_attrs[indx].attr);
-	}
-	kfree(tz->trip_type_attrs);
-	kfree(tz->trip_temp_attrs);
-	kfree(tz->trip_hyst_attrs);
+	return 0;
 }
 
-static int thermal_zone_create_device_groups(struct thermal_zone_device *tz)
+static int thermal_zone_create_device_groups(struct thermal_zone_device *tz,
+					     int mask)
 {
 	const struct attribute_group **groups;
-	int i, size;
+	int i, size, result;
 
-	size = ARRAY_SIZE(thermal_zone_attribute_groups) + 1;
+	/* we need one extra for trips and the NULL to terminate the array */
+	size = ARRAY_SIZE(thermal_zone_attribute_groups) + 2;
 	/* This also takes care of API requirement to be NULL terminated */
 	groups = kcalloc(size, sizeof(*groups), GFP_KERNEL);
 	if (!groups)
 		return -ENOMEM;
 
-	for (i = 0; i < size - 1; i++)
+	for (i = 0; i < size - 2; i++)
 		groups[i] = thermal_zone_attribute_groups[i];
 
+	if (tz->trips) {
+		result = create_trip_attrs(tz, mask);
+		if (result) {
+			kfree(groups);
+
+			return result;
+		}
+
+		groups[size - 2] = &tz->trips_attribute_group;
+	}
+
 	tz->device.groups = groups;
 
 	return 0;
@@ -2000,8 +2008,12 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	tz->passive_delay = passive_delay;
 	tz->polling_delay = polling_delay;
 
+	/* sys I/F */
 	/* Add nodes that are always present via .groups */
-	thermal_zone_create_device_groups(tz);
+	result = thermal_zone_create_device_groups(tz, mask);
+	if (result)
+		goto unregister;
+
 	/* A new thermal zone needs to be updated anyway. */
 	atomic_set(&tz->need_update, 1);
 
@@ -2013,11 +2025,6 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		return ERR_PTR(result);
 	}
 
-	/* sys I/F */
-	result = create_trip_attrs(tz, mask);
-	if (result)
-		goto unregister;
-
 	for (count = 0; count < trips; count++) {
 		if (tz->ops->get_trip_type(tz, count, &trip_type))
 			set_bit(count, &tz->trips_disabled);
@@ -2122,7 +2129,10 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
 	thermal_zone_device_set_polling(tz, 0);
 
-	remove_trip_attrs(tz);
+	kfree(tz->trip_type_attrs);
+	kfree(tz->trip_temp_attrs);
+	kfree(tz->trip_hyst_attrs);
+	kfree(tz->trips_attribute_group.attrs);
 	thermal_set_governor(tz, NULL);
 
 	thermal_remove_hwmon_sysfs(tz);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 511182a88e76..e275e98bdceb 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -28,6 +28,7 @@
 #include <linux/of.h>
 #include <linux/idr.h>
 #include <linux/device.h>
+#include <linux/sysfs.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/thermal.h>
 
@@ -204,6 +205,7 @@ struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
 	struct device device;
+	struct attribute_group trips_attribute_group;
 	struct thermal_attr *trip_temp_attrs;
 	struct thermal_attr *trip_type_attrs;
 	struct thermal_attr *trip_hyst_attrs;
-- 
cgit v1.2.3


From 1cea4e7776b7e9096b696eeb66364f51a8d321ec Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Thu, 15 Sep 2016 15:44:22 +0100
Subject: devfreq_cooling: make the structs devfreq_cooling_xxx visible for all
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the protection #ifdef CONFIG_DEVFREQ_THERMAL cuts the needed
structures devfreq_cooling_ops and devfreq_cooling_device.
The functions which are supposed to provide the empty implementation complain
about unknown structures.
Similar solution is present in include/linux/devfreq.h.

Reviewed-by: Ørjan Eide <orjan.eide@arm.com>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/devfreq_cooling.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index 7adf6cc4b305..3049f9422507 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -20,7 +20,6 @@
 #include <linux/devfreq.h>
 #include <linux/thermal.h>
 
-#ifdef CONFIG_DEVFREQ_THERMAL
 
 /**
  * struct devfreq_cooling_power - Devfreq cooling power ops
@@ -43,6 +42,8 @@ struct devfreq_cooling_power {
 	unsigned long dyn_power_coeff;
 };
 
+#ifdef CONFIG_DEVFREQ_THERMAL
+
 struct thermal_cooling_device *
 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 				  struct devfreq_cooling_power *dfc_power);
-- 
cgit v1.2.3


From 3aa5374376746f20a27be8682be7f91c8b71d1d8 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Thu, 15 Sep 2016 15:44:23 +0100
Subject: devfreq_cooling: pass a pointer to devfreq in the power model
 callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the devfreq cooling device was designed, it was an oversight not to
pass a pointer to the struct devfreq as the first parameters of the
callbacks.  The design patterns of the kernel suggest it for a good
reason.

By passing a pointer to struct devfreq, the driver can register one
function that works with multiple devices.  With the current
implementation, a driver that can work with multiple devices has to
create multiple copies of the same function with different parameters so
that each devfreq_cooling_device can use the appropriate one.  By
passing a pointer to struct devfreq, the driver can identify which
device it's referring to.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Reviewed-by: Punit Agrawal <punit.agrawal@arm.com>
Reviewed-by: Ørjan Eide <orjan.eide@arm.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/devfreq_cooling.c | 5 +++--
 include/linux/devfreq_cooling.h   | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 81631b110e17..5a737fd5f1aa 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -238,7 +238,7 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 		return 0;
 	}
 
-	return dfc->power_ops->get_static_power(voltage);
+	return dfc->power_ops->get_static_power(df, voltage);
 }
 
 /**
@@ -262,7 +262,8 @@ get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 	struct devfreq_cooling_power *dfc_power = dfc->power_ops;
 
 	if (dfc_power->get_dynamic_power)
-		return dfc_power->get_dynamic_power(freq, voltage);
+		return dfc_power->get_dynamic_power(dfc->devfreq, freq,
+						    voltage);
 
 	freq_mhz = freq / 1000000;
 	power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index 3049f9422507..c35d0c0e0ada 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -36,8 +36,10 @@
  *			@dyn_power_coeff * frequency * voltage^2
  */
 struct devfreq_cooling_power {
-	unsigned long (*get_static_power)(unsigned long voltage);
-	unsigned long (*get_dynamic_power)(unsigned long freq,
+	unsigned long (*get_static_power)(struct devfreq *devfreq,
+					  unsigned long voltage);
+	unsigned long (*get_dynamic_power)(struct devfreq *devfreq,
+					   unsigned long freq,
 					   unsigned long voltage);
 	unsigned long dyn_power_coeff;
 };
-- 
cgit v1.2.3


From ba6379f7e6c7e51b3c0e92672bc61bb6961c2b5e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 23 Nov 2016 12:53:00 +0100
Subject: fs: Provide function to get superblock with exclusive s_umount

Quota code will need a variant of get_super_thawed() that returns
superblock with s_umount held in exclusive mode to serialize quota on
and quota off operations. Provide this functionality.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/super.c         | 80 ++++++++++++++++++++++++++++++++++++++++--------------
 include/linux/fs.h |  2 ++
 2 files changed, 62 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index c183835566c1..f7f724230e2b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -558,6 +558,13 @@ void drop_super(struct super_block *sb)
 
 EXPORT_SYMBOL(drop_super);
 
+void drop_super_exclusive(struct super_block *sb)
+{
+	up_write(&sb->s_umount);
+	put_super(sb);
+}
+EXPORT_SYMBOL(drop_super_exclusive);
+
 /**
  *	iterate_supers - call function for all active superblocks
  *	@f: function to call
@@ -628,15 +635,7 @@ void iterate_supers_type(struct file_system_type *type,
 
 EXPORT_SYMBOL(iterate_supers_type);
 
-/**
- *	get_super - get the superblock of a device
- *	@bdev: device to get the superblock for
- *	
- *	Scans the superblock list and finds the superblock of the file system
- *	mounted on the device given. %NULL is returned if no match is found.
- */
-
-struct super_block *get_super(struct block_device *bdev)
+static struct super_block *__get_super(struct block_device *bdev, bool excl)
 {
 	struct super_block *sb;
 
@@ -651,11 +650,17 @@ rescan:
 		if (sb->s_bdev == bdev) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&sb->s_umount);
+			if (!excl)
+				down_read(&sb->s_umount);
+			else
+				down_write(&sb->s_umount);
 			/* still alive? */
 			if (sb->s_root && (sb->s_flags & MS_BORN))
 				return sb;
-			up_read(&sb->s_umount);
+			if (!excl)
+				up_read(&sb->s_umount);
+			else
+				up_write(&sb->s_umount);
 			/* nope, got unmounted */
 			spin_lock(&sb_lock);
 			__put_super(sb);
@@ -666,31 +671,66 @@ rescan:
 	return NULL;
 }
 
-EXPORT_SYMBOL(get_super);
-
 /**
- *	get_super_thawed - get thawed superblock of a device
+ *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
  *
  *	Scans the superblock list and finds the superblock of the file system
- *	mounted on the device. The superblock is returned once it is thawed
- *	(or immediately if it was not frozen). %NULL is returned if no match
- *	is found.
+ *	mounted on the device given. %NULL is returned if no match is found.
  */
-struct super_block *get_super_thawed(struct block_device *bdev)
+struct super_block *get_super(struct block_device *bdev)
+{
+	return __get_super(bdev, false);
+}
+EXPORT_SYMBOL(get_super);
+
+static struct super_block *__get_super_thawed(struct block_device *bdev,
+					      bool excl)
 {
 	while (1) {
-		struct super_block *s = get_super(bdev);
+		struct super_block *s = __get_super(bdev, excl);
 		if (!s || s->s_writers.frozen == SB_UNFROZEN)
 			return s;
-		up_read(&s->s_umount);
+		if (!excl)
+			up_read(&s->s_umount);
+		else
+			up_write(&s->s_umount);
 		wait_event(s->s_writers.wait_unfrozen,
 			   s->s_writers.frozen == SB_UNFROZEN);
 		put_super(s);
 	}
 }
+
+/**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	return __get_super_thawed(bdev, false);
+}
 EXPORT_SYMBOL(get_super_thawed);
 
+/**
+ *	get_super_exclusive_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen) and s_umount semaphore is held
+ *	in exclusive mode. %NULL is returned if no match is found.
+ */
+struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
+{
+	return __get_super_thawed(bdev, true);
+}
+EXPORT_SYMBOL(get_super_exclusive_thawed);
+
 /**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..d04cfdefcd11 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2949,8 +2949,10 @@ extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_super_thawed(struct block_device *);
+extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
+extern void drop_super_exclusive(struct super_block *sb);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
 extern void iterate_supers_type(struct file_system_type *,
 			        void (*)(struct super_block *, void *), void *);
-- 
cgit v1.2.3


From 383d0fca7035a12f1201277d33e8fc87c9d60c9a Mon Sep 17 00:00:00 2001
From: Venkat Reddy Talla <vreddytalla@nvidia.com>
Date: Thu, 17 Nov 2016 23:24:35 +0530
Subject: regulator: max77620: add support to configure MPOK

Adding support to configure regulator POK mapping bit
to control nRST_IO and GPIO1 POK function.
In  tegra based platform which uses MAX20024 pmic, when
some of regulators are configured FPS_NONE(flexible power sequencer)
causes PMIC GPIO1 to go low which lead to various other rails turning off,
to avoid this MPOK bit of those regulators need to be set to 0
so that PMIC GPIO1 will not go low.

Signed-off-by: Venkat Reddy Talla <vreddytalla@nvidia.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77620-regulator.c | 46 ++++++++++++++++++++++++++++++++++
 include/linux/mfd/max77620.h           |  2 ++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index c39a56b41901..d088a7c79e60 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -80,6 +80,7 @@ struct max77620_regulator_pdata {
 	int suspend_fps_pd_slot;
 	int suspend_fps_pu_slot;
 	int current_mode;
+	int power_ok;
 	int ramp_rate_setting;
 };
 
@@ -350,11 +351,48 @@ static int max77620_set_slew_rate(struct max77620_regulator *pmic, int id,
 	return 0;
 }
 
+static int max77620_config_power_ok(struct max77620_regulator *pmic, int id)
+{
+	struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
+	struct max77620_regulator_info *rinfo = pmic->rinfo[id];
+	struct max77620_chip *chip = dev_get_drvdata(pmic->dev->parent);
+	u8 val, mask;
+	int ret;
+
+	switch (chip->chip_id) {
+	case MAX20024:
+		if (rpdata->power_ok >= 0) {
+			if (rinfo->type == MAX77620_REGULATOR_TYPE_SD)
+				mask = MAX20024_SD_CFG1_MPOK_MASK;
+			else
+				mask = MAX20024_LDO_CFG2_MPOK_MASK;
+
+			val = rpdata->power_ok ? mask : 0;
+
+			ret = regmap_update_bits(pmic->rmap, rinfo->cfg_addr,
+						 mask, val);
+			if (ret < 0) {
+				dev_err(pmic->dev, "Reg 0x%02x update failed %d\n",
+					rinfo->cfg_addr, ret);
+				return ret;
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static int max77620_init_pmic(struct max77620_regulator *pmic, int id)
 {
 	struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
 	int ret;
 
+	max77620_config_power_ok(pmic, id);
+
 	/* Update power mode */
 	ret = max77620_regulator_get_power_mode(pmic, id);
 	if (ret < 0)
@@ -594,6 +632,12 @@ static int max77620_of_parse_cb(struct device_node *np,
 			np, "maxim,suspend-fps-power-down-slot", &pval);
 	rpdata->suspend_fps_pd_slot = (!ret) ? pval : -1;
 
+	ret = of_property_read_u32(np, "maxim,power-ok-control", &pval);
+	if (!ret)
+		rpdata->power_ok = pval;
+	else
+		rpdata->power_ok = -1;
+
 	ret = of_property_read_u32(np, "maxim,ramp-rate-setting", &pval);
 	rpdata->ramp_rate_setting = (!ret) ? pval : 0;
 
@@ -806,6 +850,8 @@ static int max77620_regulator_resume(struct device *dev)
 	for (id = 0; id < MAX77620_NUM_REGS; id++) {
 		reg_pdata = &pmic->reg_pdata[id];
 
+		max77620_config_power_ok(pmic, id);
+
 		max77620_regulator_set_fps_slots(pmic, id, false);
 		if (reg_pdata->active_fps_src < 0)
 			continue;
diff --git a/include/linux/mfd/max77620.h b/include/linux/mfd/max77620.h
index 3ca0af07fc78..ad2a9a852aea 100644
--- a/include/linux/mfd/max77620.h
+++ b/include/linux/mfd/max77620.h
@@ -180,6 +180,7 @@
 #define MAX77620_SD_CFG1_FPWM_SD_MASK		BIT(2)
 #define MAX77620_SD_CFG1_FPWM_SD_SKIP		0
 #define MAX77620_SD_CFG1_FPWM_SD_FPWM		BIT(2)
+#define MAX20024_SD_CFG1_MPOK_MASK		BIT(1)
 #define MAX77620_SD_CFG1_FSRADE_SD_MASK		BIT(0)
 #define MAX77620_SD_CFG1_FSRADE_SD_DISABLE	0
 #define MAX77620_SD_CFG1_FSRADE_SD_ENABLE	BIT(0)
@@ -187,6 +188,7 @@
 /* LDO_CNFG2 */
 #define MAX77620_LDO_POWER_MODE_MASK		0xC0
 #define MAX77620_LDO_POWER_MODE_SHIFT		6
+#define MAX20024_LDO_CFG2_MPOK_MASK		BIT(2)
 #define MAX77620_LDO_CFG2_ADE_MASK		BIT(1)
 #define MAX77620_LDO_CFG2_ADE_DISABLE		0
 #define MAX77620_LDO_CFG2_ADE_ENABLE		BIT(1)
-- 
cgit v1.2.3


From 4239174570da080f3623724d97062bf55de7e36b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 23 Nov 2016 15:52:45 -0500
Subject: tracing: Make tracepoint_printk a static_key

Currently, when tracepoint_printk is set (enabled by the "tp_printk" kernel
command line), it causes trace events to print via printk(). This is a very
dangerous operation, but is useful for debugging.

The issue is, it's seldom used, but it is always checked even if it's not
enabled by the kernel command line. Instead of having this feature called by
a branch against a variable, turn that variable into a static key, and this
will remove the test and jump.

To simplify things, the functions output_printk() and
trace_event_buffer_commit() were moved from trace_events.c to trace.c.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h      |  4 +++
 kernel/sysctl.c             |  2 +-
 kernel/trace/trace.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_events.c | 40 -----------------------
 4 files changed, 83 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b3d34d3e0e7e..8700049fd0e5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -945,6 +945,10 @@ extern int __disable_trace_on_warning;
 #define INIT_TRACE_RECURSION		.trace_recursion = 0,
 #endif
 
+int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp,
+			     loff_t *ppos);
+
 #else /* CONFIG_TRACING */
 static inline void  disable_trace_on_warning(void) { }
 #endif /* CONFIG_TRACING */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f9ed84..6ccc60dfbc7a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -634,7 +634,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &tracepoint_printk,
 		.maxlen		= sizeof(tracepoint_printk),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= tracepoint_printk_sysctl,
 	},
 #endif
 #ifdef CONFIG_KEXEC_CORE
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 490533726b54..725e8b2c453f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -69,6 +69,7 @@ bool __read_mostly tracing_selftest_disabled;
 /* Pipe tracepoints to printk */
 struct trace_iterator *tracepoint_print_iter;
 int tracepoint_printk;
+static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 
 /* For tracers that don't implement custom flags */
 static struct tracer_opt dummy_tracer_opt[] = {
@@ -2116,6 +2117,81 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 }
 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
 
+static DEFINE_SPINLOCK(tracepoint_iter_lock);
+static DEFINE_MUTEX(tracepoint_printk_mutex);
+
+static void output_printk(struct trace_event_buffer *fbuffer)
+{
+	struct trace_event_call *event_call;
+	struct trace_event *event;
+	unsigned long flags;
+	struct trace_iterator *iter = tracepoint_print_iter;
+
+	/* We should never get here if iter is NULL */
+	if (WARN_ON_ONCE(!iter))
+		return;
+
+	event_call = fbuffer->trace_file->event_call;
+	if (!event_call || !event_call->event.funcs ||
+	    !event_call->event.funcs->trace)
+		return;
+
+	event = &fbuffer->trace_file->event_call->event;
+
+	spin_lock_irqsave(&tracepoint_iter_lock, flags);
+	trace_seq_init(&iter->seq);
+	iter->ent = fbuffer->entry;
+	event_call->event.funcs->trace(iter, 0, event);
+	trace_seq_putc(&iter->seq, 0);
+	printk("%s", iter->seq.buffer);
+
+	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+}
+
+int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp,
+			     loff_t *ppos)
+{
+	int save_tracepoint_printk;
+	int ret;
+
+	mutex_lock(&tracepoint_printk_mutex);
+	save_tracepoint_printk = tracepoint_printk;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	/*
+	 * This will force exiting early, as tracepoint_printk
+	 * is always zero when tracepoint_printk_iter is not allocated
+	 */
+	if (!tracepoint_print_iter)
+		tracepoint_printk = 0;
+
+	if (save_tracepoint_printk == tracepoint_printk)
+		goto out;
+
+	if (tracepoint_printk)
+		static_key_enable(&tracepoint_printk_key.key);
+	else
+		static_key_disable(&tracepoint_printk_key.key);
+
+ out:
+	mutex_unlock(&tracepoint_printk_mutex);
+
+	return ret;
+}
+
+void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
+{
+	if (static_key_false(&tracepoint_printk_key.key))
+		output_printk(fbuffer);
+
+	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
+				    fbuffer->event, fbuffer->entry,
+				    fbuffer->flags, fbuffer->pc);
+}
+EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
+
 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 				     struct ring_buffer *buffer,
 				     struct ring_buffer_event *event,
@@ -7977,6 +8053,8 @@ void __init trace_init(void)
 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
 		if (WARN_ON(!tracepoint_print_iter))
 			tracepoint_printk = 0;
+		else
+			static_key_enable(&tracepoint_printk_key.key);
 	}
 	tracer_alloc_buffers();
 	trace_event_init();
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ba67ede48822..d35fc2b0d304 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -283,46 +283,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 }
 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
 
-static DEFINE_SPINLOCK(tracepoint_iter_lock);
-
-static void output_printk(struct trace_event_buffer *fbuffer)
-{
-	struct trace_event_call *event_call;
-	struct trace_event *event;
-	unsigned long flags;
-	struct trace_iterator *iter = tracepoint_print_iter;
-
-	if (!iter)
-		return;
-
-	event_call = fbuffer->trace_file->event_call;
-	if (!event_call || !event_call->event.funcs ||
-	    !event_call->event.funcs->trace)
-		return;
-
-	event = &fbuffer->trace_file->event_call->event;
-
-	spin_lock_irqsave(&tracepoint_iter_lock, flags);
-	trace_seq_init(&iter->seq);
-	iter->ent = fbuffer->entry;
-	event_call->event.funcs->trace(iter, 0, event);
-	trace_seq_putc(&iter->seq, 0);
-	printk("%s", iter->seq.buffer);
-
-	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
-}
-
-void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
-{
-	if (tracepoint_printk)
-		output_printk(fbuffer);
-
-	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
-				    fbuffer->event, fbuffer->entry,
-				    fbuffer->flags, fbuffer->pc);
-}
-EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
-
 int trace_event_reg(struct trace_event_call *call,
 		    enum trace_reg type, void *data)
 {
-- 
cgit v1.2.3


From 3bee9ea1de687925d116670f036599cbed8b66b0 Mon Sep 17 00:00:00 2001
From: "Andrew F. Davis" <afd@ti.com>
Date: Fri, 4 Nov 2016 13:33:13 -0500
Subject: power: supply: bq27xxx_battery: Fix register map for BQ27510 and
 BQ27520

The BQ27510 and BQ27520 use a slightly different register map than the
BQ27500, add a new type enum and add these gauges to it.

Fixes: d74534c27775 ("power: bq27xxx_battery: Add support for additional bq27xxx family devices")
Based-on-patch-by: Kenneth R. Crudup <kenny@panix.com>
Signed-off-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/power/supply/bq27xxx_battery.c     | 41 +++++++++++++++++++++++++++++-
 drivers/power/supply/bq27xxx_battery_i2c.c |  4 +--
 include/linux/power/bq27xxx_battery.h      |  3 ++-
 3 files changed, 44 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 0c343a0c941e..08c36b8e04bd 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -164,6 +164,25 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
 	},
+	[BQ27510] = {
+		[BQ27XXX_REG_CTRL] = 0x00,
+		[BQ27XXX_REG_TEMP] = 0x06,
+		[BQ27XXX_REG_INT_TEMP] = 0x28,
+		[BQ27XXX_REG_VOLT] = 0x08,
+		[BQ27XXX_REG_AI] = 0x14,
+		[BQ27XXX_REG_FLAGS] = 0x0a,
+		[BQ27XXX_REG_TTE] = 0x16,
+		[BQ27XXX_REG_TTF] = INVALID_REG_ADDR,
+		[BQ27XXX_REG_TTES] = 0x1a,
+		[BQ27XXX_REG_TTECP] = INVALID_REG_ADDR,
+		[BQ27XXX_REG_NAC] = 0x0c,
+		[BQ27XXX_REG_FCC] = 0x12,
+		[BQ27XXX_REG_CYCT] = 0x1e,
+		[BQ27XXX_REG_AE] = INVALID_REG_ADDR,
+		[BQ27XXX_REG_SOC] = 0x20,
+		[BQ27XXX_REG_DCAP] = 0x2e,
+		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+	},
 	[BQ27530] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
 		[BQ27XXX_REG_TEMP] = 0x06,
@@ -302,6 +321,24 @@ static enum power_supply_property bq27500_battery_props[] = {
 	POWER_SUPPLY_PROP_MANUFACTURER,
 };
 
+static enum power_supply_property bq27510_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CAPACITY_LEVEL,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
 static enum power_supply_property bq27530_battery_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
@@ -385,6 +422,7 @@ static struct {
 	BQ27XXX_PROP(BQ27000, bq27000_battery_props),
 	BQ27XXX_PROP(BQ27010, bq27010_battery_props),
 	BQ27XXX_PROP(BQ27500, bq27500_battery_props),
+	BQ27XXX_PROP(BQ27510, bq27510_battery_props),
 	BQ27XXX_PROP(BQ27530, bq27530_battery_props),
 	BQ27XXX_PROP(BQ27541, bq27541_battery_props),
 	BQ27XXX_PROP(BQ27545, bq27545_battery_props),
@@ -636,7 +674,8 @@ static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di)
  */
 static bool bq27xxx_battery_overtemp(struct bq27xxx_device_info *di, u16 flags)
 {
-	if (di->chip == BQ27500 || di->chip == BQ27541 || di->chip == BQ27545)
+	if (di->chip == BQ27500 || di->chip == BQ27510 ||
+	    di->chip == BQ27541 || di->chip == BQ27545)
 		return flags & (BQ27XXX_FLAG_OTC | BQ27XXX_FLAG_OTD);
 	if (di->chip == BQ27530 || di->chip == BQ27421)
 		return flags & BQ27XXX_FLAG_OT;
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 85d4ea2a9c20..5c5c3a6f9923 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -149,8 +149,8 @@ static const struct i2c_device_id bq27xxx_i2c_id_table[] = {
 	{ "bq27200", BQ27000 },
 	{ "bq27210", BQ27010 },
 	{ "bq27500", BQ27500 },
-	{ "bq27510", BQ27500 },
-	{ "bq27520", BQ27500 },
+	{ "bq27510", BQ27510 },
+	{ "bq27520", BQ27510 },
 	{ "bq27530", BQ27530 },
 	{ "bq27531", BQ27530 },
 	{ "bq27541", BQ27541 },
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index e30deb046156..bed9557b69e7 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -4,7 +4,8 @@
 enum bq27xxx_chip {
 	BQ27000 = 1, /* bq27000, bq27200 */
 	BQ27010, /* bq27010, bq27210 */
-	BQ27500, /* bq27500, bq27510, bq27520 */
+	BQ27500, /* bq27500 */
+	BQ27510, /* bq27510, bq27520 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From afe06efdf07c12fd9370d5cce5383398cedf6c90 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Tue, 22 Nov 2016 12:23:53 -0800
Subject: sched: Extend scheduler's asym packing

We generalize the scheduler's asym packing to provide an ordering
of the cpu beyond just the cpu number.  This allows the use of the
ASYM_PACKING scheduler machinery to move loads to preferred CPU in a
sched domain. The preference is defined with the cpu priority
given by arch_asym_cpu_priority(cpu).

We also record the most preferred cpu in a sched group when
we build the cpu's capacity for fast lookup of preferred cpu
during load balancing.

Co-developed-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-pm@vger.kernel.org
Cc: jolsa@redhat.com
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: bp@suse.de
Link: http://lkml.kernel.org/r/0e73ae12737dfaafa46c07066cc7c5d3f1675e46.1479844244.git.tim.c.chen@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h |  2 ++
 kernel/sched/core.c   | 15 +++++++++++++++
 kernel/sched/fair.c   | 53 ++++++++++++++++++++++++++++++++++-----------------
 kernel/sched/sched.h  |  6 ++++++
 4 files changed, 59 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 19abba04ceca..fe9a499d5aa4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1077,6 +1077,8 @@ static inline int cpu_numa_flags(void)
 }
 #endif
 
+extern int arch_asym_cpu_priority(int cpu);
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dc64bd71ed2b..393759bd526e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6303,7 +6303,22 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	WARN_ON(!sg);
 
 	do {
+		int cpu, max_cpu = -1;
+
 		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+
+		if (!(sd->flags & SD_ASYM_PACKING))
+			goto next;
+
+		for_each_cpu(cpu, sched_group_cpus(sg)) {
+			if (max_cpu < 0)
+				max_cpu = cpu;
+			else if (sched_asym_prefer(cpu, max_cpu))
+				max_cpu = cpu;
+		}
+		sg->asym_prefer_cpu = max_cpu;
+
+next:
 		sg = sg->next;
 	} while (sg != sd->groups);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa475896782d..18d9e75f1f6e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -97,6 +97,16 @@ unsigned int normalized_sysctl_sched_wakeup_granularity	= 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
+#ifdef CONFIG_SMP
+/*
+ * For asym packing, by default the lower numbered cpu has higher priority.
+ */
+int __weak arch_asym_cpu_priority(int cpu)
+{
+	return -cpu;
+}
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -7388,16 +7398,18 @@ asym_packing:
 	if (env->idle == CPU_NOT_IDLE)
 		return true;
 	/*
-	 * ASYM_PACKING needs to move all the work to the lowest
-	 * numbered CPUs in the group, therefore mark all groups
-	 * higher than ourself as busy.
+	 * ASYM_PACKING needs to move all the work to the highest
+	 * prority CPUs in the group, therefore mark all groups
+	 * of lower priority than ourself as busy.
 	 */
-	if (sgs->sum_nr_running && env->dst_cpu < group_first_cpu(sg)) {
+	if (sgs->sum_nr_running &&
+	    sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) {
 		if (!sds->busiest)
 			return true;
 
-		/* Prefer to move from highest possible cpu's work */
-		if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
+		/* Prefer to move from lowest priority cpu's work */
+		if (sched_asym_prefer(sds->busiest->asym_prefer_cpu,
+				      sg->asym_prefer_cpu))
 			return true;
 	}
 
@@ -7549,8 +7561,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 	if (!sds->busiest)
 		return 0;
 
-	busiest_cpu = group_first_cpu(sds->busiest);
-	if (env->dst_cpu > busiest_cpu)
+	busiest_cpu = sds->busiest->asym_prefer_cpu;
+	if (sched_asym_prefer(busiest_cpu, env->dst_cpu))
 		return 0;
 
 	env->imbalance = DIV_ROUND_CLOSEST(
@@ -7888,10 +7900,11 @@ static int need_active_balance(struct lb_env *env)
 
 		/*
 		 * ASYM_PACKING needs to force migrate tasks from busy but
-		 * higher numbered CPUs in order to pack all tasks in the
-		 * lowest numbered CPUs.
+		 * lower priority CPUs in order to pack all tasks in the
+		 * highest priority CPUs.
 		 */
-		if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu)
+		if ((sd->flags & SD_ASYM_PACKING) &&
+		    sched_asym_prefer(env->dst_cpu, env->src_cpu))
 			return 1;
 	}
 
@@ -8740,7 +8753,7 @@ static inline bool nohz_kick_needed(struct rq *rq)
 	unsigned long now = jiffies;
 	struct sched_domain_shared *sds;
 	struct sched_domain *sd;
-	int nr_busy, cpu = rq->cpu;
+	int nr_busy, i, cpu = rq->cpu;
 	bool kick = false;
 
 	if (unlikely(rq->idle_balance))
@@ -8791,12 +8804,18 @@ static inline bool nohz_kick_needed(struct rq *rq)
 	}
 
 	sd = rcu_dereference(per_cpu(sd_asym, cpu));
-	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-				  sched_domain_span(sd)) < cpu)) {
-		kick = true;
-		goto unlock;
-	}
+	if (sd) {
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (i == cpu ||
+			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
+				continue;
 
+			if (sched_asym_prefer(i, cpu)) {
+				kick = true;
+				goto unlock;
+			}
+		}
+	}
 unlock:
 	rcu_read_unlock();
 	return kick;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d7e39317d688..7b34c7826ca5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -540,6 +540,11 @@ struct dl_rq {
 
 #ifdef CONFIG_SMP
 
+static inline bool sched_asym_prefer(int a, int b)
+{
+	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
+}
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -908,6 +913,7 @@ struct sched_group {
 
 	unsigned int group_weight;
 	struct sched_group_capacity *sgc;
+	int asym_prefer_cpu;		/* cpu of highest priority in group */
 
 	/*
 	 * The CPUs this group covers.
-- 
cgit v1.2.3


From 2027cbcf497579cb3bd71ed4173ae9b950aa09a3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 22 Nov 2016 18:31:49 +0100
Subject: driver core: add CLASS_ATTR_WO()

Some class subsystems are open-coding CLASS_ATTR_WO because the driver
core never provided it.  Add the macro to device.h so that we can go
around and fix up the individual subsystems as needed.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 49f453892ca5..4264caacebb9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -465,6 +465,8 @@ struct class_attribute {
 	struct class_attribute class_attr_##_name = __ATTR_RW(_name)
 #define CLASS_ATTR_RO(_name) \
 	struct class_attribute class_attr_##_name = __ATTR_RO(_name)
+#define CLASS_ATTR_WO(_name) \
+	struct class_attribute class_attr_##_name = __ATTR_WO(_name)
 
 extern int __must_check class_create_file_ns(struct class *class,
 					     const struct class_attribute *attr,
-- 
cgit v1.2.3


From 4d5538f5882a6b67eefbab0f0a3a67ce811621aa Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 13 Oct 2016 14:10:40 +0200
Subject: i2c: use an IRQ to report Host Notify events, not alert

The current SMBus Host Notify implementation relies on .alert() to
relay its notifications. However, the use cases where SMBus Host
Notify is needed currently is to signal data ready on touchpads.

This is closer to an IRQ than a custom API through .alert().
Given that the 2 touchpad manufacturers (Synaptics and Elan) that
use SMBus Host Notify don't put any data in the SMBus payload, the
concept actually matches one to one.

Benefits are multiple:
- simpler code and API: the client will just have an IRQ, and
  nothing needs to be added in the adapter beside internally
  enabling it.
- no more specific workqueue, the threading is handled by IRQ core
  directly (when required)
- no more races when removing the device (the drivers are already
  required to disable irq on remove)
- simpler handling for drivers: use plain regular IRQs
- no more dependency on i2c-smbus for i2c-i801 (and any other adapter)
- the IRQ domain is created automatically when the adapter exports
  the Host Notify capability
- the IRQ are assign only if ACPI, OF and the caller did not assign
  one already
- the domain is automatically destroyed on remove
- fewer lines of code (minus 20, yeah!)

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/smbus-protocol |  12 +++--
 drivers/i2c/Kconfig              |   1 +
 drivers/i2c/busses/i2c-i801.c    |  32 +++--------
 drivers/i2c/i2c-core.c           | 113 +++++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-smbus.c          | 102 -----------------------------------
 include/linux/i2c-smbus.h        |  27 ----------
 include/linux/i2c.h              |   4 ++
 7 files changed, 133 insertions(+), 158 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
index 14d4ec1be245..092d474f5843 100644
--- a/Documentation/i2c/smbus-protocol
+++ b/Documentation/i2c/smbus-protocol
@@ -200,10 +200,14 @@ alerting device's address.
 [S] [HostAddr] [Wr] A [DevAddr] A [DataLow] A [DataHigh] A [P]
 
 This is implemented in the following way in the Linux kernel:
-* I2C bus drivers which support SMBus Host Notify should call
-  i2c_setup_smbus_host_notify() to setup SMBus Host Notify support.
-* I2C drivers for devices which can trigger SMBus Host Notify should implement
-  the optional alert() callback.
+* I2C bus drivers which support SMBus Host Notify should report
+  I2C_FUNC_SMBUS_HOST_NOTIFY.
+* I2C bus drivers trigger SMBus Host Notify by a call to
+  i2c_handle_smbus_host_notify().
+* I2C drivers for devices which can trigger SMBus Host Notify will have
+  client->irq assigned to a Host Notify IRQ if noone else specified an other.
+
+There is currently no way to retrieve the data parameter from the client.
 
 
 Packet Error Checking (PEC)
diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index d223650a97e4..de305f89a659 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -7,6 +7,7 @@ menu "I2C support"
 config I2C
 	tristate "I2C support"
 	select RT_MUTEXES
+	select IRQ_DOMAIN
 	---help---
 	  I2C (pronounce: I-squared-C) is a slow serial bus protocol used in
 	  many micro controller applications and developed by Philips.  SMBus,
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index c0c0cac9950c..e242db43774b 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -269,7 +269,6 @@ struct i801_priv {
 	 */
 	bool acpi_reserved;
 	struct mutex acpi_lock;
-	struct smbus_host_notify *host_notify;
 };
 
 #define FEATURE_SMBUS_PEC	BIT(0)
@@ -585,10 +584,10 @@ static irqreturn_t i801_host_notify_isr(struct i801_priv *priv)
 
 	/*
 	 * With the tested platforms, reading SMBNTFDDAT (22 + (p)->smba)
-	 * always returns 0 and is safe to read.
-	 * We just use 0 given we have no use of the data right now.
+	 * always returns 0. Our current implementation doesn't provide
+	 * data, so we just ignore it.
 	 */
-	i2c_handle_smbus_host_notify(priv->host_notify, addr, 0);
+	i2c_handle_smbus_host_notify(&priv->adapter, addr);
 
 	/* clear Host Notify bit and return */
 	outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv));
@@ -951,17 +950,12 @@ static u32 i801_func(struct i2c_adapter *adapter)
 		I2C_FUNC_SMBUS_HOST_NOTIFY : 0);
 }
 
-static int i801_enable_host_notify(struct i2c_adapter *adapter)
+static void i801_enable_host_notify(struct i2c_adapter *adapter)
 {
 	struct i801_priv *priv = i2c_get_adapdata(adapter);
 
 	if (!(priv->features & FEATURE_HOST_NOTIFY))
-		return -ENOTSUPP;
-
-	if (!priv->host_notify)
-		priv->host_notify = i2c_setup_smbus_host_notify(adapter);
-	if (!priv->host_notify)
-		return -ENOMEM;
+		return;
 
 	priv->original_slvcmd = inb_p(SMBSLVCMD(priv));
 
@@ -971,8 +965,6 @@ static int i801_enable_host_notify(struct i2c_adapter *adapter)
 
 	/* clear Host Notify bit to allow a new notification */
 	outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv));
-
-	return 0;
 }
 
 static void i801_disable_host_notify(struct i801_priv *priv)
@@ -1647,14 +1639,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return err;
 	}
 
-	/*
-	 * Enable Host Notify for chips that supports it.
-	 * It is done after i2c_add_adapter() so that we are sure the work queue
-	 * is not used if i2c_add_adapter() fails.
-	 */
-	err = i801_enable_host_notify(&priv->adapter);
-	if (err && err != -ENOTSUPP)
-		dev_warn(&dev->dev, "Unable to enable SMBus Host Notify\n");
+	i801_enable_host_notify(&priv->adapter);
 
 	i801_probe_optional_slaves(priv);
 	/* We ignore errors - multiplexing is optional */
@@ -1705,11 +1690,8 @@ static int i801_resume(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct i801_priv *priv = pci_get_drvdata(pci_dev);
-	int err;
 
-	err = i801_enable_host_notify(&priv->adapter);
-	if (err && err != -ENOTSUPP)
-		dev_warn(dev, "Unable to enable SMBus Host Notify\n");
+	i801_enable_host_notify(&priv->adapter);
 
 	return 0;
 }
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 8b93a262e237..3a1bc9c4efc7 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -65,6 +65,9 @@
 #define I2C_ADDR_OFFSET_TEN_BIT	0xa000
 #define I2C_ADDR_OFFSET_SLAVE	0x1000
 
+#define I2C_ADDR_7BITS_MAX	0x77
+#define I2C_ADDR_7BITS_COUNT	(I2C_ADDR_7BITS_MAX + 1)
+
 /* core_lock protects i2c_adapter_idr, and guarantees
    that device detection, deletion of detected devices, and attach_adapter
    calls are serialized */
@@ -896,6 +899,25 @@ static void i2c_init_recovery(struct i2c_adapter *adap)
 	adap->bus_recovery_info = NULL;
 }
 
+static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
+{
+	struct i2c_adapter *adap = client->adapter;
+	unsigned int irq;
+
+	if (!adap->host_notify_domain)
+		return -ENXIO;
+
+	if (client->flags & I2C_CLIENT_TEN)
+		return -EINVAL;
+
+	irq = irq_find_mapping(adap->host_notify_domain, client->addr);
+	if (!irq)
+		irq = irq_create_mapping(adap->host_notify_domain,
+					 client->addr);
+
+	return irq > 0 ? irq : -ENXIO;
+}
+
 static int i2c_device_probe(struct device *dev)
 {
 	struct i2c_client	*client = i2c_verify_client(dev);
@@ -917,6 +939,14 @@ static int i2c_device_probe(struct device *dev)
 		}
 		if (irq == -EPROBE_DEFER)
 			return irq;
+		/*
+		 * ACPI and OF did not find any useful IRQ, try to see
+		 * if Host Notify can be used.
+		 */
+		if (irq < 0) {
+			dev_dbg(dev, "Using Host Notify IRQ\n");
+			irq = i2c_smbus_host_notify_to_irq(client);
+		}
 		if (irq < 0)
 			irq = 0;
 
@@ -1866,6 +1896,79 @@ static const struct i2c_lock_operations i2c_adapter_lock_ops = {
 	.unlock_bus =  i2c_adapter_unlock_bus,
 };
 
+static void i2c_host_notify_irq_teardown(struct i2c_adapter *adap)
+{
+	struct irq_domain *domain = adap->host_notify_domain;
+	irq_hw_number_t hwirq;
+
+	if (!domain)
+		return;
+
+	for (hwirq = 0 ; hwirq < I2C_ADDR_7BITS_COUNT ; hwirq++)
+		irq_dispose_mapping(irq_find_mapping(domain, hwirq));
+
+	irq_domain_remove(domain);
+	adap->host_notify_domain = NULL;
+}
+
+static int i2c_host_notify_irq_map(struct irq_domain *h,
+					  unsigned int virq,
+					  irq_hw_number_t hw_irq_num)
+{
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops i2c_host_notify_irq_ops = {
+	.map = i2c_host_notify_irq_map,
+};
+
+static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap)
+{
+	struct irq_domain *domain;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY))
+		return 0;
+
+	domain = irq_domain_create_linear(adap->dev.fwnode,
+					  I2C_ADDR_7BITS_COUNT,
+					  &i2c_host_notify_irq_ops, adap);
+	if (!domain)
+		return -ENOMEM;
+
+	adap->host_notify_domain = domain;
+
+	return 0;
+}
+
+/**
+ * i2c_handle_smbus_host_notify - Forward a Host Notify event to the correct
+ * I2C client.
+ * @adap: the adapter
+ * @addr: the I2C address of the notifying device
+ * Context: can't sleep
+ *
+ * Helper function to be called from an I2C bus driver's interrupt
+ * handler. It will schedule the Host Notify IRQ.
+ */
+int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
+{
+	int irq;
+
+	if (!adap)
+		return -EINVAL;
+
+	irq = irq_find_mapping(adap->host_notify_domain, addr);
+	if (irq <= 0)
+		return -ENXIO;
+
+	generic_handle_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify);
+
 static int i2c_register_adapter(struct i2c_adapter *adap)
 {
 	int res = -EINVAL;
@@ -1897,6 +2000,14 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 	if (adap->timeout == 0)
 		adap->timeout = HZ;
 
+	/* register soft irqs for Host Notify */
+	res = i2c_setup_host_notify_irq_domain(adap);
+	if (res) {
+		pr_err("adapter '%s': can't create Host Notify IRQs (%d)\n",
+		       adap->name, res);
+		goto out_list;
+	}
+
 	dev_set_name(&adap->dev, "i2c-%d", adap->nr);
 	adap->dev.bus = &i2c_bus_type;
 	adap->dev.type = &i2c_adapter_type;
@@ -2134,6 +2245,8 @@ void i2c_del_adapter(struct i2c_adapter *adap)
 
 	pm_runtime_disable(&adap->dev);
 
+	i2c_host_notify_irq_teardown(adap);
+
 	/* wait until all references to the device are gone
 	 *
 	 * FIXME: This is old code and should ideally be replaced by an
diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
index b0d2679c60d1..f9271c713d20 100644
--- a/drivers/i2c/i2c-smbus.c
+++ b/drivers/i2c/i2c-smbus.c
@@ -241,108 +241,6 @@ int i2c_handle_smbus_alert(struct i2c_client *ara)
 }
 EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert);
 
-static void smbus_host_notify_work(struct work_struct *work)
-{
-	struct alert_data alert;
-	struct i2c_adapter *adapter;
-	unsigned long flags;
-	u16 payload;
-	u8 addr;
-	struct smbus_host_notify *data;
-
-	data = container_of(work, struct smbus_host_notify, work);
-
-	spin_lock_irqsave(&data->lock, flags);
-	payload = data->payload;
-	addr = data->addr;
-	adapter = data->adapter;
-
-	/* clear the pending bit and release the spinlock */
-	data->pending = false;
-	spin_unlock_irqrestore(&data->lock, flags);
-
-	if (!adapter || !addr)
-		return;
-
-	alert.type = I2C_PROTOCOL_SMBUS_HOST_NOTIFY;
-	alert.addr = addr;
-	alert.data = payload;
-
-	device_for_each_child(&adapter->dev, &alert, smbus_do_alert);
-}
-
-/**
- * i2c_setup_smbus_host_notify - Allocate a new smbus_host_notify for the given
- * I2C adapter.
- * @adapter: the adapter we want to associate a Host Notify function
- *
- * Returns a struct smbus_host_notify pointer on success, and NULL on failure.
- * The resulting smbus_host_notify must not be freed afterwards, it is a
- * managed resource already.
- */
-struct smbus_host_notify *i2c_setup_smbus_host_notify(struct i2c_adapter *adap)
-{
-	struct smbus_host_notify *host_notify;
-
-	host_notify = devm_kzalloc(&adap->dev, sizeof(struct smbus_host_notify),
-				   GFP_KERNEL);
-	if (!host_notify)
-		return NULL;
-
-	host_notify->adapter = adap;
-
-	spin_lock_init(&host_notify->lock);
-	INIT_WORK(&host_notify->work, smbus_host_notify_work);
-
-	return host_notify;
-}
-EXPORT_SYMBOL_GPL(i2c_setup_smbus_host_notify);
-
-/**
- * i2c_handle_smbus_host_notify - Forward a Host Notify event to the correct
- * I2C client.
- * @host_notify: the struct host_notify attached to the relevant adapter
- * @addr: the I2C address of the notifying device
- * @data: the payload of the notification
- * Context: can't sleep
- *
- * Helper function to be called from an I2C bus driver's interrupt
- * handler. It will schedule the Host Notify work, in turn calling the
- * corresponding I2C device driver's alert function.
- *
- * host_notify should be a valid pointer previously returned by
- * i2c_setup_smbus_host_notify().
- */
-int i2c_handle_smbus_host_notify(struct smbus_host_notify *host_notify,
-				 unsigned short addr, unsigned int data)
-{
-	unsigned long flags;
-	struct i2c_adapter *adapter;
-
-	if (!host_notify || !host_notify->adapter)
-		return -EINVAL;
-
-	adapter = host_notify->adapter;
-
-	spin_lock_irqsave(&host_notify->lock, flags);
-
-	if (host_notify->pending) {
-		spin_unlock_irqrestore(&host_notify->lock, flags);
-		dev_warn(&adapter->dev, "Host Notify already scheduled.\n");
-		return -EBUSY;
-	}
-
-	host_notify->payload = data;
-	host_notify->addr = addr;
-
-	/* Mark that there is a pending notification and release the lock */
-	host_notify->pending = true;
-	spin_unlock_irqrestore(&host_notify->lock, flags);
-
-	return schedule_work(&host_notify->work);
-}
-EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify);
-
 module_i2c_driver(smbalert_driver);
 
 MODULE_AUTHOR("Jean Delvare <jdelvare@suse.de>");
diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h
index c2e3324f9468..a1385023a29b 100644
--- a/include/linux/i2c-smbus.h
+++ b/include/linux/i2c-smbus.h
@@ -50,31 +50,4 @@ struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter,
 					 struct i2c_smbus_alert_setup *setup);
 int i2c_handle_smbus_alert(struct i2c_client *ara);
 
-/**
- * smbus_host_notify - internal structure used by the Host Notify mechanism.
- * @adapter: the I2C adapter associated with this struct
- * @work: worker used to schedule the IRQ in the slave device
- * @lock: spinlock to check if a notification is already pending
- * @pending: flag set when a notification is pending (any new notification will
- *		be rejected if pending is true)
- * @payload: the actual payload of the Host Notify event
- * @addr: the address of the slave device which raised the notification
- *
- * This struct needs to be allocated by i2c_setup_smbus_host_notify() and does
- * not need to be freed. Internally, i2c_setup_smbus_host_notify() uses a
- * managed resource to clean this up when the adapter get released.
- */
-struct smbus_host_notify {
-	struct i2c_adapter	*adapter;
-	struct work_struct	work;
-	spinlock_t		lock;
-	bool			pending;
-	u16			payload;
-	u8			addr;
-};
-
-struct smbus_host_notify *i2c_setup_smbus_host_notify(struct i2c_adapter *adap);
-int i2c_handle_smbus_host_notify(struct smbus_host_notify *host_notify,
-				 unsigned short addr, unsigned int data);
-
 #endif /* _LINUX_I2C_SMBUS_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 82cf90945bb8..b2109c522dec 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -30,6 +30,7 @@
 #include <linux/device.h>	/* for struct device */
 #include <linux/sched.h>	/* for completion */
 #include <linux/mutex.h>
+#include <linux/irqdomain.h>		/* for Host Notify IRQ */
 #include <linux/of.h>		/* for struct device_node */
 #include <linux/swab.h>		/* for swab16 */
 #include <uapi/linux/i2c.h>
@@ -575,6 +576,8 @@ struct i2c_adapter {
 
 	struct i2c_bus_recovery_info *bus_recovery_info;
 	const struct i2c_adapter_quirks *quirks;
+
+	struct irq_domain *host_notify_domain;
 };
 #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
 
@@ -747,6 +750,7 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
 	return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0);
 }
 
+int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr);
 /**
  * module_i2c_driver() - Helper macro for registering a modular I2C driver
  * @__i2c_driver: i2c_driver struct
-- 
cgit v1.2.3


From 8b533a0eeefc5861cea57163dd3cec2798a77f6c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 22 Nov 2016 12:23:59 -0800
Subject: acpi/bus: Set _OSC for diverse core support

Set the OSC_SB_CPC_DIVERSE_HIGH_SUPPORT (bit 12) to enable diverse
core support.

This is required to enable the BIOS support of the Intel Turbo Boost Max
Technology 3.0 feature.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: linux-pm@vger.kernel.org
Cc: peterz@infradead.org
Cc: jolsa@redhat.com
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: bp@suse.de
Link: http://lkml.kernel.org/r/a023623a727e86040a1715797055f6402caefd7e.1479844244.git.tim.c.chen@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/bus.c   | 3 +++
 include/linux/acpi.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 2f381ba1e1f2..806db0d6e6e8 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -338,6 +338,9 @@ static void acpi_bus_osc_support(void)
 	}
 #endif
 
+	if (IS_ENABLED(CONFIG_SCHED_ITMT))
+		capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
+
 	if (!ghes_disable)
 		capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 61a3d90f32b3..051023756520 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -469,6 +469,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPCV2_SUPPORT			0x00000040
 #define OSC_SB_PCLPI_SUPPORT			0x00000080
 #define OSC_SB_OSLPI_SUPPORT			0x00000100
+#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT		0x00001000
 
 extern bool osc_sb_apei_support_acked;
 extern bool osc_pc_lpi_support_confirmed;
-- 
cgit v1.2.3


From d06f78c4232d6a84b50839f61d9d7fbb222d8118 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 22 Nov 2016 11:40:55 -0800
Subject: net: phy: broadcom: Add support code for downshift/Wirespeed

Broadcom's Wirespeed feature allows us to configure how auto-negotiation
should behave with fewer working pairs of wires on a cable. Add support
code for retrieving and setting such downshift counters using the
recently added ethtool downshift tunables.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 86 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/phy/bcm-phy-lib.h |  5 +++
 include/linux/brcmphy.h       | 10 +++++
 3 files changed, 101 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 18e11b3a0f41..d742894816f6 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -225,6 +225,92 @@ int bcm_phy_enable_eee(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_enable_eee);
 
+int bcm_phy_downshift_get(struct phy_device *phydev, u8 *count)
+{
+	int val;
+
+	val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+	if (val < 0)
+		return val;
+
+	/* Check if wirespeed is enabled or not */
+	if (!(val & MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN)) {
+		*count = DOWNSHIFT_DEV_DISABLE;
+		return 0;
+	}
+
+	val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR2);
+	if (val < 0)
+		return val;
+
+	/* Downgrade after one link attempt */
+	if (val & BCM54XX_SHD_SCR2_WSPD_RTRY_DIS) {
+		*count = 1;
+	} else {
+		/* Downgrade after configured retry count */
+		val >>= BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+		val &= BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK;
+		*count = val + BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_downshift_get);
+
+int bcm_phy_downshift_set(struct phy_device *phydev, u8 count)
+{
+	int val = 0, ret = 0;
+
+	/* Range check the number given */
+	if (count - BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET >
+	    BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK &&
+	    count != DOWNSHIFT_DEV_DEFAULT_COUNT) {
+		return -ERANGE;
+	}
+
+	val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+	if (val < 0)
+		return val;
+
+	/* Se the write enable bit */
+	val |= MII_BCM54XX_AUXCTL_MISC_WREN;
+
+	if (count == DOWNSHIFT_DEV_DISABLE) {
+		val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN;
+		return bcm54xx_auxctl_write(phydev,
+					    MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+					    val);
+	} else {
+		val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN;
+		ret = bcm54xx_auxctl_write(phydev,
+					   MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+					   val);
+		if (ret < 0)
+			return ret;
+	}
+
+	val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR2);
+	val &= ~(BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK <<
+		 BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT |
+		 BCM54XX_SHD_SCR2_WSPD_RTRY_DIS);
+
+	switch (count) {
+	case 1:
+		val |= BCM54XX_SHD_SCR2_WSPD_RTRY_DIS;
+		break;
+	case DOWNSHIFT_DEV_DEFAULT_COUNT:
+		val |= 1 << BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+		break;
+	default:
+		val |= (count - BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET) <<
+			BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+		break;
+	}
+
+	return bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR2, val);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_downshift_set);
+
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Broadcom Corporation");
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index 31cb4fdf5d5a..3f492e629094 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -37,4 +37,9 @@ int bcm_phy_config_intr(struct phy_device *phydev);
 int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down);
 
 int bcm_phy_enable_eee(struct phy_device *phydev);
+
+int bcm_phy_downshift_get(struct phy_device *phydev, u8 *count);
+
+int bcm_phy_downshift_set(struct phy_device *phydev, u8 count);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 848dc508ef57..f9f8aaf9c943 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -114,6 +114,7 @@
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN	(1 << 8)
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN	(1 << 4)
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK	0x0007
 
@@ -130,6 +131,7 @@
 #define BCM_LED_SRC_INTR	0x6
 #define BCM_LED_SRC_QUALITY	0x7
 #define BCM_LED_SRC_RCVLED	0x8
+#define BCM_LED_SRC_WIRESPEED	0x9
 #define BCM_LED_SRC_MULTICOLOR1	0xa
 #define BCM_LED_SRC_OPENSHORT	0xb
 #define BCM_LED_SRC_OFF		0xe	/* Tied high */
@@ -141,6 +143,14 @@
  * Shadow values go into bits [14:10] of register 0x1c to select a shadow
  * register to access.
  */
+
+/* 00100: Reserved control register 2 */
+#define BCM54XX_SHD_SCR2		0x04
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_DIS	0x100
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT	2
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET	2
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK	0x7
+
 /* 00101: Spare Control Register 3 */
 #define BCM54XX_SHD_SCR3		0x05
 #define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
-- 
cgit v1.2.3


From 3df5b3c67546fb05266766b6abaf71563f82efe4 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 22 Nov 2016 23:09:54 +0200
Subject: net: Add net-device param to the get offloaded stats ndo

Some drivers would need to check few internal matters for
that. To be used in downstream mlx5 commit.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 include/linux/netdevice.h                      | 4 ++--
 net/core/rtnetlink.c                           | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4a1f9d5f7c03..e0d7d5adbaee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -857,7 +857,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
 	return 0;
 }
 
-static bool mlxsw_sp_port_has_offload_stats(int attr_id)
+static bool mlxsw_sp_port_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e84800edd249..ae32a27523f9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -925,7 +925,7 @@ struct netdev_xdp {
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
- * bool (*ndo_has_offload_stats)(int attr_id)
+ * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
  *	Return true if this device supports offload stats of this attr_id.
  *
  * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
@@ -1165,7 +1165,7 @@ struct net_device_ops {
 
 	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
 						     struct rtnl_link_stats64 *storage);
-	bool			(*ndo_has_offload_stats)(int attr_id);
+	bool			(*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
 	int			(*ndo_get_offload_stats)(int attr_id,
 							 const struct net_device *dev,
 							 void *attr_data);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a99917b5de33..ef8a96010816 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3671,7 +3671,7 @@ static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
 		if (!size)
 			continue;
 
-		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+		if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
 			continue;
 
 		attr = nla_reserve_64bit(skb, attr_id, size,
@@ -3712,7 +3712,7 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
 
 	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
 	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
-		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+		if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
 			continue;
 		size = rtnl_get_offload_stats_attr_size(attr_id);
 		nla_size += nla_total_size_64bit(size);
-- 
cgit v1.2.3


From 34e4e99078667d30f71a50c1e5181e4270e9d8bb Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Tue, 22 Nov 2016 23:09:58 +0200
Subject: net/mlx5: Enable to query min inline for a specific vport

Also move the inline capablities enum to a shared header vport.h

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  6 ------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 14 ++++++++------
 include/linux/mlx5/vport.h                        | 10 ++++++++--
 4 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ebf5dbc85bff..a2b32ed24315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -150,12 +150,6 @@ static inline int mlx5_max_log_rq_size(int wq_type)
 	}
 }
 
-enum {
-	MLX5E_INLINE_MODE_L2,
-	MLX5E_INLINE_MODE_VPORT_CONTEXT,
-	MLX5_INLINE_MODE_NOT_REQUIRED,
-};
-
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg  eth;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8e8d809bf3fd..19403d6bf369 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -957,7 +957,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
 	sq->max_inline  = param->max_inline;
 	sq->min_inline_mode =
-		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ?
+		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ?
 		param->min_inline_mode : 0;
 
 	err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
@@ -3417,14 +3417,13 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev,
 				   u8 *min_inline_mode)
 {
 	switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
-	case MLX5E_INLINE_MODE_L2:
+	case MLX5_CAP_INLINE_MODE_L2:
 		*min_inline_mode = MLX5_INLINE_MODE_L2;
 		break;
-	case MLX5E_INLINE_MODE_VPORT_CONTEXT:
-		mlx5_query_nic_vport_min_inline(mdev,
-						min_inline_mode);
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
 		break;
-	case MLX5_INLINE_MODE_NOT_REQUIRED:
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
 		*min_inline_mode = MLX5_INLINE_MODE_NONE;
 		break;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 525f17af108e..269e4401c342 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -113,15 +113,17 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
 	return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-				     u8 *min_inline_mode)
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				    u16 vport, u8 *min_inline)
 {
 	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+	int err;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out));
-
-	*min_inline_mode = MLX5_GET(query_nic_vport_context_out, out,
-				    nic_vport_context.min_wqe_inline_mode);
+	err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+	if (!err)
+		*min_inline = MLX5_GET(query_nic_vport_context_out, out,
+				       nic_vport_context.min_wqe_inline_mode);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 451b0bde9083..ec35157ea725 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,6 +36,12 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
+enum {
+	MLX5_CAP_INLINE_MODE_L2,
+	MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
+	MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
+};
+
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				u16 vport);
@@ -43,8 +49,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr);
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-				     u8 *min_inline);
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				    u16 vport, u8 *min_inline);
 int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From d245b3f9bd36f02fd641cba9931d8b4c77126e74 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 24 Nov 2016 10:57:25 +0100
Subject: gpio: simplify adding threaded interrupts

This tries to simplify the use of CONFIG_GPIOLIB_IRQCHIP when
using threaded interrupts: add a new call
gpiochip_irqchip_add_nested() to indicate that we're dealing
with a nested rather than a chained irqchip, then create a
separate gpiochip_set_nested_irqchip() to mirror
the gpiochip_set_chained_irqchip() call to connect the
parent and child interrupts.

In the nested case gpiochip_set_nested_irqchip() does nothing
more than call irq_set_parent() on each valid child interrupt,
which has little semantic effect in the kernel, but this is
probably still formally correct.

Update all drivers using nested interrupts to use
gpiochip_irqchip_add_nested() so we can now see clearly
which these users are.

The DLN2 driver can drop its specific hack with
.irq_not_threaded as we now recognize whether a chip is
threaded or not from its use of gpiochip_irqchip_add_nested()
signature rather than from inspecting .can_sleep.

We rename the .irq_parent to .irq_chained_parent since this
parent IRQ is only really kept around for the chained
interrupt handlers.

Cc: Lars Poeschel <poeschel@lemonage.de>
Cc: Octavian Purdila <octavian.purdila@intel.com>
Cc: Daniel Baluta <daniel.baluta@intel.com>
Cc: Bin Gao <bin.gao@linux.intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Ajay Thomas <ajay.thomas.david.rajamanickam@intel.com>
Cc: Semen Protsenko <semen.protsenko@globallogic.com>
Cc: Alexander Stein <alexander.stein@systec-electronic.com>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/driver.txt   | 62 ++++++++++++++++++++----------------
 drivers/gpio/gpio-adnp.c        | 10 +++---
 drivers/gpio/gpio-crystalcove.c |  4 +--
 drivers/gpio/gpio-dln2.c        |  1 -
 drivers/gpio/gpio-max732x.c     | 17 +++++-----
 drivers/gpio/gpio-mcp23s08.c    | 17 +++++-----
 drivers/gpio/gpio-pca953x.c     | 16 +++++-----
 drivers/gpio/gpio-pcf857x.c     | 11 ++++---
 drivers/gpio/gpio-stmpe.c       | 17 +++++-----
 drivers/gpio/gpio-tc3589x.c     | 17 +++++-----
 drivers/gpio/gpio-wcove.c       |  4 +--
 drivers/gpio/gpiolib.c          | 69 +++++++++++++++++++++++++++++++++--------
 include/linux/gpio/driver.h     | 32 ++++++++++++++-----
 13 files changed, 171 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index 368d5a294d89..747c721776ed 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -175,8 +175,8 @@ The IRQ portions of the GPIO block are implemented using an irqchip, using
 the header <linux/irq.h>. So basically such a driver is utilizing two sub-
 systems simultaneously: gpio and irq.
 
-RT_FULL: GPIO driver should not use spinlock_t or any sleepable APIs
-(like PM runtime) as part of its irq_chip implementation on -RT.
+RT_FULL: a realtime compliant GPIO driver should not use spinlock_t or any
+sleepable APIs (like PM runtime) as part of its irq_chip implementation.
 - spinlock_t should be replaced with raw_spinlock_t [1].
 - If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
   and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks
@@ -185,33 +185,32 @@ RT_FULL: GPIO driver should not use spinlock_t or any sleepable APIs
 GPIO irqchips usually fall in one of two categories:
 
 * CHAINED GPIO irqchips: these are usually the type that is embedded on
-  an SoC. This means that there is a fast IRQ handler for the GPIOs that
+  an SoC. This means that there is a fast IRQ flow handler for the GPIOs that
   gets called in a chain from the parent IRQ handler, most typically the
-  system interrupt controller. This means the GPIO irqchip is registered
-  using irq_set_chained_handler() or the corresponding
-  gpiochip_set_chained_irqchip() helper function, and the GPIO irqchip
-  handler will be called immediately from the parent irqchip, while
-  holding the IRQs disabled. The GPIO irqchip will then end up calling
-  something like this sequence in its interrupt handler:
-
-  static irqreturn_t tc3589x_gpio_irq(int irq, void *data)
+  system interrupt controller. This means that the GPIO irqchip handler will
+  be called immediately from the parent irqchip, while holding the IRQs
+  disabled. The GPIO irqchip will then end up calling something like this
+  sequence in its interrupt handler:
+
+  static irqreturn_t foo_gpio_irq(int irq, void *data)
       chained_irq_enter(...);
       generic_handle_irq(...);
       chained_irq_exit(...);
 
   Chained GPIO irqchips typically can NOT set the .can_sleep flag on
-  struct gpio_chip, as everything happens directly in the callbacks.
+  struct gpio_chip, as everything happens directly in the callbacks: no
+  slow bus traffic like I2C can be used.
 
   RT_FULL: Note, chained IRQ handlers will not be forced threaded on -RT.
   As result, spinlock_t or any sleepable APIs (like PM runtime) can't be used
   in chained IRQ handler.
-  if required (and if it can't be converted to the nested threaded GPIO irqchip)
-  - chained IRQ handler can be converted to generic irq handler and this way
-  it will be threaded IRQ handler on -RT and hard IRQ handler on non-RT
+  If required (and if it can't be converted to the nested threaded GPIO irqchip)
+  a chained IRQ handler can be converted to generic irq handler and this way
+  it will be a threaded IRQ handler on -RT and a hard IRQ handler on non-RT
   (for example, see [3]).
   Know W/A: The generic_handle_irq() is expected to be called with IRQ disabled,
-  so IRQ core will complain if it will be called from IRQ handler which is
-  forced thread. The "fake?" raw lock can be used to W/A this problem:
+  so the IRQ core will complain if it is called from an IRQ handler which is
+  forced to a thread. The "fake?" raw lock can be used to W/A this problem:
 
 	raw_spinlock_t wa_lock;
 	static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
@@ -243,7 +242,7 @@ GPIO irqchips usually fall in one of two categories:
   by the driver. The hallmark of this driver is to call something like
   this in its interrupt handler:
 
-  static irqreturn_t tc3589x_gpio_irq(int irq, void *data)
+  static irqreturn_t foo_gpio_irq(int irq, void *data)
       ...
       handle_nested_irq(irq);
 
@@ -256,23 +255,31 @@ associated irqdomain and resource allocation callbacks, the gpiolib has
 some helpers that can be enabled by selecting the GPIOLIB_IRQCHIP Kconfig
 symbol:
 
-* gpiochip_irqchip_add(): adds an irqchip to a gpiochip. It will pass
+* gpiochip_irqchip_add(): adds a chained irqchip to a gpiochip. It will pass
   the struct gpio_chip* for the chip to all IRQ callbacks, so the callbacks
   need to embed the gpio_chip in its state container and obtain a pointer
   to the container using container_of().
   (See Documentation/driver-model/design-patterns.txt)
 
-  If there is a need to exclude certain GPIOs from the IRQ domain, one can
-  set .irq_need_valid_mask of the gpiochip before gpiochip_add_data() is
-  called. This allocates .irq_valid_mask with as many bits set as there are
-  GPIOs in the chip. Drivers can exclude GPIOs by clearing bits from this
-  mask. The mask must be filled in before gpiochip_irqchip_add() is called.
+* gpiochip_irqchip_add_nested(): adds a nested irqchip to a gpiochip.
+  Apart from that it works exactly like the chained irqchip.
 
 * gpiochip_set_chained_irqchip(): sets up a chained irq handler for a
   gpio_chip from a parent IRQ and passes the struct gpio_chip* as handler
   data. (Notice handler data, since the irqchip data is likely used by the
-  parent irqchip!) This is for the chained type of chip. This is also used
-  to set up a nested irqchip if NULL is passed as handler.
+  parent irqchip!).
+
+* gpiochip_set_nested_irqchip(): sets up a nested irq handler for a
+  gpio_chip from a parent IRQ. As the parent IRQ has usually been
+  explicitly requested by the driver, this does very little more than
+  mark all the child IRQs as having the other IRQ as parent.
+
+If there is a need to exclude certain GPIOs from the IRQ domain, you can
+set .irq_need_valid_mask of the gpiochip before gpiochip_add_data() is
+called. This allocates an .irq_valid_mask with as many bits set as there
+are GPIOs in the chip. Drivers can exclude GPIOs by clearing bits from this
+mask. The mask must be filled in before gpiochip_irqchip_add() or
+gpiochip_irqchip_add_nested() is called.
 
 To use the helpers please keep the following in mind:
 
@@ -323,6 +330,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .startup() and .shutdown() callbacks from the
 irqchip.
 
+When using the gpiolib irqchip helpers, these callback are automatically
+assigned.
+
 Real-Time compliance for GPIO IRQ chips
 ---------------------------------------
 
diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c
index 8ff7b0d3eac6..7a5c0a93e1ff 100644
--- a/drivers/gpio/gpio-adnp.c
+++ b/drivers/gpio/gpio-adnp.c
@@ -468,11 +468,11 @@ static int adnp_irq_setup(struct adnp *adnp)
 		return err;
 	}
 
-	err = gpiochip_irqchip_add(chip,
-				   &adnp_irq_chip,
-				   0,
-				   handle_simple_irq,
-				   IRQ_TYPE_NONE);
+	err = gpiochip_irqchip_add_nested(chip,
+					  &adnp_irq_chip,
+					  0,
+					  handle_simple_irq,
+					  IRQ_TYPE_NONE);
 	if (err) {
 		dev_err(chip->parent,
 			"could not connect irqchip to gpiochip\n");
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 7c446d118cd6..d0022d655a09 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -351,8 +351,8 @@ static int crystalcove_gpio_probe(struct platform_device *pdev)
 		return retval;
 	}
 
-	gpiochip_irqchip_add(&cg->chip, &crystalcove_irqchip, 0,
-			     handle_simple_irq, IRQ_TYPE_NONE);
+	gpiochip_irqchip_add_nested(&cg->chip, &crystalcove_irqchip, 0,
+				    handle_simple_irq, IRQ_TYPE_NONE);
 
 	retval = request_threaded_irq(irq, NULL, crystalcove_gpio_irq_handler,
 				      IRQF_ONESHOT, KBUILD_MODNAME, cg);
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index f7a60a441e95..5d38b08d1ee2 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -467,7 +467,6 @@ static int dln2_gpio_probe(struct platform_device *pdev)
 	dln2->gpio.base = -1;
 	dln2->gpio.ngpio = pins;
 	dln2->gpio.can_sleep = true;
-	dln2->gpio.irq_not_threaded = true;
 	dln2->gpio.set = dln2_gpio_set;
 	dln2->gpio.get = dln2_gpio_get;
 	dln2->gpio.request = dln2_gpio_request;
diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index a9aaf9d822b4..4ea4c6a1313b 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -520,20 +520,19 @@ static int max732x_irq_setup(struct max732x_chip *chip,
 				client->irq);
 			return ret;
 		}
-		ret =  gpiochip_irqchip_add(&chip->gpio_chip,
-					    &max732x_irq_chip,
-					    irq_base,
-					    handle_simple_irq,
-					    IRQ_TYPE_NONE);
+		ret =  gpiochip_irqchip_add_nested(&chip->gpio_chip,
+						   &max732x_irq_chip,
+						   irq_base,
+						   handle_simple_irq,
+						   IRQ_TYPE_NONE);
 		if (ret) {
 			dev_err(&client->dev,
 				"could not connect irqchip to gpiochip\n");
 			return ret;
 		}
-		gpiochip_set_chained_irqchip(&chip->gpio_chip,
-					     &max732x_irq_chip,
-					     client->irq,
-					     NULL);
+		gpiochip_set_nested_irqchip(&chip->gpio_chip,
+					    &max732x_irq_chip,
+					    client->irq);
 	}
 
 	return 0;
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index 99d37b56c258..504550665091 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -473,21 +473,20 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp)
 		return err;
 	}
 
-	err =  gpiochip_irqchip_add(chip,
-				    &mcp23s08_irq_chip,
-				    0,
-				    handle_simple_irq,
-				    IRQ_TYPE_NONE);
+	err =  gpiochip_irqchip_add_nested(chip,
+					   &mcp23s08_irq_chip,
+					   0,
+					   handle_simple_irq,
+					   IRQ_TYPE_NONE);
 	if (err) {
 		dev_err(chip->parent,
 			"could not connect irqchip to gpiochip: %d\n", err);
 		return err;
 	}
 
-	gpiochip_set_chained_irqchip(chip,
-				     &mcp23s08_irq_chip,
-				     mcp->irq,
-				     NULL);
+	gpiochip_set_nested_irqchip(chip,
+				    &mcp23s08_irq_chip,
+				    mcp->irq);
 
 	return 0;
 }
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index e422568e14ad..121108b6602d 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -635,20 +635,20 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 			return ret;
 		}
 
-		ret =  gpiochip_irqchip_add(&chip->gpio_chip,
-					    &pca953x_irq_chip,
-					    irq_base,
-					    handle_simple_irq,
-					    IRQ_TYPE_NONE);
+		ret =  gpiochip_irqchip_add_nested(&chip->gpio_chip,
+						   &pca953x_irq_chip,
+						   irq_base,
+						   handle_simple_irq,
+						   IRQ_TYPE_NONE);
 		if (ret) {
 			dev_err(&client->dev,
 				"could not connect irqchip to gpiochip\n");
 			return ret;
 		}
 
-		gpiochip_set_chained_irqchip(&chip->gpio_chip,
-					     &pca953x_irq_chip,
-					     client->irq, NULL);
+		gpiochip_set_nested_irqchip(&chip->gpio_chip,
+					    &pca953x_irq_chip,
+					    client->irq);
 	}
 
 	return 0;
diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index d168410e2338..895af42a4513 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -378,9 +378,10 @@ static int pcf857x_probe(struct i2c_client *client,
 
 	/* Enable irqchip if we have an interrupt */
 	if (client->irq) {
-		status = gpiochip_irqchip_add(&gpio->chip, &pcf857x_irq_chip,
-					      0, handle_level_irq,
-					      IRQ_TYPE_NONE);
+		status = gpiochip_irqchip_add_nested(&gpio->chip,
+						     &pcf857x_irq_chip,
+						     0, handle_level_irq,
+						     IRQ_TYPE_NONE);
 		if (status) {
 			dev_err(&client->dev, "cannot add irqchip\n");
 			goto fail;
@@ -393,8 +394,8 @@ static int pcf857x_probe(struct i2c_client *client,
 		if (status)
 			goto fail;
 
-		gpiochip_set_chained_irqchip(&gpio->chip, &pcf857x_irq_chip,
-					     client->irq, NULL);
+		gpiochip_set_nested_irqchip(&gpio->chip, &pcf857x_irq_chip,
+					    client->irq);
 		gpio->irq_parent = client->irq;
 	}
 
diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index e7d422a6b90b..e194d8ad8612 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c
@@ -484,21 +484,20 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
 				if (stmpe_gpio->norequest_mask & BIT(i))
 					clear_bit(i, stmpe_gpio->chip.irq_valid_mask);
 		}
-		ret =  gpiochip_irqchip_add(&stmpe_gpio->chip,
-					    &stmpe_gpio_irq_chip,
-					    0,
-					    handle_simple_irq,
-					    IRQ_TYPE_NONE);
+		ret =  gpiochip_irqchip_add_nested(&stmpe_gpio->chip,
+						   &stmpe_gpio_irq_chip,
+						   0,
+						   handle_simple_irq,
+						   IRQ_TYPE_NONE);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"could not connect irqchip to gpiochip\n");
 			goto out_disable;
 		}
 
-		gpiochip_set_chained_irqchip(&stmpe_gpio->chip,
-					     &stmpe_gpio_irq_chip,
-					     irq,
-					     NULL);
+		gpiochip_set_nested_irqchip(&stmpe_gpio->chip,
+					    &stmpe_gpio_irq_chip,
+					    irq);
 	}
 
 	platform_set_drvdata(pdev, stmpe_gpio);
diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index 5a5a6cb00eea..f041965f1b03 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -337,21 +337,20 @@ static int tc3589x_gpio_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret =  gpiochip_irqchip_add(&tc3589x_gpio->chip,
-				    &tc3589x_gpio_irq_chip,
-				    0,
-				    handle_simple_irq,
-				    IRQ_TYPE_NONE);
+	ret =  gpiochip_irqchip_add_nested(&tc3589x_gpio->chip,
+					   &tc3589x_gpio_irq_chip,
+					   0,
+					   handle_simple_irq,
+					   IRQ_TYPE_NONE);
 	if (ret) {
 		dev_err(&pdev->dev,
 			"could not connect irqchip to gpiochip\n");
 		return ret;
 	}
 
-	gpiochip_set_chained_irqchip(&tc3589x_gpio->chip,
-				     &tc3589x_gpio_irq_chip,
-				     irq,
-				     NULL);
+	gpiochip_set_nested_irqchip(&tc3589x_gpio->chip,
+				    &tc3589x_gpio_irq_chip,
+				    irq);
 
 	platform_set_drvdata(pdev, tc3589x_gpio);
 
diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index d0ddba7a9d08..88f29601f8de 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -426,8 +426,8 @@ static int wcove_gpio_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = gpiochip_irqchip_add(&wg->chip, &wcove_irqchip, 0,
-			     handle_simple_irq, IRQ_TYPE_NONE);
+	ret = gpiochip_irqchip_add_nested(&wg->chip, &wcove_irqchip, 0,
+					  handle_simple_irq, IRQ_TYPE_NONE);
 	if (ret) {
 		dev_err(dev, "Failed to add irqchip: %d\n", ret);
 		return ret;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f0fc3a0d37c8..7be4fdafb1a3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1439,7 +1439,7 @@ static bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip,
 }
 
 /**
- * gpiochip_set_chained_irqchip() - sets a chained irqchip to a gpiochip
+ * gpiochip_set_cascaded_irqchip() - connects a cascaded irqchip to a gpiochip
  * @gpiochip: the gpiochip to set the irqchip chain to
  * @irqchip: the irqchip to chain to the gpiochip
  * @parent_irq: the irq number corresponding to the parent IRQ for this
@@ -1448,10 +1448,10 @@ static bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip,
  * coming out of the gpiochip. If the interrupt is nested rather than
  * cascaded, pass NULL in this handler argument
  */
-void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
-				  struct irq_chip *irqchip,
-				  int parent_irq,
-				  irq_flow_handler_t parent_handler)
+static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip,
+					  struct irq_chip *irqchip,
+					  int parent_irq,
+					  irq_flow_handler_t parent_handler)
 {
 	unsigned int offset;
 
@@ -1475,7 +1475,7 @@ void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
 		irq_set_chained_handler_and_data(parent_irq, parent_handler,
 						 gpiochip);
 
-		gpiochip->irq_parent = parent_irq;
+		gpiochip->irq_chained_parent = parent_irq;
 	}
 
 	/* Set the parent IRQ for all affected IRQs */
@@ -1486,8 +1486,47 @@ void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
 			       parent_irq);
 	}
 }
+
+/**
+ * gpiochip_set_chained_irqchip() - connects a chained irqchip to a gpiochip
+ * @gpiochip: the gpiochip to set the irqchip chain to
+ * @irqchip: the irqchip to chain to the gpiochip
+ * @parent_irq: the irq number corresponding to the parent IRQ for this
+ * chained irqchip
+ * @parent_handler: the parent interrupt handler for the accumulated IRQ
+ * coming out of the gpiochip. If the interrupt is nested rather than
+ * cascaded, pass NULL in this handler argument
+ */
+void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
+				  struct irq_chip *irqchip,
+				  int parent_irq,
+				  irq_flow_handler_t parent_handler)
+{
+	gpiochip_set_cascaded_irqchip(gpiochip, irqchip, parent_irq,
+				      parent_handler);
+}
 EXPORT_SYMBOL_GPL(gpiochip_set_chained_irqchip);
 
+/**
+ * gpiochip_set_nested_irqchip() - connects a nested irqchip to a gpiochip
+ * @gpiochip: the gpiochip to set the irqchip nested handler to
+ * @irqchip: the irqchip to nest to the gpiochip
+ * @parent_irq: the irq number corresponding to the parent IRQ for this
+ * nested irqchip
+ */
+void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip,
+				 struct irq_chip *irqchip,
+				 int parent_irq)
+{
+	if (!gpiochip->irq_nested) {
+		chip_err(gpiochip, "tried to nest a chained gpiochip\n");
+		return;
+	}
+	gpiochip_set_cascaded_irqchip(gpiochip, irqchip, parent_irq,
+				      NULL);
+}
+EXPORT_SYMBOL_GPL(gpiochip_set_nested_irqchip);
+
 /**
  * gpiochip_irq_map() - maps an IRQ into a GPIO irqchip
  * @d: the irqdomain used by this irqchip
@@ -1510,8 +1549,8 @@ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 	 */
 	irq_set_lockdep_class(irq, chip->lock_key);
 	irq_set_chip_and_handler(irq, chip->irqchip, chip->irq_handler);
-	/* Chips that can sleep need nested thread handlers */
-	if (chip->can_sleep && !chip->irq_not_threaded)
+	/* Chips that use nested thread handlers have them marked */
+	if (chip->irq_nested)
 		irq_set_nested_thread(irq, 1);
 	irq_set_noprobe(irq);
 
@@ -1529,7 +1568,7 @@ static void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq)
 {
 	struct gpio_chip *chip = d->host_data;
 
-	if (chip->can_sleep)
+	if (chip->irq_nested)
 		irq_set_nested_thread(irq, 0);
 	irq_set_chip_and_handler(irq, NULL, NULL);
 	irq_set_chip_data(irq, NULL);
@@ -1584,9 +1623,9 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
 
 	acpi_gpiochip_free_interrupts(gpiochip);
 
-	if (gpiochip->irq_parent) {
-		irq_set_chained_handler(gpiochip->irq_parent, NULL);
-		irq_set_handler_data(gpiochip->irq_parent, NULL);
+	if (gpiochip->irq_chained_parent) {
+		irq_set_chained_handler(gpiochip->irq_chained_parent, NULL);
+		irq_set_handler_data(gpiochip->irq_chained_parent, NULL);
 	}
 
 	/* Remove all IRQ mappings and delete the domain */
@@ -1610,7 +1649,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
 }
 
 /**
- * gpiochip_irqchip_add() - adds an irqchip to a gpiochip
+ * _gpiochip_irqchip_add() - adds an irqchip to a gpiochip
  * @gpiochip: the gpiochip to add the irqchip to
  * @irqchip: the irqchip to add to the gpiochip
  * @first_irq: if not dynamically assigned, the base (first) IRQ to
@@ -1618,6 +1657,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
  * @handler: the irq handler to use (often a predefined irq core function)
  * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
  * to have the core avoid setting up any default type in the hardware.
+ * @nested: whether this is a nested irqchip calling handle_nested_irq()
+ * in its IRQ handler
  * @lock_key: lockdep class
  *
  * This function closely associates a certain irqchip with a certain
@@ -1639,6 +1680,7 @@ int _gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
 			  unsigned int type,
+			  bool nested,
 			  struct lock_class_key *lock_key)
 {
 	struct device_node *of_node;
@@ -1653,6 +1695,7 @@ int _gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 		pr_err("missing gpiochip .dev parent pointer\n");
 		return -EINVAL;
 	}
+	gpiochip->irq_nested = nested;
 	of_node = gpiochip->parent->of_node;
 #ifdef CONFIG_OF_GPIO
 	/*
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 24e2cc56beb1..4b20238e7570 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -82,8 +82,6 @@ enum single_ended_mode {
  *	implies that if the chip supports IRQs, these IRQs need to be threaded
  *	as the chip access may sleep when e.g. reading out the IRQ status
  *	registers.
- * @irq_not_threaded: flag must be set if @can_sleep is set but the
- *	IRQs don't need to be threaded
  * @read_reg: reader function for generic GPIO
  * @write_reg: writer function for generic GPIO
  * @pin2mask: some generic GPIO controllers work with the big-endian bits
@@ -109,8 +107,10 @@ enum single_ended_mode {
  *	for GPIO IRQs, provided by GPIO driver
  * @irq_default_type: default IRQ triggering type applied during GPIO driver
  *	initialization, provided by GPIO driver
- * @irq_parent: GPIO IRQ chip parent/bank linux irq number,
- *	provided by GPIO driver
+ * @irq_chained_parent: GPIO IRQ chip parent/bank linux irq number,
+ *	provided by GPIO driver for chained interrupt (not for nested
+ *	interrupts).
+ * @irq_nested: True if set the interrupt handling is nested.
  * @irq_need_valid_mask: If set core allocates @irq_valid_mask with all
  *	bits set to one
  * @irq_valid_mask: If not %NULL holds bitmask of GPIOs which are valid to
@@ -166,7 +166,6 @@ struct gpio_chip {
 	u16			ngpio;
 	const char		*const *names;
 	bool			can_sleep;
-	bool			irq_not_threaded;
 
 #if IS_ENABLED(CONFIG_GPIO_GENERIC)
 	unsigned long (*read_reg)(void __iomem *reg);
@@ -192,7 +191,8 @@ struct gpio_chip {
 	unsigned int		irq_base;
 	irq_flow_handler_t	irq_handler;
 	unsigned int		irq_default_type;
-	int			irq_parent;
+	int			irq_chained_parent;
+	bool			irq_nested;
 	bool			irq_need_valid_mask;
 	unsigned long		*irq_valid_mask;
 	struct lock_class_key	*lock_key;
@@ -270,24 +270,40 @@ void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
 		int parent_irq,
 		irq_flow_handler_t parent_handler);
 
+void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip,
+		struct irq_chip *irqchip,
+		int parent_irq);
+
 int _gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 			  struct irq_chip *irqchip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
 			  unsigned int type,
+			  bool nested,
 			  struct lock_class_key *lock_key);
 
+/* FIXME: I assume threaded IRQchips do not have the lockdep problem */
+static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
+			  struct irq_chip *irqchip,
+			  unsigned int first_irq,
+			  irq_flow_handler_t handler,
+			  unsigned int type)
+{
+	return _gpiochip_irqchip_add(gpiochip, irqchip, first_irq,
+				     handler, type, true, NULL);
+}
+
 #ifdef CONFIG_LOCKDEP
 #define gpiochip_irqchip_add(...)				\
 (								\
 	({							\
 		static struct lock_class_key _key;		\
-		_gpiochip_irqchip_add(__VA_ARGS__, &_key);	\
+		_gpiochip_irqchip_add(__VA_ARGS__, false, &_key); \
 	})							\
 )
 #else
 #define gpiochip_irqchip_add(...)				\
-	_gpiochip_irqchip_add(__VA_ARGS__, NULL)
+	_gpiochip_irqchip_add(__VA_ARGS__, false, NULL)
 #endif
 
 #endif /* CONFIG_GPIOLIB_IRQCHIP */
-- 
cgit v1.2.3


From 3c62be17d4f562f43fe1d03b48194399caa35aa5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 6 Oct 2016 19:02:05 -0700
Subject: f2fs: support multiple devices

This patch implements multiple devices support for f2fs.
Given multiple devices by mkfs.f2fs, f2fs shows them entirely as one big
volume under one f2fs instance.

Internal block management is very simple, but we will modify block allocation
and background GC policy to boost IO speed by exploiting them accoording to
each device speed.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c          |  55 ++++++++++++++++---
 fs/f2fs/f2fs.h          |  29 ++++++++--
 fs/f2fs/segment.c       | 119 +++++++++++++++++++++++++++++------------
 fs/f2fs/super.c         | 138 ++++++++++++++++++++++++++++++++++++++----------
 include/linux/f2fs_fs.h |  10 +++-
 5 files changed, 277 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 996b9aef94ce..861173f00459 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -87,6 +87,46 @@ static void f2fs_write_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
+/*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		if (FDEV(i).start_blk <= blk_addr &&
+					FDEV(i).end_blk >= blk_addr) {
+			blk_addr -= FDEV(i).start_blk;
+			bdev = FDEV(i).bdev;
+			break;
+		}
+	}
+	if (bio) {
+		bio->bi_bdev = bdev;
+		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	}
+	return bdev;
+}
+
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++)
+		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+			return i;
+	return 0;
+}
+
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
 /*
  * Low-level block read/write IO operations.
  */
@@ -97,8 +137,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 
 	bio = f2fs_bio_alloc(npages);
 
-	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	f2fs_target_device(sbi, blk_addr, bio);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 	bio->bi_private = is_read ? NULL : sbi;
 
@@ -273,7 +312,8 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	down_write(&io->io_rwsem);
 
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
-	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
@@ -961,7 +1001,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct fscrypt_ctx *ctx = NULL;
-	struct block_device *bdev = sbi->sb->s_bdev;
 	struct bio *bio;
 
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -979,8 +1018,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 			fscrypt_release_ctx(ctx);
 		return ERR_PTR(-ENOMEM);
 	}
-	bio->bi_bdev = bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+	f2fs_target_device(sbi, blkaddr, bio);
 	bio->bi_end_io = f2fs_read_end_io;
 	bio->bi_private = ctx;
 
@@ -1075,7 +1113,8 @@ got_it:
 		 * This page will go to BIO.  Do we need to send this
 		 * BIO off first?
 		 */
-		if (bio && (last_block_in_bio != block_nr - 1)) {
+		if (bio && (last_block_in_bio != block_nr - 1 ||
+			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
 			__submit_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
@@ -1734,6 +1773,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return 0;
 	if (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS))
 		return 0;
+	if (F2FS_I_SB(inode)->s_ndevs)
+		return 0;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0496e82834d0..96221b1f620e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -709,6 +709,20 @@ struct f2fs_bio_info {
 	struct rw_semaphore io_rwsem;	/* blocking op for bio */
 };
 
+#define FDEV(i)				(sbi->devs[i])
+#define RDEV(i)				(raw_super->devs[i])
+struct f2fs_dev_info {
+	struct block_device *bdev;
+	char path[MAX_PATH_LEN];
+	unsigned int total_segments;
+	block_t start_blk;
+	block_t end_blk;
+#ifdef CONFIG_BLK_DEV_ZONED
+	unsigned int nr_blkz;			/* Total number of zones */
+	u8 *blkz_type;				/* Array of zones type */
+#endif
+};
+
 enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
@@ -757,10 +771,8 @@ struct f2fs_sb_info {
 #endif
 
 #ifdef CONFIG_BLK_DEV_ZONED
-	unsigned int nr_blkz;			/* Total number of zones */
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
 	unsigned int log_blocks_per_blkz;	/* log2 F2FS blocks per zone */
-	u8 *blkz_type;				/* Array of zones type */
 #endif
 
 	/* for node-related operations */
@@ -876,6 +888,8 @@ struct f2fs_sb_info {
 
 	/* For shrinker support */
 	struct list_head s_list;
+	int s_ndevs;				/* number of devices */
+	struct f2fs_dev_info *devs;		/* for device list */
 	struct mutex umount_mutex;
 	unsigned int shrinker_run_no;
 
@@ -2138,6 +2152,9 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
 void f2fs_flush_merged_bios(struct f2fs_sb_info *);
 int f2fs_submit_page_bio(struct f2fs_io_info *);
 void f2fs_submit_page_mbio(struct f2fs_io_info *);
+struct block_device *f2fs_target_device(struct f2fs_sb_info *,
+				block_t, struct bio *);
+int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
 void set_data_blkaddr(struct dnode_of_data *);
 void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
 int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
@@ -2425,11 +2442,15 @@ static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline int get_blkz_type(struct f2fs_sb_info *sbi,
-				block_t blkaddr)
+			struct block_device *bdev, block_t blkaddr)
 {
 	unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
+	int i;
 
-	return sbi->blkz_type[zno];
+	for (i = 0; i < sbi->s_ndevs; i++)
+		if (FDEV(i).bdev == bdev)
+			return FDEV(i).blkz_type[zno];
+	return -EINVAL;
 }
 #endif
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4cc589a493e3..1869bc3973d5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -403,6 +403,33 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 	}
 }
 
+static int __submit_flush_wait(struct block_device *bdev)
+{
+	struct bio *bio = f2fs_bio_alloc(0);
+	int ret;
+
+	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+	bio->bi_bdev = bdev;
+	ret = submit_bio_wait(bio);
+	bio_put(bio);
+	return ret;
+}
+
+static int submit_flush_wait(struct f2fs_sb_info *sbi)
+{
+	int ret = __submit_flush_wait(sbi->sb->s_bdev);
+	int i;
+
+	if (sbi->s_ndevs && !ret) {
+		for (i = 1; i < sbi->s_ndevs; i++) {
+			ret = __submit_flush_wait(FDEV(i).bdev);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
 static int issue_flush_thread(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
@@ -413,25 +440,18 @@ repeat:
 		return 0;
 
 	if (!llist_empty(&fcc->issue_list)) {
-		struct bio *bio;
 		struct flush_cmd *cmd, *next;
 		int ret;
 
-		bio = f2fs_bio_alloc(0);
-
 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 
-		bio->bi_bdev = sbi->sb->s_bdev;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
-		ret = submit_bio_wait(bio);
-
+		ret = submit_flush_wait(sbi);
 		llist_for_each_entry_safe(cmd, next,
 					  fcc->dispatch_list, llnode) {
 			cmd->ret = ret;
 			complete(&cmd->wait);
 		}
-		bio_put(bio);
 		fcc->dispatch_list = NULL;
 	}
 
@@ -452,15 +472,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 		return 0;
 
 	if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
-		struct bio *bio = f2fs_bio_alloc(0);
 		int ret;
 
 		atomic_inc(&fcc->submit_flush);
-		bio->bi_bdev = sbi->sb->s_bdev;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
-		ret = submit_bio_wait(bio);
+		ret = submit_flush_wait(sbi);
 		atomic_dec(&fcc->submit_flush);
-		bio_put(bio);
 		return ret;
 	}
 
@@ -637,14 +653,18 @@ static void f2fs_submit_bio_wait_endio(struct bio *bio)
 
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
-				block_t blkstart, block_t blklen)
+		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
 	struct bio *bio = NULL;
 	int err;
 
 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
 
+	if (sbi->s_ndevs) {
+		int devi = f2fs_target_device_index(sbi, blkstart);
+
+		blkstart -= FDEV(devi).start_blk;
+	}
 	err = __blkdev_issue_discard(bdev,
 				SECTOR_FROM_BLOCK(blkstart),
 				SECTOR_FROM_BLOCK(blklen),
@@ -662,18 +682,24 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
-					block_t blkstart, block_t blklen)
+static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
-	sector_t sector = SECTOR_FROM_BLOCK(blkstart);
 	sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
-	struct block_device *bdev = sbi->sb->s_bdev;
+	sector_t sector;
+	int devi = 0;
 
-	if (nr_sects != bdev_zone_size(bdev)) {
+	if (sbi->s_ndevs) {
+		devi = f2fs_target_device_index(sbi, blkstart);
+		blkstart -= FDEV(devi).start_blk;
+	}
+	sector = SECTOR_FROM_BLOCK(blkstart);
+
+	if (sector % bdev_zone_size(bdev) || nr_sects != bdev_zone_size(bdev)) {
 		f2fs_msg(sbi->sb, KERN_INFO,
-			 "Unaligned discard attempted (sector %llu + %llu)",
-			 (unsigned long long)sector,
-			 (unsigned long long)nr_sects);
+			"(%d) %s: Unaligned discard attempted (block %x + %x)",
+			devi, sbi->s_ndevs ? FDEV(devi).path: "",
+			blkstart, blklen);
 		return -EIO;
 	}
 
@@ -682,14 +708,12 @@ static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 	 * use regular discard if the drive supports it. For sequential
 	 * zones, reset the zone write pointer.
 	 */
-	switch (get_blkz_type(sbi, blkstart)) {
+	switch (get_blkz_type(sbi, bdev, blkstart)) {
 
 	case BLK_ZONE_TYPE_CONVENTIONAL:
 		if (!blk_queue_discard(bdev_get_queue(bdev)))
 			return 0;
-		return __f2fs_issue_discard_async(sbi, blkstart,
-						  blklen);
-
+		return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
 		trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
@@ -702,14 +726,45 @@ static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 }
 #endif
 
+static int __issue_discard_async(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
+				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
+#endif
+	return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
+}
+
 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 				block_t blkstart, block_t blklen)
 {
+	sector_t start = blkstart, len = 0;
+	struct block_device *bdev;
 	struct seg_entry *se;
 	unsigned int offset;
 	block_t i;
+	int err = 0;
+
+	bdev = f2fs_target_device(sbi, blkstart, NULL);
+
+	for (i = blkstart; i < blkstart + blklen; i++, len++) {
+		if (i != start) {
+			struct block_device *bdev2 =
+				f2fs_target_device(sbi, i, NULL);
+
+			if (bdev2 != bdev) {
+				err = __issue_discard_async(sbi, bdev,
+						start, len);
+				if (err)
+					return err;
+				bdev = bdev2;
+				start = i;
+				len = 0;
+			}
+		}
 
-	for (i = blkstart; i < blkstart + blklen; i++) {
 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
 
@@ -717,11 +772,9 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 			sbi->discard_blks--;
 	}
 
-#ifdef CONFIG_BLK_DEV_ZONED
-	if (f2fs_sb_mounted_blkzoned(sbi->sb))
-		return f2fs_issue_discard_zone(sbi, blkstart, blklen);
-#endif
-	return __f2fs_issue_discard_async(sbi, blkstart, blklen);
+	if (len)
+		err = __issue_discard_async(sbi, bdev, start, len);
+	return err;
 }
 
 static void __add_discard_entry(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index eca9aea9834e..4ccbb860ad06 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -713,6 +713,19 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
 }
 
+static void destroy_device_list(struct f2fs_sb_info *sbi)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
+#ifdef CONFIG_BLK_DEV_ZONED
+		kfree(FDEV(i).blkz_type);
+#endif
+	}
+	kfree(sbi->devs);
+}
+
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -773,6 +786,8 @@ static void f2fs_put_super(struct super_block *sb)
 		crypto_free_shash(sbi->s_chksum_driver);
 	kfree(sbi->raw_super);
 
+	destroy_device_list(sbi);
+
 	destroy_percpu_info(sbi);
 	kfree(sbi);
 }
@@ -1516,9 +1531,9 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static int init_blkz_info(struct f2fs_sb_info *sbi)
+static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
+	struct block_device *bdev = FDEV(devi).bdev;
 	sector_t nr_sectors = bdev->bd_part->nr_sects;
 	sector_t sector = 0;
 	struct blk_zone *zones;
@@ -1529,15 +1544,21 @@ static int init_blkz_info(struct f2fs_sb_info *sbi)
 	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
 		return 0;
 
+	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
+				SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+		return -EINVAL;
 	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
+				__ilog2_u32(sbi->blocks_per_blkz))
+		return -EINVAL;
 	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
-	sbi->nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
-		sbi->log_blocks_per_blkz;
+	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
+					sbi->log_blocks_per_blkz;
 	if (nr_sectors & (bdev_zone_size(bdev) - 1))
-		sbi->nr_blkz++;
+		FDEV(devi).nr_blkz++;
 
-	sbi->blkz_type = kmalloc(sbi->nr_blkz, GFP_KERNEL);
-	if (!sbi->blkz_type)
+	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+	if (!FDEV(devi).blkz_type)
 		return -ENOMEM;
 
 #define F2FS_REPORT_NR_ZONES   4096
@@ -1562,7 +1583,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi)
 		}
 
 		for (i = 0; i < nr_zones; i++) {
-			sbi->blkz_type[n] = zones[i].type;
+			FDEV(devi).blkz_type[n] = zones[i].type;
 			sector += zones[i].len;
 			n++;
 		}
@@ -1666,6 +1687,77 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 	return err;
 }
 
+static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; i++) {
+		if (!RDEV(i).path[0])
+			return 0;
+
+		if (i == 0) {
+			sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
+						MAX_DEVICES, GFP_KERNEL);
+			if (!sbi->devs)
+				return -ENOMEM;
+		}
+
+		memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+		FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
+		if (i == 0) {
+			FDEV(i).start_blk = 0;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1 +
+				le32_to_cpu(raw_super->segment0_blkaddr);
+		} else {
+			FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1;
+		}
+
+		FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+					sbi->sb->s_mode, sbi->sb->s_type);
+		if (IS_ERR(FDEV(i).bdev))
+			return PTR_ERR(FDEV(i).bdev);
+
+		/* to release errored devices */
+		sbi->s_ndevs = i + 1;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
+				!f2fs_sb_mounted_blkzoned(sbi->sb)) {
+			f2fs_msg(sbi->sb, KERN_ERR,
+				"Zoned block device feature not enabled\n");
+			return -EINVAL;
+		}
+		if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+			if (init_blkz_info(sbi, i)) {
+				f2fs_msg(sbi->sb, KERN_ERR,
+					"Failed to initialize F2FS blkzone information");
+				return -EINVAL;
+			}
+			f2fs_msg(sbi->sb, KERN_INFO,
+				"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+				i, FDEV(i).path,
+				FDEV(i).total_segments,
+				FDEV(i).start_blk, FDEV(i).end_blk,
+				bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+				"Host-aware" : "Host-managed");
+			continue;
+		}
+#endif
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Mount Device [%2d]: %20s, %8u, %8x - %8x",
+				i, FDEV(i).path,
+				FDEV(i).total_segments,
+				FDEV(i).start_blk, FDEV(i).end_blk);
+	}
+	return 0;
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct f2fs_sb_info *sbi;
@@ -1724,15 +1816,7 @@ try_onemore:
 			 "Zoned block device support is not enabled\n");
 		goto free_sb_buf;
 	}
-#else
-	if (bdev_zoned_model(sb->s_bdev) == BLK_ZONED_HM &&
-	    !f2fs_sb_mounted_blkzoned(sb)) {
-		f2fs_msg(sb, KERN_ERR,
-			 "Zoned block device feature not enabled\n");
-		goto free_sb_buf;
-	}
 #endif
-
 	default_options(sbi);
 	/* parse mount options */
 	options = kstrdup((const char *)data, GFP_KERNEL);
@@ -1802,6 +1886,13 @@ try_onemore:
 		goto free_meta_inode;
 	}
 
+	/* Initialize device list */
+	err = f2fs_scan_devices(sbi);
+	if (err) {
+		f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+		goto free_devices;
+	}
+
 	sbi->total_valid_node_count =
 				le32_to_cpu(sbi->ckpt->valid_node_count);
 	percpu_counter_set(&sbi->total_valid_inode_count,
@@ -1820,15 +1911,6 @@ try_onemore:
 
 	init_ino_entry_info(sbi);
 
-#ifdef CONFIG_BLK_DEV_ZONED
-	err = init_blkz_info(sbi);
-	if (err) {
-		f2fs_msg(sb, KERN_ERR,
-			"Failed to initialize F2FS blkzone information");
-		goto free_blkz;
-	}
-#endif
-
 	/* setup f2fs internal modules */
 	err = build_segment_manager(sbi);
 	if (err) {
@@ -2007,10 +2089,8 @@ free_nm:
 	destroy_node_manager(sbi);
 free_sm:
 	destroy_segment_manager(sbi);
-#ifdef CONFIG_BLK_DEV_ZONED
-free_blkz:
-	kfree(sbi->blkz_type);
-#endif
+free_devices:
+	destroy_device_list(sbi);
 	kfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 422630b8e588..cea41a124a80 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -52,10 +52,17 @@
 
 #define VERSION_LEN	256
 #define MAX_VOLUME_NAME		512
+#define MAX_PATH_LEN		64
+#define MAX_DEVICES		8
 
 /*
  * For superblock
  */
+struct f2fs_device {
+	__u8 path[MAX_PATH_LEN];
+	__le32 total_segments;
+} __packed;
+
 struct f2fs_super_block {
 	__le32 magic;			/* Magic Number */
 	__le16 major_ver;		/* Major Version */
@@ -94,7 +101,8 @@ struct f2fs_super_block {
 	__le32 feature;			/* defined features */
 	__u8 encryption_level;		/* versioning level for encryption */
 	__u8 encrypt_pw_salt[16];	/* Salt used for string2key algorithm */
-	__u8 reserved[871];		/* valid reserved region */
+	struct f2fs_device devs[MAX_DEVICES];	/* device list */
+	__u8 reserved[327];		/* valid reserved region */
 } __packed;
 
 /*
-- 
cgit v1.2.3


From 3007098494bec614fb55dee7bc0410bb7db5ad18 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Wed, 23 Nov 2016 16:52:26 +0100
Subject: cgroup: add support for eBPF programs

This patch adds two sets of eBPF program pointers to struct cgroup.
One for such that are directly pinned to a cgroup, and one for such
that are effective for it.

To illustrate the logic behind that, assume the following example
cgroup hierarchy.

  A - B - C
        \ D - E

If only B has a program attached, it will be effective for B, C, D
and E. If D then attaches a program itself, that will be effective for
both D and E, and the program in B will only affect B and C. Only one
program of a given type is effective for a cgroup.

Attaching and detaching programs will be done through the bpf(2)
syscall. For now, ingress and egress inet socket filtering are the
only supported use-cases.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h  |  79 +++++++++++++++++++++
 include/linux/cgroup-defs.h |   4 ++
 init/Kconfig                |  12 ++++
 kernel/bpf/Makefile         |   1 +
 kernel/bpf/cgroup.c         | 167 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup.c             |  18 +++++
 6 files changed, 281 insertions(+)
 create mode 100644 include/linux/bpf-cgroup.h
 create mode 100644 kernel/bpf/cgroup.c

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644
index 000000000000..ec80d0c0953e
--- /dev/null
+++ b/include/linux/bpf-cgroup.h
@@ -0,0 +1,79 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/bpf.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+	/*
+	 * Store two sets of bpf_prog pointers, one for programs that are
+	 * pinned directly to this cgroup, and one for those that are effective
+	 * when this cgroup is accessed.
+	 */
+	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_put(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+void __cgroup_bpf_update(struct cgroup *cgrp,
+			 struct cgroup *parent,
+			 struct bpf_prog *prog,
+			 enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+void cgroup_bpf_update(struct cgroup *cgrp,
+		       struct bpf_prog *prog,
+		       enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter(struct sock *sk,
+			    struct sk_buff *skb,
+			    enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb)			\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled)						\
+		__ret = __cgroup_bpf_run_filter(sk, skb,		\
+						BPF_CGROUP_INET_INGRESS); \
+									\
+	__ret;								\
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb)				\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		\
+		typeof(sk) __sk = sk_to_full_sk(sk);			\
+		if (sk_fullsock(__sk))					\
+			__ret = __cgroup_bpf_run_filter(__sk, skb,	\
+						BPF_CGROUP_INET_EGRESS); \
+	}								\
+	__ret;								\
+})
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+				      struct cgroup *parent) {}
+
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b17de62c962..861b4677fc5b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -300,6 +301,9 @@ struct cgroup {
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
+	/* used to store eBPF programs */
+	struct cgroup_bpf bpf;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/init/Kconfig b/init/Kconfig
index 34407f15e6d3..405120b5f13e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1154,6 +1154,18 @@ config CGROUP_PERF
 
 	  Say N if unsure.
 
+config CGROUP_BPF
+	bool "Support for eBPF programs attached to cgroups"
+	depends on BPF_SYSCALL && SOCK_CGROUP_DATA
+	help
+	  Allow attaching eBPF programs to a cgroup using the bpf(2)
+	  syscall command BPF_PROG_ATTACH.
+
+	  In which context these programs are accessed depends on the type
+	  of attachment. For instance, programs that are attached using
+	  BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+	  inet sockets.
+
 config CGROUP_DEBUG
 	bool "Example controller"
 	default n
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index c4d89d6e2058..1276474ac3cd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644
index 000000000000..a0ab43f264b0
--- /dev/null
+++ b/kernel/bpf/cgroup.c
@@ -0,0 +1,167 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (c) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+/**
+ * cgroup_bpf_put() - put references of all bpf programs
+ * @cgrp: the cgroup to modify
+ */
+void cgroup_bpf_put(struct cgroup *cgrp)
+{
+	unsigned int type;
+
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+		struct bpf_prog *prog = cgrp->bpf.prog[type];
+
+		if (prog) {
+			bpf_prog_put(prog);
+			static_branch_dec(&cgroup_bpf_enabled_key);
+		}
+	}
+}
+
+/**
+ * cgroup_bpf_inherit() - inherit effective programs from parent
+ * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
+ */
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+	unsigned int type;
+
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+		struct bpf_prog *e;
+
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+	}
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ *                         propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is %NULL, this function attaches a new program to the cgroup and
+ * releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+void __cgroup_bpf_update(struct cgroup *cgrp,
+			 struct cgroup *parent,
+			 struct bpf_prog *prog,
+			 enum bpf_attach_type type)
+{
+	struct bpf_prog *old_prog, *effective;
+	struct cgroup_subsys_state *pos;
+
+	old_prog = xchg(cgrp->bpf.prog + type, prog);
+
+	effective = (!prog && parent) ?
+		rcu_dereference_protected(parent->bpf.effective[type],
+					  lockdep_is_held(&cgroup_mutex)) :
+		prog;
+
+	css_for_each_descendant_pre(pos, &cgrp->self) {
+		struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+		/* skip the subtree if the descendant has its own program */
+		if (desc->bpf.prog[type] && desc != cgrp)
+			pos = css_rightmost_descendant(pos);
+		else
+			rcu_assign_pointer(desc->bpf.effective[type],
+					   effective);
+	}
+
+	if (prog)
+		static_branch_inc(&cgroup_bpf_enabled_key);
+
+	if (old_prog) {
+		bpf_prog_put(old_prog);
+		static_branch_dec(&cgroup_bpf_enabled_key);
+	}
+}
+
+/**
+ * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * @sk: The socken sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be exectuted
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter(struct sock *sk,
+			    struct sk_buff *skb,
+			    enum bpf_attach_type type)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret = 0;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	if (sk->sk_family != AF_INET &&
+	    sk->sk_family != AF_INET6)
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog) {
+		unsigned int offset = skb->data - skb_network_header(skb);
+
+		__skb_push(skb, offset);
+		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+		__skb_pull(skb, offset);
+	}
+
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 85bc9beb046d..2ee9ec3051b2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5074,6 +5074,8 @@ static void css_release_work_fn(struct work_struct *work)
 		if (cgrp->kn)
 			RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
 					 NULL);
+
+		cgroup_bpf_put(cgrp);
 	}
 
 	mutex_unlock(&cgroup_mutex);
@@ -5281,6 +5283,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (!cgroup_on_dfl(cgrp))
 		cgrp->subtree_control = cgroup_control(cgrp);
 
+	if (parent)
+		cgroup_bpf_inherit(cgrp, parent);
+
 	cgroup_propagate_control(cgrp);
 
 	/* @cgrp doesn't have dir yet so the following will only create csses */
@@ -6495,6 +6500,19 @@ static __init int cgroup_namespaces_init(void)
 }
 subsys_initcall(cgroup_namespaces_init);
 
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+		       struct bpf_prog *prog,
+		       enum bpf_attach_type type)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+
+	mutex_lock(&cgroup_mutex);
+	__cgroup_bpf_update(cgrp, parent, prog, type);
+	mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 #ifdef CONFIG_CGROUP_DEBUG
 static struct cgroup_subsys_state *
 debug_css_alloc(struct cgroup_subsys_state *parent_css)
-- 
cgit v1.2.3


From 7fd8329ba502ef76dd91db561c7aed696b2c7720 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 21 Sep 2016 13:47:22 +0200
Subject: taint/module: Clean up global and module taint flags handling

The commit 66cc69e34e86a231 ("Fix: module signature vs tracepoints:
add new TAINT_UNSIGNED_MODULE") updated module_taint_flags() to
potentially print one more character. But it did not increase the
size of the corresponding buffers in m_show() and print_modules().

We have recently done the same mistake when adding a taint flag
for livepatching, see
https://lkml.kernel.org/r/cfba2c823bb984690b73572aaae1db596b54a082.1472137475.git.jpoimboe@redhat.com

Also struct module uses an incompatible type for mod-taints flags.
It survived from the commit 2bc2d61a9638dab670d ("[PATCH] list module
taint flags in Oops/panic"). There was used "int" for the global taint
flags at these times. But only the global tain flags was later changed
to "unsigned long" by the commit 25ddbb18aae33ad2 ("Make the taint
flags reliable").

This patch defines TAINT_FLAGS_COUNT that can be used to create
arrays and buffers of the right size. Note that we could not use
enum because the taint flag indexes are used also in assembly code.

Then it reworks the table that describes the taint flags. The TAINT_*
numbers can be used as the index. Instead, we add information
if the taint flag is also shown per-module.

Finally, it uses "unsigned long", bit operations, and the updated
taint_flags table also for mod->taints.

It is not optimal because only few taint flags can be printed by
module_taint_flags(). But better be on the safe side. IMHO, it is
not worth the optimization and this is a good compromise.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: http://lkml.kernel.org/r/1474458442-21581-1-git-send-email-pmladek@suse.com
[jeyu@redhat.com: fix broken lkml link in changelog]
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/kernel.h |  9 +++++++++
 include/linux/module.h |  2 +-
 kernel/module.c        | 33 +++++++++++++------------------
 kernel/panic.c         | 53 ++++++++++++++++++++++++--------------------------
 4 files changed, 48 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc6ed52a39b9..441def77246d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -506,6 +506,15 @@ extern enum system_states {
 #define TAINT_UNSIGNED_MODULE		13
 #define TAINT_SOFTLOCKUP		14
 #define TAINT_LIVEPATCH			15
+#define TAINT_FLAGS_COUNT		16
+
+struct taint_flag {
+	char true;	/* character printed when tainted */
+	char false;	/* character printed when not tainted */
+	bool module;	/* also show as a per-module taint flag */
+};
+
+extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
 
 extern const char hex_asc[];
 #define hex_asc_lo(x)	hex_asc[((x) & 0x0f)]
diff --git a/include/linux/module.h b/include/linux/module.h
index 0c3207d26ac0..f6ee569c62bb 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -399,7 +399,7 @@ struct module {
 	/* Arch-specific module values */
 	struct mod_arch_specific arch;
 
-	unsigned int taints;	/* same bits as kernel:tainted */
+	unsigned long taints;	/* same bits as kernel:taint_flags */
 
 #ifdef CONFIG_GENERIC_BUG
 	/* Support for BUG */
diff --git a/kernel/module.c b/kernel/module.c
index f57dd63186e6..a4acd8f403ae 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -330,7 +330,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
 				    enum lockdep_ok lockdep_ok)
 {
 	add_taint(flag, lockdep_ok);
-	mod->taints |= (1U << flag);
+	set_bit(flag, &mod->taints);
 }
 
 /*
@@ -1138,24 +1138,13 @@ static inline int module_unload_init(struct module *mod)
 static size_t module_flags_taint(struct module *mod, char *buf)
 {
 	size_t l = 0;
+	int i;
+
+	for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
+		if (taint_flags[i].module && test_bit(i, &mod->taints))
+			buf[l++] = taint_flags[i].true;
+	}
 
-	if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
-		buf[l++] = 'P';
-	if (mod->taints & (1 << TAINT_OOT_MODULE))
-		buf[l++] = 'O';
-	if (mod->taints & (1 << TAINT_FORCED_MODULE))
-		buf[l++] = 'F';
-	if (mod->taints & (1 << TAINT_CRAP))
-		buf[l++] = 'C';
-	if (mod->taints & (1 << TAINT_UNSIGNED_MODULE))
-		buf[l++] = 'E';
-	if (mod->taints & (1 << TAINT_LIVEPATCH))
-		buf[l++] = 'K';
-	/*
-	 * TAINT_FORCED_RMMOD: could be added.
-	 * TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
-	 * apply to modules.
-	 */
 	return l;
 }
 
@@ -4041,6 +4030,10 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 }
 #endif /* CONFIG_KALLSYMS */
 
+/* Maximum number of characters written by module_flags() */
+#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
+
+/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
 static char *module_flags(struct module *mod, char *buf)
 {
 	int bx = 0;
@@ -4085,7 +4078,7 @@ static void m_stop(struct seq_file *m, void *p)
 static int m_show(struct seq_file *m, void *p)
 {
 	struct module *mod = list_entry(p, struct module, list);
-	char buf[8];
+	char buf[MODULE_FLAGS_BUF_SIZE];
 
 	/* We always ignore unformed modules. */
 	if (mod->state == MODULE_STATE_UNFORMED)
@@ -4256,7 +4249,7 @@ EXPORT_SYMBOL_GPL(__module_text_address);
 void print_modules(void)
 {
 	struct module *mod;
-	char buf[8];
+	char buf[MODULE_FLAGS_BUF_SIZE];
 
 	printk(KERN_DEFAULT "Modules linked in:");
 	/* Most callers should already have preempt disabled, but make sure */
diff --git a/kernel/panic.c b/kernel/panic.c
index e6480e20379e..c51edaa04fce 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -298,30 +298,27 @@ void panic(const char *fmt, ...)
 
 EXPORT_SYMBOL(panic);
 
-
-struct tnt {
-	u8	bit;
-	char	true;
-	char	false;
-};
-
-static const struct tnt tnts[] = {
-	{ TAINT_PROPRIETARY_MODULE,	'P', 'G' },
-	{ TAINT_FORCED_MODULE,		'F', ' ' },
-	{ TAINT_CPU_OUT_OF_SPEC,	'S', ' ' },
-	{ TAINT_FORCED_RMMOD,		'R', ' ' },
-	{ TAINT_MACHINE_CHECK,		'M', ' ' },
-	{ TAINT_BAD_PAGE,		'B', ' ' },
-	{ TAINT_USER,			'U', ' ' },
-	{ TAINT_DIE,			'D', ' ' },
-	{ TAINT_OVERRIDDEN_ACPI_TABLE,	'A', ' ' },
-	{ TAINT_WARN,			'W', ' ' },
-	{ TAINT_CRAP,			'C', ' ' },
-	{ TAINT_FIRMWARE_WORKAROUND,	'I', ' ' },
-	{ TAINT_OOT_MODULE,		'O', ' ' },
-	{ TAINT_UNSIGNED_MODULE,	'E', ' ' },
-	{ TAINT_SOFTLOCKUP,		'L', ' ' },
-	{ TAINT_LIVEPATCH,		'K', ' ' },
+/*
+ * TAINT_FORCED_RMMOD could be a per-module flag but the module
+ * is being removed anyway.
+ */
+const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
+	{ 'P', 'G', true },	/* TAINT_PROPRIETARY_MODULE */
+	{ 'F', ' ', true },	/* TAINT_FORCED_MODULE */
+	{ 'S', ' ', false },	/* TAINT_CPU_OUT_OF_SPEC */
+	{ 'R', ' ', false },	/* TAINT_FORCED_RMMOD */
+	{ 'M', ' ', false },	/* TAINT_MACHINE_CHECK */
+	{ 'B', ' ', false },	/* TAINT_BAD_PAGE */
+	{ 'U', ' ', false },	/* TAINT_USER */
+	{ 'D', ' ', false },	/* TAINT_DIE */
+	{ 'A', ' ', false },	/* TAINT_OVERRIDDEN_ACPI_TABLE */
+	{ 'W', ' ', false },	/* TAINT_WARN */
+	{ 'C', ' ', true },	/* TAINT_CRAP */
+	{ 'I', ' ', false },	/* TAINT_FIRMWARE_WORKAROUND */
+	{ 'O', ' ', true },	/* TAINT_OOT_MODULE */
+	{ 'E', ' ', true },	/* TAINT_UNSIGNED_MODULE */
+	{ 'L', ' ', false },	/* TAINT_SOFTLOCKUP */
+	{ 'K', ' ', true },	/* TAINT_LIVEPATCH */
 };
 
 /**
@@ -348,16 +345,16 @@ static const struct tnt tnts[] = {
  */
 const char *print_tainted(void)
 {
-	static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ")];
+	static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];
 
 	if (tainted_mask) {
 		char *s;
 		int i;
 
 		s = buf + sprintf(buf, "Tainted: ");
-		for (i = 0; i < ARRAY_SIZE(tnts); i++) {
-			const struct tnt *t = &tnts[i];
-			*s++ = test_bit(t->bit, &tainted_mask) ?
+		for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
+			const struct taint_flag *t = &taint_flags[i];
+			*s++ = test_bit(i, &tainted_mask) ?
 					t->true : t->false;
 		}
 		*s = 0;
-- 
cgit v1.2.3


From c714965f580accdb6715cb28285eeccea18dafdb Mon Sep 17 00:00:00 2001
From: Anson Jacob <ansonjacob.aj@gmail.com>
Date: Wed, 19 Oct 2016 19:12:18 -0400
Subject: module: remove trailing whitespace

Fix checkpatch.pl warning:
ERROR: trailing whitespace

Signed-off-by: Anson Jacob <ansonjacob.aj@gmail.com>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index f6ee569c62bb..7c84273d60b9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -412,7 +412,7 @@ struct module {
 	/* Protected by RCU and/or module_mutex: use rcu_dereference() */
 	struct mod_kallsyms *kallsyms;
 	struct mod_kallsyms core_kallsyms;
-	
+
 	/* Section attributes */
 	struct module_sect_attrs *sect_attrs;
 
-- 
cgit v1.2.3


From 5a717f4f8f2830f297b5511022481bdc27b9d576 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 24 Nov 2016 07:04:08 +0200
Subject: netdevice: fix sparse warning for HARD_TX_LOCK

sparse warns about context imbalance in any code
that uses HARD_TX_LOCK/UNLOCK - this is because it's
unable to determine that flags don't change so
lock and unlock are paired.

Seems easy enough to fix by adding __acquire/__release
calls.

With this patch af_packet.c is now sparse-clean,

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ff57cd2eba3b..4ffcd874cc20 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3462,6 +3462,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 	txq->xmit_lock_owner = cpu;
 }
 
+static inline bool __netif_tx_acquire(struct netdev_queue *txq)
+{
+	__acquire(&txq->_xmit_lock);
+	return true;
+}
+
+static inline void __netif_tx_release(struct netdev_queue *txq)
+{
+	__release(&txq->_xmit_lock);
+}
+
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
@@ -3563,17 +3574,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 #define HARD_TX_LOCK(dev, txq, cpu) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
 		__netif_tx_lock(txq, cpu);		\
+	} else {					\
+		__netif_tx_acquire(txq);		\
 	}						\
 }
 
 #define HARD_TX_TRYLOCK(dev, txq)			\
 	(((dev->features & NETIF_F_LLTX) == 0) ?	\
 		__netif_tx_trylock(txq) :		\
-		true )
+		__netif_tx_acquire(txq))
 
 #define HARD_TX_UNLOCK(dev, txq) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
 		__netif_tx_unlock(txq);			\
+	} else {					\
+		__netif_tx_release(txq);		\
 	}						\
 }
 
-- 
cgit v1.2.3


From 39290b389ea2654f9190e3b48c57d27b24def83e Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Mon, 14 Nov 2016 15:15:05 +0900
Subject: module: extend 'rodata=off' boot cmdline parameter to module mappings

The current "rodata=off" parameter disables read-only kernel mappings
under CONFIG_DEBUG_RODATA:
    commit d2aa1acad22f ("mm/init: Add 'rodata=off' boot cmdline parameter
    to disable read-only kernel mappings")

This patch is a logical extension to module mappings ie. read-only mappings
at module loading can be disabled even if CONFIG_DEBUG_SET_MODULE_RONX
(mainly for debug use). Please note, however, that it only affects RO/RW
permissions, keeping NX set.

This is the first step to make CONFIG_DEBUG_SET_MODULE_RONX mandatory
(always-on) in the future as CONFIG_DEBUG_RODATA on x86 and arm64.

Suggested-by: and Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Link: http://lkml.kernel.org/r/20161114061505.15238-1-takahiro.akashi@linaro.org
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/init.h |  3 +++
 init/main.c          |  7 +++++--
 kernel/module.c      | 20 +++++++++++++++++---
 3 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index e30104ceb86d..885c3e6d0f9d 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -126,6 +126,9 @@ void prepare_namespace(void);
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
+#if defined(CONFIG_DEBUG_RODATA) || defined(CONFIG_DEBUG_SET_MODULE_RONX)
+extern bool rodata_enabled;
+#endif
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif
diff --git a/init/main.c b/init/main.c
index 2858be732f6d..959a24242988 100644
--- a/init/main.c
+++ b/init/main.c
@@ -81,6 +81,7 @@
 #include <linux/integrity.h>
 #include <linux/proc_ns.h>
 #include <linux/io.h>
+#include <linux/cache.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -914,14 +915,16 @@ static int try_to_run_init_process(const char *init_filename)
 
 static noinline void __init kernel_init_freeable(void);
 
-#ifdef CONFIG_DEBUG_RODATA
-static bool rodata_enabled = true;
+#if defined(CONFIG_DEBUG_RODATA) || defined(CONFIG_SET_MODULE_RONX)
+bool rodata_enabled __ro_after_init = true;
 static int __init set_debug_rodata(char *str)
 {
 	return strtobool(str, &rodata_enabled);
 }
 __setup("rodata=", set_debug_rodata);
+#endif
 
+#ifdef CONFIG_DEBUG_RODATA
 static void mark_readonly(void)
 {
 	if (rodata_enabled)
diff --git a/kernel/module.c b/kernel/module.c
index 6281c70683d3..039ce82803f7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1902,6 +1902,9 @@ static void frob_writable_data(const struct module_layout *layout,
 /* livepatching wants to disable read-only so it can frob module. */
 void module_disable_ro(const struct module *mod)
 {
+	if (!rodata_enabled)
+		return;
+
 	frob_text(&mod->core_layout, set_memory_rw);
 	frob_rodata(&mod->core_layout, set_memory_rw);
 	frob_ro_after_init(&mod->core_layout, set_memory_rw);
@@ -1911,6 +1914,9 @@ void module_disable_ro(const struct module *mod)
 
 void module_enable_ro(const struct module *mod, bool after_init)
 {
+	if (!rodata_enabled)
+		return;
+
 	frob_text(&mod->core_layout, set_memory_ro);
 	frob_rodata(&mod->core_layout, set_memory_ro);
 	frob_text(&mod->init_layout, set_memory_ro);
@@ -1943,6 +1949,9 @@ void set_all_modules_text_rw(void)
 {
 	struct module *mod;
 
+	if (!rodata_enabled)
+		return;
+
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
@@ -1959,6 +1968,9 @@ void set_all_modules_text_ro(void)
 {
 	struct module *mod;
 
+	if (!rodata_enabled)
+		return;
+
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
 		/*
@@ -1978,10 +1990,12 @@ void set_all_modules_text_ro(void)
 
 static void disable_ro_nx(const struct module_layout *layout)
 {
-	frob_text(layout, set_memory_rw);
-	frob_rodata(layout, set_memory_rw);
+	if (rodata_enabled) {
+		frob_text(layout, set_memory_rw);
+		frob_rodata(layout, set_memory_rw);
+		frob_ro_after_init(layout, set_memory_rw);
+	}
 	frob_rodata(layout, set_memory_x);
-	frob_ro_after_init(layout, set_memory_rw);
 	frob_ro_after_init(layout, set_memory_x);
 	frob_writable_data(layout, set_memory_x);
 }
-- 
cgit v1.2.3


From ec76d819d27040e418801d1a57bd3bdfde51019e Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Fri, 14 Oct 2016 11:53:19 +1100
Subject: KVM: Export kvm module parameter variables

The kvm module has the parameters halt_poll_ns, halt_poll_ns_grow, and
halt_poll_ns_shrink. Halt polling was recently added to the powerpc kvm-hv
module and these parameters were essentially duplicated for that. There is
no benefit to this duplication and it can lead to confusion when trying to
tune halt polling.

Thus move the definition of these variables to kvm_host.h and export them.
This will allow the kvm-hv module to use the same module parameters by
accessing these variables, which will be implemented in the next patch,
meaning that they will no longer be duplicated.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 include/linux/kvm_host.h | 4 ++++
 virt/kvm/kvm_main.c      | 9 ++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..29b500a857d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1107,6 +1107,10 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
+extern unsigned int halt_poll_ns;
+extern unsigned int halt_poll_ns_grow;
+extern unsigned int halt_poll_ns_shrink;
+
 struct kvm_device {
 	struct kvm_device_ops *ops;
 	struct kvm *kvm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fbf04c0c898c..9831cdf35436 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
 /* Architectures should define their poll value according to the halt latency */
-static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
+unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns);
 
 /* Default doubles per-vcpu halt_poll_ns. */
-static unsigned int halt_poll_ns_grow = 2;
+unsigned int halt_poll_ns_grow = 2;
 module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
 
 /* Default resets per-vcpu halt_poll_ns . */
-static unsigned int halt_poll_ns_shrink;
+unsigned int halt_poll_ns_shrink;
 module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
 
 /*
  * Ordering of locks:
-- 
cgit v1.2.3


From 88575199cc65de99a156888629a68180c830eff2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 26 Nov 2016 01:28:04 +0100
Subject: bpf: drop unnecessary context cast from BPF_PROG_RUN

Since long already bpf_func is not only about struct sk_buff * as
input anymore. Make it generic as void *, so that callers don't
need to cast for it each time they call BPF_PROG_RUN().

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
 include/linux/filter.h                              | 6 +++---
 kernel/events/core.c                                | 2 +-
 kernel/seccomp.c                                    | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index eb3715700c95..876ab3a92ad5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1518,7 +1518,7 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
 	xdp.data = data;
 	xdp.data_end = data + len;
 
-	return BPF_PROG_RUN(prog, (void *)&xdp);
+	return BPF_PROG_RUN(prog, &xdp);
 }
 
 /**
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1f09c521adfe..7f246a281435 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -408,8 +408,8 @@ struct bpf_prog {
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-	unsigned int		(*bpf_func)(const struct sk_buff *skb,
-					    const struct bpf_insn *filter);
+	unsigned int		(*bpf_func)(const void *ctx,
+					    const struct bpf_insn *insn);
 	/* Instructions for interpreter */
 	union {
 		struct sock_filter	insns[0];
@@ -504,7 +504,7 @@ static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 	u32 ret;
 
 	rcu_read_lock();
-	ret = BPF_PROG_RUN(prog, (void *)xdp);
+	ret = BPF_PROG_RUN(prog, xdp);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6ee1febdf6ff..22cc734aa1b2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7726,7 +7726,7 @@ static void bpf_overflow_handler(struct perf_event *event,
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
 		goto out;
 	rcu_read_lock();
-	ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+	ret = BPF_PROG_RUN(event->prog, &ctx);
 	rcu_read_unlock();
 out:
 	__this_cpu_dec(bpf_prog_active);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 0db7c8a2afe2..bff9c774987a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -195,7 +195,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (; f; f = f->prev) {
-		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
+		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
-- 
cgit v1.2.3


From c491680f8f489926eebfdf2cd006767fc8bdaa49 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 26 Nov 2016 01:28:06 +0100
Subject: bpf: reuse dev_is_mac_header_xmit for redirect

Commit dcf800344a91 ("net/sched: act_mirred: Refactor detection whether
dev needs xmit at mac header") added dev_is_mac_header_xmit(); since it's
also useful elsewhere, move it to if_arp.h and reuse it for BPF.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 16 ++++++++++++++++
 net/core/filter.c      | 14 ++++----------
 net/sched/act_mirred.c | 15 +--------------
 3 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index f563907ed776..3355efc89781 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -44,4 +44,20 @@ static inline int arp_hdr_len(struct net_device *dev)
 		return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2;
 	}
 }
+
+static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
+{
+	switch (dev->type) {
+	case ARPHRD_TUNNEL:
+	case ARPHRD_TUNNEL6:
+	case ARPHRD_SIT:
+	case ARPHRD_IPGRE:
+	case ARPHRD_VOID:
+	case ARPHRD_NONE:
+		return false;
+	default:
+		return true;
+	}
+}
+
 #endif	/* _LINUX_IF_ARP_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index ea315af56511..698a262b8ebb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -30,6 +30,7 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
+#include <linux/if_arp.h>
 #include <linux/gfp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -1696,17 +1697,10 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
 			  u32 flags)
 {
-	switch (dev->type) {
-	case ARPHRD_TUNNEL:
-	case ARPHRD_TUNNEL6:
-	case ARPHRD_SIT:
-	case ARPHRD_IPGRE:
-	case ARPHRD_VOID:
-	case ARPHRD_NONE:
-		return __bpf_redirect_no_mac(skb, dev, flags);
-	default:
+	if (dev_is_mac_header_xmit(dev))
 		return __bpf_redirect_common(skb, dev, flags);
-	}
+	else
+		return __bpf_redirect_no_mac(skb, dev, flags);
 }
 
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b2d417b8f46c..1af7baa732a3 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
+#include <linux/if_arp.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -73,20 +74,6 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
 static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
-static bool dev_is_mac_header_xmit(const struct net_device *dev)
-{
-	switch (dev->type) {
-	case ARPHRD_TUNNEL:
-	case ARPHRD_TUNNEL6:
-	case ARPHRD_SIT:
-	case ARPHRD_IPGRE:
-	case ARPHRD_VOID:
-	case ARPHRD_NONE:
-		return false;
-	}
-	return true;
-}
-
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
 			   int bind)
-- 
cgit v1.2.3


From 72d19459d7919f966594576bb042d15a451f27ea Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 25 Nov 2016 14:27:21 +0100
Subject: HID: input: rework HID_QUIRK_MULTI_INPUT

The purpose of HID_QUIRK_MULTI_INPUT is to have an input device per
report id. This is useful when the HID device presents several HID
collections of different device types.

The current implementation of hid-input creates one input node per id per
type (input or output). This is problematic for the LEDs of a keyboard as
they are often set through an output report. The current code creates
one input node with all the keyboard keys, and one other with only the
LEDs.

To solve this, we use a two-passes way:
- first, we initialize all input nodes and associate one per report id
- then, we register all the input nodes

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 95 ++++++++++++++++++++++++++++---------------------
 include/linux/hid.h     |  1 +
 2 files changed, 55 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index fb9ace1cef8b..55db58459531 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1468,6 +1468,31 @@ static void hidinput_cleanup_hidinput(struct hid_device *hid,
 	kfree(hidinput);
 }
 
+static struct hid_input *hidinput_match(struct hid_report *report)
+{
+	struct hid_device *hid = report->device;
+	struct hid_input *hidinput;
+
+	list_for_each_entry(hidinput, &hid->inputs, list) {
+		if (hidinput->report &&
+		    hidinput->report->id == report->id)
+			return hidinput;
+	}
+
+	return NULL;
+}
+
+static inline void hidinput_configure_usages(struct hid_input *hidinput,
+					     struct hid_report *report)
+{
+	int i, j;
+
+	for (i = 0; i < report->maxfield; i++)
+		for (j = 0; j < report->field[i]->maxusage; j++)
+			hidinput_configure_usage(hidinput, report->field[i],
+						 report->field[i]->usage + j);
+}
+
 /*
  * Register the input device; print a message.
  * Configure the input layer interface
@@ -1478,8 +1503,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
 {
 	struct hid_driver *drv = hid->driver;
 	struct hid_report *report;
-	struct hid_input *hidinput = NULL;
-	int i, j, k;
+	struct hid_input *next, *hidinput = NULL;
+	int i, k;
 
 	INIT_LIST_HEAD(&hid->inputs);
 	INIT_WORK(&hid->led_work, hidinput_led_worker);
@@ -1509,43 +1534,40 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
 			if (!report->maxfield)
 				continue;
 
+			/*
+			 * Find the previous hidinput report attached
+			 * to this report id.
+			 */
+			if (hid->quirks & HID_QUIRK_MULTI_INPUT)
+				hidinput = hidinput_match(report);
+
 			if (!hidinput) {
 				hidinput = hidinput_allocate(hid);
 				if (!hidinput)
 					goto out_unwind;
 			}
 
-			for (i = 0; i < report->maxfield; i++)
-				for (j = 0; j < report->field[i]->maxusage; j++)
-					hidinput_configure_usage(hidinput, report->field[i],
-								 report->field[i]->usage + j);
-
-			if ((hid->quirks & HID_QUIRK_NO_EMPTY_INPUT) &&
-			    !hidinput_has_been_populated(hidinput))
-				continue;
+			hidinput_configure_usages(hidinput, report);
 
-			if (hid->quirks & HID_QUIRK_MULTI_INPUT) {
-				/* This will leave hidinput NULL, so that it
-				 * allocates another one if we have more inputs on
-				 * the same interface. Some devices (e.g. Happ's
-				 * UGCI) cram a lot of unrelated inputs into the
-				 * same interface. */
+			if (hid->quirks & HID_QUIRK_MULTI_INPUT)
 				hidinput->report = report;
-				if (drv->input_configured &&
-				    drv->input_configured(hid, hidinput))
-					goto out_cleanup;
-				if (input_register_device(hidinput->input))
-					goto out_cleanup;
-				hidinput = NULL;
-			}
 		}
 	}
 
-	if (hidinput && (hid->quirks & HID_QUIRK_NO_EMPTY_INPUT) &&
-	    !hidinput_has_been_populated(hidinput)) {
-		/* no need to register an input device not populated */
-		hidinput_cleanup_hidinput(hid, hidinput);
-		hidinput = NULL;
+	list_for_each_entry_safe(hidinput, next, &hid->inputs, list) {
+		if ((hid->quirks & HID_QUIRK_NO_EMPTY_INPUT) &&
+		    !hidinput_has_been_populated(hidinput)) {
+			/* no need to register an input device not populated */
+			hidinput_cleanup_hidinput(hid, hidinput);
+			continue;
+		}
+
+		if (drv->input_configured &&
+		    drv->input_configured(hid, hidinput))
+			goto out_unwind;
+		if (input_register_device(hidinput->input))
+			goto out_unwind;
+		hidinput->registered = true;
 	}
 
 	if (list_empty(&hid->inputs)) {
@@ -1553,20 +1575,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
 		goto out_unwind;
 	}
 
-	if (hidinput) {
-		if (drv->input_configured &&
-		    drv->input_configured(hid, hidinput))
-			goto out_cleanup;
-		if (input_register_device(hidinput->input))
-			goto out_cleanup;
-	}
-
 	return 0;
 
-out_cleanup:
-	list_del(&hidinput->list);
-	input_free_device(hidinput->input);
-	kfree(hidinput);
 out_unwind:
 	/* unwind the ones we already registered */
 	hidinput_disconnect(hid);
@@ -1583,7 +1593,10 @@ void hidinput_disconnect(struct hid_device *hid)
 
 	list_for_each_entry_safe(hidinput, next, &hid->inputs, list) {
 		list_del(&hidinput->list);
-		input_unregister_device(hidinput->input);
+		if (hidinput->registered)
+			input_unregister_device(hidinput->input);
+		else
+			input_free_device(hidinput->input);
 		kfree(hidinput);
 	}
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b2ec82712baa..596b9232c19e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -479,6 +479,7 @@ struct hid_input {
 	struct list_head list;
 	struct hid_report *report;
 	struct input_dev *input;
+	bool registered;
 };
 
 enum hid_type {
-- 
cgit v1.2.3


From 3a6a931dfb8e49a7377825b465d84e110fe89f68 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Sun, 27 Nov 2016 17:02:04 +0200
Subject: net/mlx5e: Support DCBX CEE API

Add DCBX CEE API interface for ConnectX-4. Configurations are stored in
a temporary structure and are applied to the card's firmware when
the CEE's setall callback function is called.

Note:
  priority group in CEE is equivalent to traffic class in ConnectX-4
  hardware spec.

  bw allocation per priority in CEE is not supported because ConnectX-4
  only supports bw allocation per traffic class.

  user priority in CEE does not have an equivalent term in ConnectX-4.
  Therefore, user priority to priority mapping in CEE is not supported.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  24 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 301 ++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/port.c     |  43 +++
 include/linux/mlx5/port.h                          |   4 +
 4 files changed, 370 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a2b32ed24315..31387ed9113b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -221,6 +221,26 @@ struct mlx5e_params {
 	u32 lro_timeout;
 };
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+	/* bw pct for priority group */
+	u8                         pg_bw_pct[CEE_DCBX_MAX_PGS];
+	u8                         prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_setting[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_enable;
+};
+
+enum {
+	MLX5_DCB_CHG_RESET,
+	MLX5_DCB_NO_CHG,
+	MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+	struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+};
+#endif
+
 struct mlx5e_tstamp {
 	rwlock_t                   lock;
 	struct cyclecounter        cycles;
@@ -688,6 +708,10 @@ struct mlx5e_priv {
 	struct mlx5e_stats         stats;
 	struct mlx5e_tstamp        tstamp;
 	u16 q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	struct mlx5e_dcbx          dcbx;
+#endif
+
 	const struct mlx5e_profile *profile;
 	void                      *ppriv;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 762af16ed021..059524324fdb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -38,6 +38,9 @@
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
 
+#define MLX5E_CEE_STATE_UP    1
+#define MLX5E_CEE_STATE_DOWN  0
+
 static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
 				   struct ieee_ets *ets)
 {
@@ -222,13 +225,15 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 
 static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 {
-	return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+	return DCB_CAP_DCBX_HOST |
+	       DCB_CAP_DCBX_VER_IEEE |
+	       DCB_CAP_DCBX_VER_CEE;
 }
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
 	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-	    (mode & DCB_CAP_DCBX_VER_CEE) ||
+	    !(mode & DCB_CAP_DCBX_VER_CEE) ||
 	    !(mode & DCB_CAP_DCBX_VER_IEEE) ||
 	    !(mode & DCB_CAP_DCBX_HOST))
 		return 1;
@@ -304,6 +309,281 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
 	return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
 }
 
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct ieee_ets ets;
+	struct ieee_pfc pfc;
+	int err;
+	int i;
+
+	memset(&ets, 0, sizeof(ets));
+	memset(&pfc, 0, sizeof(pfc));
+
+	ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+	for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+		ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+		ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+		ets.tc_tsa[i]   = IEEE_8021QAZ_TSA_ETS;
+		ets.prio_tc[i]  = cee_cfg->prio_to_pg_map[i];
+	}
+
+	err = mlx5e_dbcnl_validate_ets(netdev, &ets);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to validate ETS: %d\n", __func__, err);
+		goto out;
+	}
+
+	err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to set ETS: %d\n", __func__, err);
+		goto out;
+	}
+
+	/* Set PFC */
+	pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+	if (!cee_cfg->pfc_enable)
+		pfc.pfc_en = 0;
+	else
+		for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+			pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+	err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to set PFC: %d\n", __func__, err);
+		goto out;
+	}
+out:
+	return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+	return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+				      u8 *perm_addr)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	if (!perm_addr)
+		return;
+
+	mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+				     int priority, u8 prio_type,
+				     u8 pgid, u8 bw_pct, u8 up_map)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+				      int pgid, u8 bw_pct)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+				     int priority, u8 *prio_type,
+				     u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	*prio_type = 0;
+	*bw_pct = 0;
+	*up_map = 0;
+
+	if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+		*pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+				      int pgid, u8 *bw_pct)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct))
+		*bw_pct = 0;
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+				  int priority, u8 setting)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (setting > 1)
+		return;
+
+	cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+			     int priority, u8 *setting)
+{
+	struct ieee_pfc pfc;
+	int err;
+
+	err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+	if (err)
+		*setting = 0;
+	else
+		*setting = (pfc.pfc_en >> priority) & 0x01;
+
+	return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+				  int priority, u8 *setting)
+{
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (!setting)
+		return;
+
+	mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+			     int capid, u8 *cap)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 rval = 0;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 1 << mlx5_max_tc(mdev);
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 1 << mlx5_max_tc(mdev);
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = (DCB_CAP_DCBX_LLD_MANAGED |
+			DCB_CAP_DCBX_VER_CEE |
+			DCB_CAP_DCBX_STATIC);
+		break;
+	default:
+		*cap = 0;
+		rval = 1;
+		break;
+	}
+
+	return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+				 int tcs_id, u8 *num)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	switch (tcs_id) {
+	case DCB_NUMTCS_ATTR_PG:
+	case DCB_NUMTCS_ATTR_PFC:
+		*num = mlx5_max_tc(mdev) + 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ieee_pfc pfc;
+
+	if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+		return MLX5E_CEE_STATE_DOWN;
+
+	return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+		return;
+
+	cee_cfg->pfc_enable = state;
+}
+
 const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_getets	= mlx5e_dcbnl_ieee_getets,
 	.ieee_setets	= mlx5e_dcbnl_ieee_setets,
@@ -313,4 +593,21 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_setpfc	= mlx5e_dcbnl_ieee_setpfc,
 	.getdcbx	= mlx5e_dcbnl_getdcbx,
 	.setdcbx	= mlx5e_dcbnl_setdcbx,
+
+/* CEE interfaces */
+	.setall         = mlx5e_dcbnl_setall,
+	.getstate       = mlx5e_dcbnl_getstate,
+	.getpermhwaddr  = mlx5e_dcbnl_getpermhwaddr,
+
+	.setpgtccfgtx   = mlx5e_dcbnl_setpgtccfgtx,
+	.setpgbwgcfgtx  = mlx5e_dcbnl_setpgbwgcfgtx,
+	.getpgtccfgtx   = mlx5e_dcbnl_getpgtccfgtx,
+	.getpgbwgcfgtx  = mlx5e_dcbnl_getpgbwgcfgtx,
+
+	.setpfccfg      = mlx5e_dcbnl_setpfccfg,
+	.getpfccfg      = mlx5e_dcbnl_getpfccfg,
+	.getcap         = mlx5e_dcbnl_getcap,
+	.getnumtcs      = mlx5e_dcbnl_getnumtcs,
+	.getpfcstate    = mlx5e_dcbnl_getpfcstate,
+	.setpfcstate    = mlx5e_dcbnl_setpfcstate,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index b77928f5b46e..ed4898fcadc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -572,6 +572,28 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
 
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+			    u8 prio, u8 *tc)
+{
+	u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+	int err;
+
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(qtct_reg, in, port_number, 1);
+	MLX5_SET(qtct_reg, in, prio, prio);
+
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_QTCT, 0, 0);
+	if (!err)
+		*tc = MLX5_GET(qtct_reg, out, tclass);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 				   int inlen)
 {
@@ -625,6 +647,27 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
 
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+				u8 tc, u8 *bw_pct)
+{
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+	void *ets_tcn_conf;
+	int err;
+
+	err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+	if (err)
+		return err;
+
+	ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+				    tc_configuration[tc]);
+
+	*bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+			   bw_allocation);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 				    u8 *max_bw_value,
 				    u8 *max_bw_units)
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index dde8c7ec5ff1..bdee439f8cf3 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -141,8 +141,12 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx,
 int mlx5_max_tc(struct mlx5_core_dev *mdev);
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+			    u8 prio, u8 *tc);
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group);
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw);
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+				u8 tc, u8 *bw_pct);
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 				    u8 *max_bw_value,
 				    u8 *max_bw_unit);
-- 
cgit v1.2.3


From 341c5ee2fb78420ffc441df36f93226be8069b0a Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Sun, 27 Nov 2016 17:02:06 +0200
Subject: net/mlx5: Add DCBX firmware commands support

Add set/query commands for DCBX_PARAM register

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 20 ++++++++++++++++++++
 include/linux/mlx5/driver.h                    |  7 +++++++
 include/linux/mlx5/port.h                      |  2 ++
 3 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index ed4898fcadc9..d2ec9d232a70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -548,6 +548,26 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev)
 	return num_tc - 1;
 }
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+	u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+	MLX5_SET(dcbx_param, in, port_number, 1);
+
+	return  mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				    sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+	u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+	MLX5_SET(dcbx_param, in, port_number, 1);
+
+	return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+				    sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
 	u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ae1f451e8f89..68b85efc3908 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -104,6 +104,8 @@ enum {
 enum {
 	MLX5_REG_QETCR		 = 0x4005,
 	MLX5_REG_QTCT		 = 0x400a,
+	MLX5_REG_DCBX_PARAM      = 0x4020,
+	MLX5_REG_DCBX_APP        = 0x4021,
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
@@ -124,6 +126,11 @@ enum {
 	MLX5_REG_MPCNT		 = 0x9051,
 };
 
+enum mlx5_dcbx_oper_mode {
+	MLX5E_DCBX_PARAM_VER_OPER_HOST  = 0x0,
+	MLX5E_DCBX_PARAM_VER_OPER_AUTO  = 0x3,
+};
+
 enum {
 	MLX5_ATOMIC_OPS_CMP_SWAP	= 1 << 0,
 	MLX5_ATOMIC_OPS_FETCH_ADD	= 1 << 1,
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index bdee439f8cf3..e527732fb31b 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -162,4 +162,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 			     u16 offset, u16 size, u8 *data);
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out);
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in);
 #endif /* __MLX5_PORT_H__ */
-- 
cgit v1.2.3


From 50fcbbbb79de4b95a765ea170677c9810fcb9cee Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 12 Oct 2016 10:50:37 +0800
Subject: mmc: core: expose the capability of gpio card detect

Add new helper API mmc_can_gpio_cd for slot-gpio to make
host drivers know whether it supports gpio card detect.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/slot-gpio.c  | 8 ++++++++
 include/linux/mmc/slot-gpio.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 27117ba47073..babe591aea96 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -258,6 +258,14 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 }
 EXPORT_SYMBOL(mmc_gpiod_request_cd);
 
+bool mmc_can_gpio_cd(struct mmc_host *host)
+{
+	struct mmc_gpio *ctx = host->slot.handler_priv;
+
+	return ctx->cd_gpio ? true : false;
+}
+EXPORT_SYMBOL(mmc_can_gpio_cd);
+
 /**
  * mmc_gpiod_request_ro - request a gpio descriptor for write protection
  * @host: mmc host
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 3945a8c9d3cb..a7972cd3bc14 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -29,5 +29,6 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 void mmc_gpio_set_cd_isr(struct mmc_host *host,
 			 irqreturn_t (*isr)(int irq, void *dev_id));
 void mmc_gpiod_request_cd_irq(struct mmc_host *host);
+bool mmc_can_gpio_cd(struct mmc_host *host);
 
 #endif
-- 
cgit v1.2.3


From 8e8b3f514c12a3b800bba8a7766c71139ad75b89 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Nov 2016 11:05:19 +0100
Subject: mmc: core: use enum mmc_blk_status properly

There were several instances of code using the
enum mmc_blk_status by arbitrarily converting it to an int and
throwing it around to different functions. This makes the code
hard to understand to may give rise to strange errors.

Especially the function prototype mmc_start_req() had to be
modified to take a pointer to an enum mmc_blk_status and the
function pointer .err_check() inside struct mmc_async_req
needed to return an enum mmc_blk_status.

In every case: instead of assigning the block layer error code
to an int, use the enum, also change the signature of all
functions actually passing this enum to use the enum.

To make it possible to use the enum everywhere applicable, move
it to <linux/mmc/core.h> so that all code actually using it can
also see it.

An interesting case was encountered in the MMC test code which
did not return a enum mmc_blk_status at all in the .err_check
function supposed to check whether asynchronous requests worked
or not: instead it returned a normal -ERROR or even the test
frameworks internal error codes.

The test code would also pass on enum mmc_blk_status codes as
error codes inside the test code instead of converting them
to the local RESULT_* codes.

I have tried to fix all instances properly and run some tests
on the result.

Cc: Chunyan Zhang <zhang.chunyan@linaro.org>
Cc: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/card/block.c    | 13 +++++++------
 drivers/mmc/card/mmc_test.c | 44 ++++++++++++++++++++++++++++++++++----------
 drivers/mmc/core/core.c     | 35 ++++++++++++++++++-----------------
 include/linux/mmc/card.h    | 12 ------------
 include/linux/mmc/core.h    | 15 ++++++++++++++-
 include/linux/mmc/host.h    |  2 +-
 6 files changed, 74 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 62e38bc30618..49c2a2be7bb8 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1320,8 +1320,8 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
 	 R1_CC_ERROR |		/* Card controller error */		\
 	 R1_ERROR)		/* General/unknown error */
 
-static int mmc_blk_err_check(struct mmc_card *card,
-			     struct mmc_async_req *areq)
+static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
+					     struct mmc_async_req *areq)
 {
 	struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
 						    mmc_active);
@@ -1433,14 +1433,15 @@ static int mmc_blk_err_check(struct mmc_card *card,
 	return MMC_BLK_SUCCESS;
 }
 
-static int mmc_blk_packed_err_check(struct mmc_card *card,
-				    struct mmc_async_req *areq)
+static enum mmc_blk_status mmc_blk_packed_err_check(struct mmc_card *card,
+						    struct mmc_async_req *areq)
 {
 	struct mmc_queue_req *mq_rq = container_of(areq, struct mmc_queue_req,
 			mmc_active);
 	struct request *req = mq_rq->req;
 	struct mmc_packed *packed = mq_rq->packed;
-	int err, check, status;
+	enum mmc_blk_status status, check;
+	int err;
 	u8 *ext_csd;
 
 	packed->retries--;
@@ -1995,7 +1996,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			areq = &mq->mqrq_cur->mmc_active;
 		} else
 			areq = NULL;
-		areq = mmc_start_req(card->host, areq, (int *) &status);
+		areq = mmc_start_req(card->host, areq, &status);
 		if (!areq) {
 			if (status == MMC_BLK_NEW_REQUEST)
 				mq->flags |= MMC_QUEUE_NEW_REQUEST;
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index 3678220964fe..f61c812995ae 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -736,15 +736,28 @@ static int mmc_test_check_result(struct mmc_test_card *test,
 	return ret;
 }
 
-static int mmc_test_check_result_async(struct mmc_card *card,
+static enum mmc_blk_status mmc_test_check_result_async(struct mmc_card *card,
 				       struct mmc_async_req *areq)
 {
 	struct mmc_test_async_req *test_async =
 		container_of(areq, struct mmc_test_async_req, areq);
+	int ret;
 
 	mmc_test_wait_busy(test_async->test);
 
-	return mmc_test_check_result(test_async->test, areq->mrq);
+	/*
+	 * FIXME: this would earlier just casts a regular error code,
+	 * either of the kernel type -ERRORCODE or the local test framework
+	 * RESULT_* errorcode, into an enum mmc_blk_status and return as
+	 * result check. Instead, convert it to some reasonable type by just
+	 * returning either MMC_BLK_SUCCESS or MMC_BLK_CMD_ERR.
+	 * If possible, a reasonable error code should be returned.
+	 */
+	ret = mmc_test_check_result(test_async->test, areq->mrq);
+	if (ret)
+		return MMC_BLK_CMD_ERR;
+
+	return MMC_BLK_SUCCESS;
 }
 
 /*
@@ -817,6 +830,7 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 	struct mmc_async_req *done_areq;
 	struct mmc_async_req *cur_areq = &test_areq[0].areq;
 	struct mmc_async_req *other_areq = &test_areq[1].areq;
+	enum mmc_blk_status status;
 	int i;
 	int ret;
 
@@ -834,10 +848,12 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 	for (i = 0; i < count; i++) {
 		mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
 				     blocks, blksz, write);
-		done_areq = mmc_start_req(test->card->host, cur_areq, &ret);
+		done_areq = mmc_start_req(test->card->host, cur_areq, &status);
 
-		if (ret || (!done_areq && i > 0))
+		if (status != MMC_BLK_SUCCESS || (!done_areq && i > 0)) {
+			ret = RESULT_FAIL;
 			goto err;
+		}
 
 		if (done_areq) {
 			if (done_areq->mrq == &mrq2)
@@ -851,7 +867,9 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 		dev_addr += blocks;
 	}
 
-	done_areq = mmc_start_req(test->card->host, NULL, &ret);
+	done_areq = mmc_start_req(test->card->host, NULL, &status);
+	if (status != MMC_BLK_SUCCESS)
+		ret = RESULT_FAIL;
 
 	return ret;
 err:
@@ -2351,6 +2369,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
 	struct mmc_request *mrq;
 	unsigned long timeout;
 	bool expired = false;
+	enum mmc_blk_status blkstat = MMC_BLK_SUCCESS;
 	int ret = 0, cmd_ret;
 	u32 status = 0;
 	int count = 0;
@@ -2378,9 +2397,11 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
 
 	/* Start ongoing data request */
 	if (use_areq) {
-		mmc_start_req(host, &test_areq.areq, &ret);
-		if (ret)
+		mmc_start_req(host, &test_areq.areq, &blkstat);
+		if (blkstat != MMC_BLK_SUCCESS) {
+			ret = RESULT_FAIL;
 			goto out_free;
+		}
 	} else {
 		mmc_wait_for_req(host, mrq);
 	}
@@ -2413,10 +2434,13 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
 	} while (repeat_cmd && R1_CURRENT_STATE(status) != R1_STATE_TRAN);
 
 	/* Wait for data request to complete */
-	if (use_areq)
-		mmc_start_req(host, NULL, &ret);
-	else
+	if (use_areq) {
+		mmc_start_req(host, NULL, &blkstat);
+		if (blkstat != MMC_BLK_SUCCESS)
+			ret = RESULT_FAIL;
+	} else {
 		mmc_wait_for_req_done(test->card->host, mrq);
+	}
 
 	/*
 	 * For cap_cmd_during_tfr request, upper layer must send stop if
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 2ad729138bee..50bb9a16380d 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -497,13 +497,13 @@ static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
  *
  * Returns enum mmc_blk_status after checking errors.
  */
-static int mmc_wait_for_data_req_done(struct mmc_host *host,
+static enum mmc_blk_status mmc_wait_for_data_req_done(struct mmc_host *host,
 				      struct mmc_request *mrq,
 				      struct mmc_async_req *next_req)
 {
 	struct mmc_command *cmd;
 	struct mmc_context_info *context_info = &host->context_info;
-	int err;
+	enum mmc_blk_status status;
 	unsigned long flags;
 
 	while (1) {
@@ -520,9 +520,9 @@ static int mmc_wait_for_data_req_done(struct mmc_host *host,
 
 			if (!cmd->error || !cmd->retries ||
 			    mmc_card_removed(host->card)) {
-				err = host->areq->err_check(host->card,
-							    host->areq);
-				break; /* return err */
+				status = host->areq->err_check(host->card,
+							       host->areq);
+				break; /* return status */
 			} else {
 				mmc_retune_recheck(host);
 				pr_info("%s: req failed (CMD%u): %d, retrying...\n",
@@ -540,7 +540,7 @@ static int mmc_wait_for_data_req_done(struct mmc_host *host,
 		}
 	}
 	mmc_retune_release(host);
-	return err;
+	return status;
 }
 
 void mmc_wait_for_req_done(struct mmc_host *host, struct mmc_request *mrq)
@@ -658,9 +658,10 @@ static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
  *	is returned without waiting. NULL is not an error condition.
  */
 struct mmc_async_req *mmc_start_req(struct mmc_host *host,
-				    struct mmc_async_req *areq, int *error)
+				    struct mmc_async_req *areq,
+				    enum mmc_blk_status *ret_stat)
 {
-	int err = 0;
+	enum mmc_blk_status status = MMC_BLK_SUCCESS;
 	int start_err = 0;
 	struct mmc_async_req *data = host->areq;
 
@@ -669,10 +670,10 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 		mmc_pre_req(host, areq->mrq, !host->areq);
 
 	if (host->areq) {
-		err = mmc_wait_for_data_req_done(host, host->areq->mrq,	areq);
-		if (err == MMC_BLK_NEW_REQUEST) {
-			if (error)
-				*error = err;
+		status = mmc_wait_for_data_req_done(host, host->areq->mrq, areq);
+		if (status == MMC_BLK_NEW_REQUEST) {
+			if (ret_stat)
+				*ret_stat = status;
 			/*
 			 * The previous request was not completed,
 			 * nothing to return
@@ -699,23 +700,23 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 		}
 	}
 
-	if (!err && areq)
+	if (status == MMC_BLK_SUCCESS && areq)
 		start_err = __mmc_start_data_req(host, areq->mrq);
 
 	if (host->areq)
 		mmc_post_req(host, host->areq->mrq, 0);
 
 	 /* Cancel a prepared request if it was not started. */
-	if ((err || start_err) && areq)
+	if ((status != MMC_BLK_SUCCESS || start_err) && areq)
 		mmc_post_req(host, areq->mrq, -EINVAL);
 
-	if (err)
+	if (status != MMC_BLK_SUCCESS)
 		host->areq = NULL;
 	else
 		host->areq = areq;
 
-	if (error)
-		*error = err;
+	if (ret_stat)
+		*ret_stat = status;
 	return data;
 }
 EXPORT_SYMBOL(mmc_start_req);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 73fad83acbcb..e49a3ff9d0e0 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -207,18 +207,6 @@ struct sdio_func_tuple;
 
 #define SDIO_MAX_FUNCS		7
 
-enum mmc_blk_status {
-	MMC_BLK_SUCCESS = 0,
-	MMC_BLK_PARTIAL,
-	MMC_BLK_CMD_ERR,
-	MMC_BLK_RETRY,
-	MMC_BLK_ABORT,
-	MMC_BLK_DATA_ERR,
-	MMC_BLK_ECC_ERR,
-	MMC_BLK_NOMEDIUM,
-	MMC_BLK_NEW_REQUEST,
-};
-
 /* The number of MMC physical partitions.  These consist of:
  * boot partitions (2), general purpose partitions (4) and
  * RPMB partition (1) in MMC v4.4.
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 2b953eb8ceae..0ce928b3ce90 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -15,6 +15,18 @@ struct request;
 struct mmc_data;
 struct mmc_request;
 
+enum mmc_blk_status {
+	MMC_BLK_SUCCESS = 0,
+	MMC_BLK_PARTIAL,
+	MMC_BLK_CMD_ERR,
+	MMC_BLK_RETRY,
+	MMC_BLK_ABORT,
+	MMC_BLK_DATA_ERR,
+	MMC_BLK_ECC_ERR,
+	MMC_BLK_NOMEDIUM,
+	MMC_BLK_NEW_REQUEST,
+};
+
 struct mmc_command {
 	u32			opcode;
 	u32			arg;
@@ -150,7 +162,8 @@ struct mmc_async_req;
 extern int mmc_stop_bkops(struct mmc_card *);
 extern int mmc_read_bkops_status(struct mmc_card *);
 extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
-					   struct mmc_async_req *, int *);
+					   struct mmc_async_req *,
+					   enum mmc_blk_status *);
 extern int mmc_interrupt_hpi(struct mmc_card *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern void mmc_wait_for_req_done(struct mmc_host *host,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0b2439441cc8..5310f94be0ab 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -173,7 +173,7 @@ struct mmc_async_req {
 	 * Check error status of completed mmc request.
 	 * Returns 0 if success otherwise non zero.
 	 */
-	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
+	enum mmc_blk_status (*err_check)(struct mmc_card *, struct mmc_async_req *);
 };
 
 /**
-- 
cgit v1.2.3


From 8185e51f358a8dd4801b67e8c66f03eb9eeaba75 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Mon, 12 Sep 2016 10:15:06 -0400
Subject: mmc: tmio-mmc: add support for 32bit data port

For the r7s72100 SOC, the DATA_PORT register was changed to 32-bits wide.
Therefore a new flag has been created that will allow 32-bit reads/writes
to the DATA_PORT register instead of 16-bit (because 16-bits accesses are
not supported).

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc.h     | 12 ++++++++++++
 drivers/mmc/host/tmio_mmc_pio.c | 30 ++++++++++++++++++++++++++++++
 include/linux/mfd/tmio.h        |  5 +++++
 3 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 8e126afd988c..839755c7f2b0 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -245,6 +245,12 @@ static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, int ad
 	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
 }
 
+static inline void sd_ctrl_read32_rep(struct tmio_mmc_host *host, int addr,
+		u32 *buf, int count)
+{
+	readsl(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
 static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
 {
 	/* If there is a hook and it returns non-zero then there
@@ -267,4 +273,10 @@ static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
+static inline void sd_ctrl_write32_rep(struct tmio_mmc_host *host, int addr,
+		const u32 *buf, int count)
+{
+	writesl(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
 #endif
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 700567603107..8b75a9bc1f3d 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -393,6 +393,36 @@ static void tmio_mmc_transfer_data(struct tmio_mmc_host *host,
 	/*
 	 * Transfer the data
 	 */
+	if (host->pdata->flags & TMIO_MMC_32BIT_DATA_PORT) {
+		u8 data[4] = { };
+
+		if (is_read)
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+					   count >> 2);
+		else
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+					    count >> 2);
+
+		/* if count was multiple of 4 */
+		if (!(count & 0x3))
+			return;
+
+		buf8 = (u8 *)(buf + (count >> 2));
+		count %= 4;
+
+		if (is_read) {
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT,
+					   (u32 *)data, 1);
+			memcpy(buf8, data, count);
+		} else {
+			memcpy(data, buf8, count);
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT,
+					    (u32 *)data, 1);
+		}
+
+		return;
+	}
+
 	if (is_read)
 		sd_ctrl_read16_rep(host, CTL_SD_DATA_PORT, buf, count >> 1);
 	else
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 7a26286db895..fba44abd05ba 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -99,6 +99,11 @@
  */
 #define TMIO_MMC_SDIO_STATUS_QUIRK	(1 << 8)
 
+/*
+ * Some controllers have a 32-bit wide data port register
+ */
+#define TMIO_MMC_32BIT_DATA_PORT	(1 << 9)
+
 /*
  * Some controllers allows to set SDx actual clock
  */
-- 
cgit v1.2.3


From c820af5f18ec248b3cb61a9a9ce47ef0f2e9ec63 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Thu, 3 Nov 2016 15:15:59 +0100
Subject: mmc: core: Add helper to see if a host can be retuned

This is in preparation for restoring saved tuning parameters
when resuming the TMIO driver.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 5310f94be0ab..68639295148d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -546,6 +546,11 @@ static inline void mmc_retune_recheck(struct mmc_host *host)
 		host->retune_now = 1;
 }
 
+static inline bool mmc_can_retune(struct mmc_host *host)
+{
+	return host->can_retune == 1;
+}
+
 void mmc_retune_pause(struct mmc_host *host);
 void mmc_retune_unpause(struct mmc_host *host);
 
-- 
cgit v1.2.3


From a4cc7eb4416fda59f18e744925ba3a347f7ecac5 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Thu, 17 Nov 2016 16:40:38 +0900
Subject: mmc: dw_mmc: use the cookie's enum values for post/pre_req()

This patch removed the meaningless value. Instead, use the cookie's enum
values for executing correctly.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c  | 39 +++++++++++++++++++--------------------
 include/linux/mmc/dw_mmc.h |  6 ++++++
 2 files changed, 25 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 0b220e5a8169..65546b6df840 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -414,12 +414,13 @@ static void dw_mci_dma_cleanup(struct dw_mci *host)
 {
 	struct mmc_data *data = host->data;
 
-	if (data)
-		if (!data->host_cookie)
-			dma_unmap_sg(host->dev,
-				     data->sg,
-				     data->sg_len,
-				     dw_mci_get_dma_dir(data));
+	if (data && data->host_cookie == COOKIE_MAPPED) {
+		dma_unmap_sg(host->dev,
+			     data->sg,
+			     data->sg_len,
+			     dw_mci_get_dma_dir(data));
+		data->host_cookie = COOKIE_UNMAPPED;
+	}
 }
 
 static void dw_mci_idmac_reset(struct dw_mci *host)
@@ -850,13 +851,13 @@ static const struct dw_mci_dma_ops dw_mci_edmac_ops = {
 
 static int dw_mci_pre_dma_transfer(struct dw_mci *host,
 				   struct mmc_data *data,
-				   bool next)
+				   int cookie)
 {
 	struct scatterlist *sg;
 	unsigned int i, sg_len;
 
-	if (!next && data->host_cookie)
-		return data->host_cookie;
+	if (data->host_cookie == COOKIE_PRE_MAPPED)
+		return data->sg_len;
 
 	/*
 	 * We don't do DMA on "complex" transfers, i.e. with
@@ -881,8 +882,7 @@ static int dw_mci_pre_dma_transfer(struct dw_mci *host,
 	if (sg_len == 0)
 		return -EINVAL;
 
-	if (next)
-		data->host_cookie = sg_len;
+	data->host_cookie = cookie;
 
 	return sg_len;
 }
@@ -897,13 +897,12 @@ static void dw_mci_pre_req(struct mmc_host *mmc,
 	if (!slot->host->use_dma || !data)
 		return;
 
-	if (data->host_cookie) {
-		data->host_cookie = 0;
-		return;
-	}
+	/* This data might be unmapped at this time */
+	data->host_cookie = COOKIE_UNMAPPED;
 
-	if (dw_mci_pre_dma_transfer(slot->host, mrq->data, 1) < 0)
-		data->host_cookie = 0;
+	if (dw_mci_pre_dma_transfer(slot->host, mrq->data,
+				COOKIE_PRE_MAPPED) < 0)
+		data->host_cookie = COOKIE_UNMAPPED;
 }
 
 static void dw_mci_post_req(struct mmc_host *mmc,
@@ -916,12 +915,12 @@ static void dw_mci_post_req(struct mmc_host *mmc,
 	if (!slot->host->use_dma || !data)
 		return;
 
-	if (data->host_cookie)
+	if (data->host_cookie != COOKIE_UNMAPPED)
 		dma_unmap_sg(slot->host->dev,
 			     data->sg,
 			     data->sg_len,
 			     dw_mci_get_dma_dir(data));
-	data->host_cookie = 0;
+	data->host_cookie = COOKIE_UNMAPPED;
 }
 
 static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data)
@@ -1027,7 +1026,7 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 	if (!host->use_dma)
 		return -ENODEV;
 
-	sg_len = dw_mci_pre_dma_transfer(host, data, 0);
+	sg_len = dw_mci_pre_dma_transfer(host, data, COOKIE_MAPPED);
 	if (sg_len < 0) {
 		host->dma_ops->stop(host);
 		return sg_len;
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index f5af2bd35e7f..15db6f83f53f 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -39,6 +39,12 @@ enum {
 	EVENT_DATA_ERROR,
 };
 
+enum dw_mci_cookie {
+	COOKIE_UNMAPPED,
+	COOKIE_PRE_MAPPED,	/* mapped by pre_req() of dwmmc */
+	COOKIE_MAPPED,		/* mapped by prepare_data() of dwmmc */
+};
+
 struct mmc_data;
 
 enum {
-- 
cgit v1.2.3


From d3c6aac3bdfe97b8b44db6a8aba59786cb9531dc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 23 Nov 2016 11:02:24 +0100
Subject: mmc: delete is_first_req parameter from pre-request callback

The void (*pre_req) callback in the struct mmc_host_ops vtable
is passing an argument "is_first_req" indicating whether this is
the first request or not.

None of the in-kernel users use this parameter: instead, since
they all just do variants of dma_map* they use the DMA cookie
to indicate whether a pre* callback has already been done for
a request when they decide how to handle it.

Delete the parameter from the callback and all users, as it is
just pointless cruft.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c           | 11 ++++-------
 drivers/mmc/host/dw_mmc.c         |  3 +--
 drivers/mmc/host/jz4740_mmc.c     |  3 +--
 drivers/mmc/host/mmci.c           |  3 +--
 drivers/mmc/host/mtk-sd.c         |  3 +--
 drivers/mmc/host/omap_hsmmc.c     |  3 +--
 drivers/mmc/host/rtsx_pci_sdmmc.c |  3 +--
 drivers/mmc/host/sdhci.c          |  3 +--
 include/linux/mmc/host.h          |  3 +--
 9 files changed, 12 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 060f76742550..f39397f7c8dc 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -611,18 +611,15 @@ EXPORT_SYMBOL(mmc_is_req_done);
  *	mmc_pre_req - Prepare for a new request
  *	@host: MMC host to prepare command
  *	@mrq: MMC request to prepare for
- *	@is_first_req: true if there is no previous started request
- *                     that may run in parellel to this call, otherwise false
  *
  *	mmc_pre_req() is called in prior to mmc_start_req() to let
  *	host prepare for the new request. Preparation of a request may be
  *	performed while another request is running on the host.
  */
-static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
-		 bool is_first_req)
+static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq)
 {
 	if (host->ops->pre_req)
-		host->ops->pre_req(host, mrq, is_first_req);
+		host->ops->pre_req(host, mrq);
 }
 
 /**
@@ -667,7 +664,7 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 
 	/* Prepare a new request */
 	if (areq)
-		mmc_pre_req(host, areq->mrq, !host->areq);
+		mmc_pre_req(host, areq->mrq);
 
 	if (host->areq) {
 		status = mmc_wait_for_data_req_done(host, host->areq->mrq, areq);
@@ -696,7 +693,7 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 
 			/* prepare the request again */
 			if (areq)
-				mmc_pre_req(host, areq->mrq, !host->areq);
+				mmc_pre_req(host, areq->mrq);
 		}
 	}
 
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 881ca3e37da4..d400afc74719 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -886,8 +886,7 @@ static int dw_mci_pre_dma_transfer(struct dw_mci *host,
 }
 
 static void dw_mci_pre_req(struct mmc_host *mmc,
-			   struct mmc_request *mrq,
-			   bool is_first_req)
+			   struct mmc_request *mrq)
 {
 	struct dw_mci_slot *slot = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 684087db170b..819ad32964fc 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -320,8 +320,7 @@ dma_unmap:
 }
 
 static void jz4740_mmc_pre_request(struct mmc_host *mmc,
-				   struct mmc_request *mrq,
-				   bool is_first_req)
+				   struct mmc_request *mrq)
 {
 	struct jz4740_mmc_host *host = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 6af3ee7b452a..01a804792f30 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -699,8 +699,7 @@ static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data)
 	next->dma_chan = NULL;
 }
 
-static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq,
-			     bool is_first_req)
+static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct mmci_host *host = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 86af0b199a54..10ef2ae1d2f6 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -927,8 +927,7 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		msdc_start_command(host, mrq, mrq->cmd);
 }
 
-static void msdc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-		bool is_first_req)
+static void msdc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct msdc_host *host = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 5f2f24a7360d..ad11c4cc12ed 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1565,8 +1565,7 @@ static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 	}
 }
 
-static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-			       bool is_first_req)
+static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 3ccaa1415f33..ecb99a8d2fa2 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -190,8 +190,7 @@ static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
 	return using_cookie;
 }
 
-static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-		bool is_first_req)
+static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 62aedf191b0c..7f2526c9572f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2202,8 +2202,7 @@ static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 	data->host_cookie = COOKIE_UNMAPPED;
 }
 
-static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-			       bool is_first_req)
+static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 68639295148d..2a6418d0c343 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,8 +93,7 @@ struct mmc_host_ops {
 	 */
 	void	(*post_req)(struct mmc_host *host, struct mmc_request *req,
 			    int err);
-	void	(*pre_req)(struct mmc_host *host, struct mmc_request *req,
-			   bool is_first_req);
+	void	(*pre_req)(struct mmc_host *host, struct mmc_request *req);
 	void	(*request)(struct mmc_host *host, struct mmc_request *req);
 
 	/*
-- 
cgit v1.2.3


From 03d640ae1f9b24b1d2a11f747143a1ecc0745019 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 25 Nov 2016 10:35:00 +0100
Subject: mmc: block: delete packed command support

I've had it with this code now.

The packed command support is a complex hurdle in the MMC/SD block
layer, around 500+ lines of code which was introduced in 2013 in

commit ce39f9d17c14 ("mmc: support packed write command for eMMC4.5
devices")
commit abd9ac144947 ("mmc: add packed command feature of eMMC4.5")

...and since then it has been rotting. The original author of the
code has disappeared from the community and the mail address is
bouncing.

For the code to be exercised the host must flag that it supports
packed commands, so in mmc_blk_prep_packed_list() which is called for
every single request, the following construction appears:

u8 max_packed_rw = 0;

if ((rq_data_dir(cur) == WRITE) &&
    mmc_host_packed_wr(card->host))
        max_packed_rw = card->ext_csd.max_packed_writes;

if (max_packed_rw == 0)
    goto no_packed;

This has the following logical deductions:

- Only WRITE commands can really be packed, so the solution is
  only half-done: we support packed WRITE but not packed READ.
  The packed command support has not been finalized by supporting
  reads in three years!

- mmc_host_packed_wr() is just a static inline that checks
  host->caps2 & MMC_CAP2_PACKED_WR. The problem with this is
  that NO upstream host sets this capability flag! No driver
  in the kernel is using it, and we can't test it. Packed
  command may be supported in out-of-tree code, but I doubt
  it. I doubt that the code is even working anymore due to
  other refactorings in the MMC block layer, who would
  notice if patches affecting it broke packed commands?
  No one.

- There is no Device Tree binding or code to mark a host as
  supporting packed read or write commands, just this flag
  in caps2, so for sure there are not any DT systems using
  it either.

It has other problems as well: mmc_blk_prep_packed_list() is
speculatively picking requests out of the request queue with
blk_fetch_request() making the MMC/SD stack harder to convert
to the multiqueue block layer. By this we get rid of an
obstacle.

The way I see it this is just cruft littering the MMC/SD
stack.

Cc: Namjae Jeon <namjae.jeon@samsung.com>
Cc: Maya Erez <qca_merez@qca.qualcomm.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/card/block.c | 483 ++---------------------------------------------
 drivers/mmc/card/queue.c |  53 +-----
 drivers/mmc/card/queue.h |  19 --
 include/linux/mmc/host.h |   5 -
 4 files changed, 20 insertions(+), 540 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 6618126fcb9f..86ff28f84698 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -66,9 +66,6 @@ MODULE_ALIAS("mmc:block");
 
 #define mmc_req_rel_wr(req)	((req->cmd_flags & REQ_FUA) && \
 				  (rq_data_dir(req) == WRITE))
-#define PACKED_CMD_VER	0x01
-#define PACKED_CMD_WR	0x02
-
 static DEFINE_MUTEX(block_mutex);
 
 /*
@@ -102,7 +99,6 @@ struct mmc_blk_data {
 	unsigned int	flags;
 #define MMC_BLK_CMD23	(1 << 0)	/* Can do SET_BLOCK_COUNT for multiblock */
 #define MMC_BLK_REL_WR	(1 << 1)	/* MMC Reliable write support */
-#define MMC_BLK_PACKED_CMD	(1 << 2)	/* MMC packed command support */
 
 	unsigned int	usage;
 	unsigned int	read_only;
@@ -126,12 +122,6 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
-enum {
-	MMC_PACKED_NR_IDX = -1,
-	MMC_PACKED_NR_ZERO,
-	MMC_PACKED_NR_SINGLE,
-};
-
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -139,17 +129,6 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md);
 static int get_card_status(struct mmc_card *card, u32 *status, int retries);
 
-static inline void mmc_blk_clear_packed(struct mmc_queue_req *mqrq)
-{
-	struct mmc_packed *packed = mqrq->packed;
-
-	mqrq->cmd_type = MMC_PACKED_NONE;
-	packed->nr_entries = MMC_PACKED_NR_ZERO;
-	packed->idx_failure = MMC_PACKED_NR_IDX;
-	packed->retries = 0;
-	packed->blocks = 0;
-}
-
 static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
 {
 	struct mmc_blk_data *md;
@@ -1420,111 +1399,12 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
 	if (!brq->data.bytes_xfered)
 		return MMC_BLK_RETRY;
 
-	if (mmc_packed_cmd(mq_mrq->cmd_type)) {
-		if (unlikely(brq->data.blocks << 9 != brq->data.bytes_xfered))
-			return MMC_BLK_PARTIAL;
-		else
-			return MMC_BLK_SUCCESS;
-	}
-
 	if (blk_rq_bytes(req) != brq->data.bytes_xfered)
 		return MMC_BLK_PARTIAL;
 
 	return MMC_BLK_SUCCESS;
 }
 
-static int mmc_packed_init(struct mmc_queue *mq, struct mmc_card *card)
-{
-	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
-	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
-	int ret = 0;
-
-
-	mqrq_cur->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL);
-	if (!mqrq_cur->packed) {
-		pr_warn("%s: unable to allocate packed cmd for mqrq_cur\n",
-			mmc_card_name(card));
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	mqrq_prev->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL);
-	if (!mqrq_prev->packed) {
-		pr_warn("%s: unable to allocate packed cmd for mqrq_prev\n",
-			mmc_card_name(card));
-		kfree(mqrq_cur->packed);
-		mqrq_cur->packed = NULL;
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	INIT_LIST_HEAD(&mqrq_cur->packed->list);
-	INIT_LIST_HEAD(&mqrq_prev->packed->list);
-
-out:
-	return ret;
-}
-
-static void mmc_packed_clean(struct mmc_queue *mq)
-{
-	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
-	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
-
-	kfree(mqrq_cur->packed);
-	mqrq_cur->packed = NULL;
-	kfree(mqrq_prev->packed);
-	mqrq_prev->packed = NULL;
-}
-
-static enum mmc_blk_status mmc_blk_packed_err_check(struct mmc_card *card,
-						    struct mmc_async_req *areq)
-{
-	struct mmc_queue_req *mq_rq = container_of(areq, struct mmc_queue_req,
-			mmc_active);
-	struct request *req = mq_rq->req;
-	struct mmc_packed *packed = mq_rq->packed;
-	enum mmc_blk_status status, check;
-	int err;
-	u8 *ext_csd;
-
-	packed->retries--;
-	check = mmc_blk_err_check(card, areq);
-	err = get_card_status(card, &status, 0);
-	if (err) {
-		pr_err("%s: error %d sending status command\n",
-		       req->rq_disk->disk_name, err);
-		return MMC_BLK_ABORT;
-	}
-
-	if (status & R1_EXCEPTION_EVENT) {
-		err = mmc_get_ext_csd(card, &ext_csd);
-		if (err) {
-			pr_err("%s: error %d sending ext_csd\n",
-			       req->rq_disk->disk_name, err);
-			return MMC_BLK_ABORT;
-		}
-
-		if ((ext_csd[EXT_CSD_EXP_EVENTS_STATUS] &
-		     EXT_CSD_PACKED_FAILURE) &&
-		    (ext_csd[EXT_CSD_PACKED_CMD_STATUS] &
-		     EXT_CSD_PACKED_GENERIC_ERROR)) {
-			if (ext_csd[EXT_CSD_PACKED_CMD_STATUS] &
-			    EXT_CSD_PACKED_INDEXED_ERROR) {
-				packed->idx_failure =
-				  ext_csd[EXT_CSD_PACKED_FAILURE_INDEX] - 1;
-				check = MMC_BLK_PARTIAL;
-			}
-			pr_err("%s: packed cmd failed, nr %u, sectors %u, "
-			       "failure index: %d\n",
-			       req->rq_disk->disk_name, packed->nr_entries,
-			       packed->blocks, packed->idx_failure);
-		}
-		kfree(ext_csd);
-	}
-
-	return check;
-}
-
 static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 			       struct mmc_card *card,
 			       int disable_multi,
@@ -1685,222 +1565,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 	mmc_queue_bounce_pre(mqrq);
 }
 
-static inline u8 mmc_calc_packed_hdr_segs(struct request_queue *q,
-					  struct mmc_card *card)
-{
-	unsigned int hdr_sz = mmc_large_sector(card) ? 4096 : 512;
-	unsigned int max_seg_sz = queue_max_segment_size(q);
-	unsigned int len, nr_segs = 0;
-
-	do {
-		len = min(hdr_sz, max_seg_sz);
-		hdr_sz -= len;
-		nr_segs++;
-	} while (hdr_sz);
-
-	return nr_segs;
-}
-
-static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req)
-{
-	struct request_queue *q = mq->queue;
-	struct mmc_card *card = mq->card;
-	struct request *cur = req, *next = NULL;
-	struct mmc_blk_data *md = mq->blkdata;
-	struct mmc_queue_req *mqrq = mq->mqrq_cur;
-	bool en_rel_wr = card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN;
-	unsigned int req_sectors = 0, phys_segments = 0;
-	unsigned int max_blk_count, max_phys_segs;
-	bool put_back = true;
-	u8 max_packed_rw = 0;
-	u8 reqs = 0;
-
-	/*
-	 * We don't need to check packed for any further
-	 * operation of packed stuff as we set MMC_PACKED_NONE
-	 * and return zero for reqs if geting null packed. Also
-	 * we clean the flag of MMC_BLK_PACKED_CMD to avoid doing
-	 * it again when removing blk req.
-	 */
-	if (!mqrq->packed) {
-		md->flags &= (~MMC_BLK_PACKED_CMD);
-		goto no_packed;
-	}
-
-	if (!(md->flags & MMC_BLK_PACKED_CMD))
-		goto no_packed;
-
-	if ((rq_data_dir(cur) == WRITE) &&
-	    mmc_host_packed_wr(card->host))
-		max_packed_rw = card->ext_csd.max_packed_writes;
-
-	if (max_packed_rw == 0)
-		goto no_packed;
-
-	if (mmc_req_rel_wr(cur) &&
-	    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr)
-		goto no_packed;
-
-	if (mmc_large_sector(card) &&
-	    !IS_ALIGNED(blk_rq_sectors(cur), 8))
-		goto no_packed;
-
-	mmc_blk_clear_packed(mqrq);
-
-	max_blk_count = min(card->host->max_blk_count,
-			    card->host->max_req_size >> 9);
-	if (unlikely(max_blk_count > 0xffff))
-		max_blk_count = 0xffff;
-
-	max_phys_segs = queue_max_segments(q);
-	req_sectors += blk_rq_sectors(cur);
-	phys_segments += cur->nr_phys_segments;
-
-	if (rq_data_dir(cur) == WRITE) {
-		req_sectors += mmc_large_sector(card) ? 8 : 1;
-		phys_segments += mmc_calc_packed_hdr_segs(q, card);
-	}
-
-	do {
-		if (reqs >= max_packed_rw - 1) {
-			put_back = false;
-			break;
-		}
-
-		spin_lock_irq(q->queue_lock);
-		next = blk_fetch_request(q);
-		spin_unlock_irq(q->queue_lock);
-		if (!next) {
-			put_back = false;
-			break;
-		}
-
-		if (mmc_large_sector(card) &&
-		    !IS_ALIGNED(blk_rq_sectors(next), 8))
-			break;
-
-		if (mmc_req_is_special(next))
-			break;
-
-		if (rq_data_dir(cur) != rq_data_dir(next))
-			break;
-
-		if (mmc_req_rel_wr(next) &&
-		    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr)
-			break;
-
-		req_sectors += blk_rq_sectors(next);
-		if (req_sectors > max_blk_count)
-			break;
-
-		phys_segments +=  next->nr_phys_segments;
-		if (phys_segments > max_phys_segs)
-			break;
-
-		list_add_tail(&next->queuelist, &mqrq->packed->list);
-		cur = next;
-		reqs++;
-	} while (1);
-
-	if (put_back) {
-		spin_lock_irq(q->queue_lock);
-		blk_requeue_request(q, next);
-		spin_unlock_irq(q->queue_lock);
-	}
-
-	if (reqs > 0) {
-		list_add(&req->queuelist, &mqrq->packed->list);
-		mqrq->packed->nr_entries = ++reqs;
-		mqrq->packed->retries = reqs;
-		return reqs;
-	}
-
-no_packed:
-	mqrq->cmd_type = MMC_PACKED_NONE;
-	return 0;
-}
-
-static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
-					struct mmc_card *card,
-					struct mmc_queue *mq)
-{
-	struct mmc_blk_request *brq = &mqrq->brq;
-	struct request *req = mqrq->req;
-	struct request *prq;
-	struct mmc_blk_data *md = mq->blkdata;
-	struct mmc_packed *packed = mqrq->packed;
-	bool do_rel_wr, do_data_tag;
-	__le32 *packed_cmd_hdr;
-	u8 hdr_blocks;
-	u8 i = 1;
-
-	mqrq->cmd_type = MMC_PACKED_WRITE;
-	packed->blocks = 0;
-	packed->idx_failure = MMC_PACKED_NR_IDX;
-
-	packed_cmd_hdr = packed->cmd_hdr;
-	memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr));
-	packed_cmd_hdr[0] = cpu_to_le32((packed->nr_entries << 16) |
-		(PACKED_CMD_WR << 8) | PACKED_CMD_VER);
-	hdr_blocks = mmc_large_sector(card) ? 8 : 1;
-
-	/*
-	 * Argument for each entry of packed group
-	 */
-	list_for_each_entry(prq, &packed->list, queuelist) {
-		do_rel_wr = mmc_req_rel_wr(prq) && (md->flags & MMC_BLK_REL_WR);
-		do_data_tag = (card->ext_csd.data_tag_unit_size) &&
-			(prq->cmd_flags & REQ_META) &&
-			(rq_data_dir(prq) == WRITE) &&
-			blk_rq_bytes(prq) >= card->ext_csd.data_tag_unit_size;
-		/* Argument of CMD23 */
-		packed_cmd_hdr[(i * 2)] = cpu_to_le32(
-			(do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) |
-			(do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) |
-			blk_rq_sectors(prq));
-		/* Argument of CMD18 or CMD25 */
-		packed_cmd_hdr[((i * 2)) + 1] = cpu_to_le32(
-			mmc_card_blockaddr(card) ?
-			blk_rq_pos(prq) : blk_rq_pos(prq) << 9);
-		packed->blocks += blk_rq_sectors(prq);
-		i++;
-	}
-
-	memset(brq, 0, sizeof(struct mmc_blk_request));
-	brq->mrq.cmd = &brq->cmd;
-	brq->mrq.data = &brq->data;
-	brq->mrq.sbc = &brq->sbc;
-	brq->mrq.stop = &brq->stop;
-
-	brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
-	brq->sbc.arg = MMC_CMD23_ARG_PACKED | (packed->blocks + hdr_blocks);
-	brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
-
-	brq->cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK;
-	brq->cmd.arg = blk_rq_pos(req);
-	if (!mmc_card_blockaddr(card))
-		brq->cmd.arg <<= 9;
-	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-
-	brq->data.blksz = 512;
-	brq->data.blocks = packed->blocks + hdr_blocks;
-	brq->data.flags = MMC_DATA_WRITE;
-
-	brq->stop.opcode = MMC_STOP_TRANSMISSION;
-	brq->stop.arg = 0;
-	brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-
-	mmc_set_data_timeout(&brq->data, card);
-
-	brq->data.sg = mqrq->sg;
-	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
-
-	mqrq->mmc_active.mrq = &brq->mrq;
-	mqrq->mmc_active.err_check = mmc_blk_packed_err_check;
-
-	mmc_queue_bounce_pre(mqrq);
-}
-
 static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 			   struct mmc_blk_request *brq, struct request *req,
 			   int ret)
@@ -1923,79 +1587,10 @@ static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 		if (blocks != (u32)-1) {
 			ret = blk_end_request(req, 0, blocks << 9);
 		}
-	} else {
-		if (!mmc_packed_cmd(mq_rq->cmd_type))
-			ret = blk_end_request(req, 0, brq->data.bytes_xfered);
-	}
-	return ret;
-}
-
-static int mmc_blk_end_packed_req(struct mmc_queue_req *mq_rq)
-{
-	struct request *prq;
-	struct mmc_packed *packed = mq_rq->packed;
-	int idx = packed->idx_failure, i = 0;
-	int ret = 0;
-
-	while (!list_empty(&packed->list)) {
-		prq = list_entry_rq(packed->list.next);
-		if (idx == i) {
-			/* retry from error index */
-			packed->nr_entries -= idx;
-			mq_rq->req = prq;
-			ret = 1;
-
-			if (packed->nr_entries == MMC_PACKED_NR_SINGLE) {
-				list_del_init(&prq->queuelist);
-				mmc_blk_clear_packed(mq_rq);
-			}
-			return ret;
-		}
-		list_del_init(&prq->queuelist);
-		blk_end_request(prq, 0, blk_rq_bytes(prq));
-		i++;
 	}
-
-	mmc_blk_clear_packed(mq_rq);
 	return ret;
 }
 
-static void mmc_blk_abort_packed_req(struct mmc_queue_req *mq_rq)
-{
-	struct request *prq;
-	struct mmc_packed *packed = mq_rq->packed;
-
-	while (!list_empty(&packed->list)) {
-		prq = list_entry_rq(packed->list.next);
-		list_del_init(&prq->queuelist);
-		blk_end_request(prq, -EIO, blk_rq_bytes(prq));
-	}
-
-	mmc_blk_clear_packed(mq_rq);
-}
-
-static void mmc_blk_revert_packed_req(struct mmc_queue *mq,
-				      struct mmc_queue_req *mq_rq)
-{
-	struct request *prq;
-	struct request_queue *q = mq->queue;
-	struct mmc_packed *packed = mq_rq->packed;
-
-	while (!list_empty(&packed->list)) {
-		prq = list_entry_rq(packed->list.prev);
-		if (prq->queuelist.prev != &packed->list) {
-			list_del_init(&prq->queuelist);
-			spin_lock_irq(q->queue_lock);
-			blk_requeue_request(mq->queue, prq);
-			spin_unlock_irq(q->queue_lock);
-		} else {
-			list_del_init(&prq->queuelist);
-		}
-	}
-
-	mmc_blk_clear_packed(mq_rq);
-}
-
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 {
 	struct mmc_blk_data *md = mq->blkdata;
@@ -2006,15 +1601,10 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 	struct mmc_queue_req *mq_rq;
 	struct request *req = rqc;
 	struct mmc_async_req *areq;
-	const u8 packed_nr = 2;
-	u8 reqs = 0;
 
 	if (!rqc && !mq->mqrq_prev->req)
 		return 0;
 
-	if (rqc)
-		reqs = mmc_blk_prep_packed_list(mq, rqc);
-
 	do {
 		if (rqc) {
 			/*
@@ -2029,11 +1619,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 				goto cmd_abort;
 			}
 
-			if (reqs >= packed_nr)
-				mmc_blk_packed_hdr_wrq_prep(mq->mqrq_cur,
-							    card, mq);
-			else
-				mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
 			areq = &mq->mqrq_cur->mmc_active;
 		} else
 			areq = NULL;
@@ -2058,13 +1644,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			 */
 			mmc_blk_reset_success(md, type);
 
-			if (mmc_packed_cmd(mq_rq->cmd_type)) {
-				ret = mmc_blk_end_packed_req(mq_rq);
-				break;
-			} else {
-				ret = blk_end_request(req, 0,
-						brq->data.bytes_xfered);
-			}
+			ret = blk_end_request(req, 0,
+					brq->data.bytes_xfered);
 
 			/*
 			 * If the blk_end_request function returns non-zero even
@@ -2101,8 +1682,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			err = mmc_blk_reset(md, card->host, type);
 			if (!err)
 				break;
-			if (err == -ENODEV ||
-				mmc_packed_cmd(mq_rq->cmd_type))
+			if (err == -ENODEV)
 				goto cmd_abort;
 			/* Fall through */
 		}
@@ -2133,23 +1713,14 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		}
 
 		if (ret) {
-			if (mmc_packed_cmd(mq_rq->cmd_type)) {
-				if (!mq_rq->packed->retries)
-					goto cmd_abort;
-				mmc_blk_packed_hdr_wrq_prep(mq_rq, card, mq);
-				mmc_start_req(card->host,
-					      &mq_rq->mmc_active, NULL);
-			} else {
-
-				/*
-				 * In case of a incomplete request
-				 * prepare it again and resend.
-				 */
-				mmc_blk_rw_rq_prep(mq_rq, card,
-						disable_multi, mq);
-				mmc_start_req(card->host,
-						&mq_rq->mmc_active, NULL);
-			}
+			/*
+			 * In case of a incomplete request
+			 * prepare it again and resend.
+			 */
+			mmc_blk_rw_rq_prep(mq_rq, card,
+					disable_multi, mq);
+			mmc_start_req(card->host,
+					&mq_rq->mmc_active, NULL);
 			mq_rq->brq.retune_retry_done = retune_retry_done;
 		}
 	} while (ret);
@@ -2157,15 +1728,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 	return 1;
 
  cmd_abort:
-	if (mmc_packed_cmd(mq_rq->cmd_type)) {
-		mmc_blk_abort_packed_req(mq_rq);
-	} else {
-		if (mmc_card_removed(card))
-			req->cmd_flags |= REQ_QUIET;
-		while (ret)
-			ret = blk_end_request(req, -EIO,
-					blk_rq_cur_bytes(req));
-	}
+	if (mmc_card_removed(card))
+		req->cmd_flags |= REQ_QUIET;
+	while (ret)
+		ret = blk_end_request(req, -EIO,
+				blk_rq_cur_bytes(req));
 
  start_new_req:
 	if (rqc) {
@@ -2173,12 +1740,6 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			rqc->cmd_flags |= REQ_QUIET;
 			blk_end_request_all(rqc, -EIO);
 		} else {
-			/*
-			 * If current request is packed, it needs to put back.
-			 */
-			if (mmc_packed_cmd(mq->mqrq_cur->cmd_type))
-				mmc_blk_revert_packed_req(mq, mq->mqrq_cur);
-
 			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
 			mmc_start_req(card->host,
 				      &mq->mqrq_cur->mmc_active, NULL);
@@ -2361,14 +1922,6 @@ again:
 		blk_queue_write_cache(md->queue.queue, true, true);
 	}
 
-	if (mmc_card_mmc(card) &&
-	    (area_type == MMC_BLK_DATA_AREA_MAIN) &&
-	    (md->flags & MMC_BLK_CMD23) &&
-	    card->ext_csd.packed_event_en) {
-		if (!mmc_packed_init(&md->queue, card))
-			md->flags |= MMC_BLK_PACKED_CMD;
-	}
-
 	return md;
 
  err_putdisk:
@@ -2472,8 +2025,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 		 */
 		card = md->queue.card;
 		mmc_cleanup_queue(&md->queue);
-		if (md->flags & MMC_BLK_PACKED_CMD)
-			mmc_packed_clean(&md->queue);
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
 			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 3f6a2463ab30..7dacf2744fbd 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -406,41 +406,6 @@ void mmc_queue_resume(struct mmc_queue *mq)
 	}
 }
 
-static unsigned int mmc_queue_packed_map_sg(struct mmc_queue *mq,
-					    struct mmc_packed *packed,
-					    struct scatterlist *sg,
-					    enum mmc_packed_type cmd_type)
-{
-	struct scatterlist *__sg = sg;
-	unsigned int sg_len = 0;
-	struct request *req;
-
-	if (mmc_packed_wr(cmd_type)) {
-		unsigned int hdr_sz = mmc_large_sector(mq->card) ? 4096 : 512;
-		unsigned int max_seg_sz = queue_max_segment_size(mq->queue);
-		unsigned int len, remain, offset = 0;
-		u8 *buf = (u8 *)packed->cmd_hdr;
-
-		remain = hdr_sz;
-		do {
-			len = min(remain, max_seg_sz);
-			sg_set_buf(__sg, buf + offset, len);
-			offset += len;
-			remain -= len;
-			sg_unmark_end(__sg++);
-			sg_len++;
-		} while (remain);
-	}
-
-	list_for_each_entry(req, &packed->list, queuelist) {
-		sg_len += blk_rq_map_sg(mq->queue, req, __sg);
-		__sg = sg + (sg_len - 1);
-		sg_unmark_end(__sg++);
-	}
-	sg_mark_end(sg + (sg_len - 1));
-	return sg_len;
-}
-
 /*
  * Prepare the sg list(s) to be handed of to the host driver
  */
@@ -449,26 +414,14 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
 	unsigned int sg_len;
 	size_t buflen;
 	struct scatterlist *sg;
-	enum mmc_packed_type cmd_type;
 	int i;
 
-	cmd_type = mqrq->cmd_type;
-
-	if (!mqrq->bounce_buf) {
-		if (mmc_packed_cmd(cmd_type))
-			return mmc_queue_packed_map_sg(mq, mqrq->packed,
-						       mqrq->sg, cmd_type);
-		else
-			return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
-	}
+	if (!mqrq->bounce_buf)
+		return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
 
 	BUG_ON(!mqrq->bounce_sg);
 
-	if (mmc_packed_cmd(cmd_type))
-		sg_len = mmc_queue_packed_map_sg(mq, mqrq->packed,
-						 mqrq->bounce_sg, cmd_type);
-	else
-		sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
+	sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
 
 	mqrq->bounce_sg_len = sg_len;
 
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index 334c9306070f..47f5532b5776 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -22,23 +22,6 @@ struct mmc_blk_request {
 	int			retune_retry_done;
 };
 
-enum mmc_packed_type {
-	MMC_PACKED_NONE = 0,
-	MMC_PACKED_WRITE,
-};
-
-#define mmc_packed_cmd(type)	((type) != MMC_PACKED_NONE)
-#define mmc_packed_wr(type)	((type) == MMC_PACKED_WRITE)
-
-struct mmc_packed {
-	struct list_head	list;
-	__le32			cmd_hdr[1024];
-	unsigned int		blocks;
-	u8			nr_entries;
-	u8			retries;
-	s16			idx_failure;
-};
-
 struct mmc_queue_req {
 	struct request		*req;
 	struct mmc_blk_request	brq;
@@ -47,8 +30,6 @@ struct mmc_queue_req {
 	struct scatterlist	*bounce_sg;
 	unsigned int		bounce_sg_len;
 	struct mmc_async_req	mmc_active;
-	enum mmc_packed_type	cmd_type;
-	struct mmc_packed	*packed;
 };
 
 struct mmc_queue {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 2a6418d0c343..2ce32fefb41c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -494,11 +494,6 @@ static inline int mmc_host_uhs(struct mmc_host *host)
 		 MMC_CAP_UHS_DDR50);
 }
 
-static inline int mmc_host_packed_wr(struct mmc_host *host)
-{
-	return host->caps2 & MMC_CAP2_PACKED_WR;
-}
-
 static inline int mmc_card_hs(struct mmc_card *card)
 {
 	return card->host->ios.timing == MMC_TIMING_SD_HS ||
-- 
cgit v1.2.3


From a6fc3b698130230a2342baacd7821eea0405154c Mon Sep 17 00:00:00 2001
From: yangbo lu <yangbo.lu@nxp.com>
Date: Wed, 9 Nov 2016 11:14:08 +0800
Subject: soc: fsl: add GUTS driver for QorIQ platforms

The global utilities block controls power management, I/O device
enabling, power-onreset(POR) configuration monitoring, alternate
function selection for multiplexed signals,and clock control.

This patch adds a driver to manage and access global utilities block.
Initially only reading SVR and registering soc device are supported.
Other guts accesses, such as reading RCW, should eventually be moved
into this driver as well.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/soc/Kconfig      |   3 +-
 drivers/soc/fsl/Kconfig  |  18 ++++
 drivers/soc/fsl/Makefile |   1 +
 drivers/soc/fsl/guts.c   | 236 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsl/guts.h | 125 +++++++++++++++----------
 5 files changed, 333 insertions(+), 50 deletions(-)
 create mode 100644 drivers/soc/fsl/Kconfig
 create mode 100644 drivers/soc/fsl/guts.c

(limited to 'include/linux')

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index e6e90e80519a..f31bceb69c0d 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -1,8 +1,7 @@
 menu "SOC (System On Chip) specific Drivers"
 
 source "drivers/soc/bcm/Kconfig"
-source "drivers/soc/fsl/qbman/Kconfig"
-source "drivers/soc/fsl/qe/Kconfig"
+source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
new file mode 100644
index 000000000000..7a9fb9baa66d
--- /dev/null
+++ b/drivers/soc/fsl/Kconfig
@@ -0,0 +1,18 @@
+#
+# Freescale SOC drivers
+#
+
+source "drivers/soc/fsl/qbman/Kconfig"
+source "drivers/soc/fsl/qe/Kconfig"
+
+config FSL_GUTS
+	bool
+	select SOC_BUS
+	help
+	  The global utilities block controls power management, I/O device
+	  enabling, power-onreset(POR) configuration monitoring, alternate
+	  function selection for multiplexed signals,and clock control.
+	  This driver is to manage and access global utilities block.
+	  Initially only reading SVR and registering soc device are supported.
+	  Other guts accesses, such as reading RCW, should eventually be moved
+	  into this driver as well.
diff --git a/drivers/soc/fsl/Makefile b/drivers/soc/fsl/Makefile
index 75e1f5334821..44b3bebef24a 100644
--- a/drivers/soc/fsl/Makefile
+++ b/drivers/soc/fsl/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_FSL_DPAA)                 += qbman/
 obj-$(CONFIG_QUICC_ENGINE)		+= qe/
 obj-$(CONFIG_CPM)			+= qe/
+obj-$(CONFIG_FSL_GUTS)			+= guts.o
diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c
new file mode 100644
index 000000000000..0ac88263c2d7
--- /dev/null
+++ b/drivers/soc/fsl/guts.c
@@ -0,0 +1,236 @@
+/*
+ * Freescale QorIQ Platforms GUTS Driver
+ *
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/sys_soc.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/fsl/guts.h>
+
+struct guts {
+	struct ccsr_guts __iomem *regs;
+	bool little_endian;
+};
+
+struct fsl_soc_die_attr {
+	char	*die;
+	u32	svr;
+	u32	mask;
+};
+
+static struct guts *guts;
+static struct soc_device_attribute soc_dev_attr;
+static struct soc_device *soc_dev;
+
+
+/* SoC die attribute definition for QorIQ platform */
+static const struct fsl_soc_die_attr fsl_soc_die[] = {
+	/*
+	 * Power Architecture-based SoCs T Series
+	 */
+
+	/* Die: T4240, SoC: T4240/T4160/T4080 */
+	{ .die		= "T4240",
+	  .svr		= 0x82400000,
+	  .mask		= 0xfff00000,
+	},
+	/* Die: T1040, SoC: T1040/T1020/T1042/T1022 */
+	{ .die		= "T1040",
+	  .svr		= 0x85200000,
+	  .mask		= 0xfff00000,
+	},
+	/* Die: T2080, SoC: T2080/T2081 */
+	{ .die		= "T2080",
+	  .svr		= 0x85300000,
+	  .mask		= 0xfff00000,
+	},
+	/* Die: T1024, SoC: T1024/T1014/T1023/T1013 */
+	{ .die		= "T1024",
+	  .svr		= 0x85400000,
+	  .mask		= 0xfff00000,
+	},
+
+	/*
+	 * ARM-based SoCs LS Series
+	 */
+
+	/* Die: LS1043A, SoC: LS1043A/LS1023A */
+	{ .die		= "LS1043A",
+	  .svr		= 0x87920000,
+	  .mask		= 0xffff0000,
+	},
+	/* Die: LS2080A, SoC: LS2080A/LS2040A/LS2085A */
+	{ .die		= "LS2080A",
+	  .svr		= 0x87010000,
+	  .mask		= 0xff3f0000,
+	},
+	/* Die: LS1088A, SoC: LS1088A/LS1048A/LS1084A/LS1044A */
+	{ .die		= "LS1088A",
+	  .svr		= 0x87030000,
+	  .mask		= 0xff3f0000,
+	},
+	/* Die: LS1012A, SoC: LS1012A */
+	{ .die		= "LS1012A",
+	  .svr		= 0x87040000,
+	  .mask		= 0xffff0000,
+	},
+	/* Die: LS1046A, SoC: LS1046A/LS1026A */
+	{ .die		= "LS1046A",
+	  .svr		= 0x87070000,
+	  .mask		= 0xffff0000,
+	},
+	/* Die: LS2088A, SoC: LS2088A/LS2048A/LS2084A/LS2044A */
+	{ .die		= "LS2088A",
+	  .svr		= 0x87090000,
+	  .mask		= 0xff3f0000,
+	},
+	/* Die: LS1021A, SoC: LS1021A/LS1020A/LS1022A */
+	{ .die		= "LS1021A",
+	  .svr		= 0x87000000,
+	  .mask		= 0xfff70000,
+	},
+	{ },
+};
+
+static const struct fsl_soc_die_attr *fsl_soc_die_match(
+	u32 svr, const struct fsl_soc_die_attr *matches)
+{
+	while (matches->svr) {
+		if (matches->svr == (svr & matches->mask))
+			return matches;
+		matches++;
+	};
+	return NULL;
+}
+
+u32 fsl_guts_get_svr(void)
+{
+	u32 svr = 0;
+
+	if (!guts || !guts->regs)
+		return svr;
+
+	if (guts->little_endian)
+		svr = ioread32(&guts->regs->svr);
+	else
+		svr = ioread32be(&guts->regs->svr);
+
+	return svr;
+}
+EXPORT_SYMBOL(fsl_guts_get_svr);
+
+static int fsl_guts_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	const struct fsl_soc_die_attr *soc_die;
+	const char *machine;
+	u32 svr;
+
+	/* Initialize guts */
+	guts = devm_kzalloc(dev, sizeof(*guts), GFP_KERNEL);
+	if (!guts)
+		return -ENOMEM;
+
+	guts->little_endian = of_property_read_bool(np, "little-endian");
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	guts->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(guts->regs))
+		return PTR_ERR(guts->regs);
+
+	/* Register soc device */
+	machine = of_flat_dt_get_machine_name();
+	if (machine)
+		soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL);
+
+	svr = fsl_guts_get_svr();
+	soc_die = fsl_soc_die_match(svr, fsl_soc_die);
+	if (soc_die) {
+		soc_dev_attr.family = devm_kasprintf(dev, GFP_KERNEL,
+						     "QorIQ %s", soc_die->die);
+	} else {
+		soc_dev_attr.family = devm_kasprintf(dev, GFP_KERNEL, "QorIQ");
+	}
+	soc_dev_attr.soc_id = devm_kasprintf(dev, GFP_KERNEL,
+					     "svr:0x%08x", svr);
+	soc_dev_attr.revision = devm_kasprintf(dev, GFP_KERNEL, "%d.%d",
+					       (svr >>  4) & 0xf, svr & 0xf);
+
+	soc_dev = soc_device_register(&soc_dev_attr);
+	if (IS_ERR(soc_dev))
+		return PTR_ERR(soc_dev);
+
+	pr_info("Machine: %s\n", soc_dev_attr.machine);
+	pr_info("SoC family: %s\n", soc_dev_attr.family);
+	pr_info("SoC ID: %s, Revision: %s\n",
+		soc_dev_attr.soc_id, soc_dev_attr.revision);
+	return 0;
+}
+
+static int fsl_guts_remove(struct platform_device *dev)
+{
+	soc_device_unregister(soc_dev);
+	return 0;
+}
+
+/*
+ * Table for matching compatible strings, for device tree
+ * guts node, for Freescale QorIQ SOCs.
+ */
+static const struct of_device_id fsl_guts_of_match[] = {
+	{ .compatible = "fsl,qoriq-device-config-1.0", },
+	{ .compatible = "fsl,qoriq-device-config-2.0", },
+	{ .compatible = "fsl,p1010-guts", },
+	{ .compatible = "fsl,p1020-guts", },
+	{ .compatible = "fsl,p1021-guts", },
+	{ .compatible = "fsl,p1022-guts", },
+	{ .compatible = "fsl,p1023-guts", },
+	{ .compatible = "fsl,p2020-guts", },
+	{ .compatible = "fsl,bsc9131-guts", },
+	{ .compatible = "fsl,bsc9132-guts", },
+	{ .compatible = "fsl,mpc8536-guts", },
+	{ .compatible = "fsl,mpc8544-guts", },
+	{ .compatible = "fsl,mpc8548-guts", },
+	{ .compatible = "fsl,mpc8568-guts", },
+	{ .compatible = "fsl,mpc8569-guts", },
+	{ .compatible = "fsl,mpc8572-guts", },
+	{ .compatible = "fsl,ls1021a-dcfg", },
+	{ .compatible = "fsl,ls1043a-dcfg", },
+	{ .compatible = "fsl,ls2080a-dcfg", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, fsl_guts_of_match);
+
+static struct platform_driver fsl_guts_driver = {
+	.driver = {
+		.name = "fsl-guts",
+		.of_match_table = fsl_guts_of_match,
+	},
+	.probe = fsl_guts_probe,
+	.remove = fsl_guts_remove,
+};
+
+static int __init fsl_guts_init(void)
+{
+	return platform_driver_register(&fsl_guts_driver);
+}
+core_initcall(fsl_guts_init);
+
+static void __exit fsl_guts_exit(void)
+{
+	platform_driver_unregister(&fsl_guts_driver);
+}
+module_exit(fsl_guts_exit);
diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h
index 649e9171a9b3..3efa3b861d44 100644
--- a/include/linux/fsl/guts.h
+++ b/include/linux/fsl/guts.h
@@ -29,83 +29,112 @@
  * #ifdefs.
  */
 struct ccsr_guts {
-	__be32	porpllsr;	/* 0x.0000 - POR PLL Ratio Status Register */
-	__be32	porbmsr;	/* 0x.0004 - POR Boot Mode Status Register */
-	__be32	porimpscr;	/* 0x.0008 - POR I/O Impedance Status and Control Register */
-	__be32	pordevsr;	/* 0x.000c - POR I/O Device Status Register */
-	__be32	pordbgmsr;	/* 0x.0010 - POR Debug Mode Status Register */
-	__be32	pordevsr2;	/* 0x.0014 - POR device status register 2 */
+	u32	porpllsr;	/* 0x.0000 - POR PLL Ratio Status Register */
+	u32	porbmsr;	/* 0x.0004 - POR Boot Mode Status Register */
+	u32	porimpscr;	/* 0x.0008 - POR I/O Impedance Status and
+				 *           Control Register
+				 */
+	u32	pordevsr;	/* 0x.000c - POR I/O Device Status Register */
+	u32	pordbgmsr;	/* 0x.0010 - POR Debug Mode Status Register */
+	u32	pordevsr2;	/* 0x.0014 - POR device status register 2 */
 	u8	res018[0x20 - 0x18];
-	__be32	porcir;		/* 0x.0020 - POR Configuration Information Register */
+	u32	porcir;		/* 0x.0020 - POR Configuration Information
+				 *           Register
+				 */
 	u8	res024[0x30 - 0x24];
-	__be32	gpiocr;		/* 0x.0030 - GPIO Control Register */
+	u32	gpiocr;		/* 0x.0030 - GPIO Control Register */
 	u8	res034[0x40 - 0x34];
-	__be32	gpoutdr;	/* 0x.0040 - General-Purpose Output Data Register */
+	u32	gpoutdr;	/* 0x.0040 - General-Purpose Output Data
+				 *           Register
+				 */
 	u8	res044[0x50 - 0x44];
-	__be32	gpindr;		/* 0x.0050 - General-Purpose Input Data Register */
+	u32	gpindr;		/* 0x.0050 - General-Purpose Input Data
+				 *           Register
+				 */
 	u8	res054[0x60 - 0x54];
-	__be32	pmuxcr;		/* 0x.0060 - Alternate Function Signal Multiplex Control */
-        __be32  pmuxcr2;	/* 0x.0064 - Alternate function signal multiplex control 2 */
-        __be32  dmuxcr;		/* 0x.0068 - DMA Mux Control Register */
+	u32	pmuxcr;		/* 0x.0060 - Alternate Function Signal
+				 *           Multiplex Control
+				 */
+	u32	pmuxcr2;	/* 0x.0064 - Alternate function signal
+				 *           multiplex control 2
+				 */
+	u32	dmuxcr;		/* 0x.0068 - DMA Mux Control Register */
         u8	res06c[0x70 - 0x6c];
-	__be32	devdisr;	/* 0x.0070 - Device Disable Control */
+	u32	devdisr;	/* 0x.0070 - Device Disable Control */
 #define CCSR_GUTS_DEVDISR_TB1	0x00001000
 #define CCSR_GUTS_DEVDISR_TB0	0x00004000
-	__be32	devdisr2;	/* 0x.0074 - Device Disable Control 2 */
+	u32	devdisr2;	/* 0x.0074 - Device Disable Control 2 */
 	u8	res078[0x7c - 0x78];
-	__be32  pmjcr;		/* 0x.007c - 4 Power Management Jog Control Register */
-	__be32	powmgtcsr;	/* 0x.0080 - Power Management Status and Control Register */
-	__be32  pmrccr;		/* 0x.0084 - Power Management Reset Counter Configuration Register */
-	__be32  pmpdccr;	/* 0x.0088 - Power Management Power Down Counter Configuration Register */
-	__be32  pmcdr;		/* 0x.008c - 4Power management clock disable register */
-	__be32	mcpsumr;	/* 0x.0090 - Machine Check Summary Register */
-	__be32	rstrscr;	/* 0x.0094 - Reset Request Status and Control Register */
-	__be32  ectrstcr;	/* 0x.0098 - Exception reset control register */
-	__be32  autorstsr;	/* 0x.009c - Automatic reset status register */
-	__be32	pvr;		/* 0x.00a0 - Processor Version Register */
-	__be32	svr;		/* 0x.00a4 - System Version Register */
+	u32	pmjcr;		/* 0x.007c - 4 Power Management Jog Control
+				 *           Register
+				 */
+	u32	powmgtcsr;	/* 0x.0080 - Power Management Status and
+				 *           Control Register
+				 */
+	u32	pmrccr;		/* 0x.0084 - Power Management Reset Counter
+				 *           Configuration Register
+				 */
+	u32	pmpdccr;	/* 0x.0088 - Power Management Power Down Counter
+				 *           Configuration Register
+				 */
+	u32	pmcdr;		/* 0x.008c - 4Power management clock disable
+				 *           register
+				 */
+	u32	mcpsumr;	/* 0x.0090 - Machine Check Summary Register */
+	u32	rstrscr;	/* 0x.0094 - Reset Request Status and
+				 *           Control Register
+				 */
+	u32	ectrstcr;	/* 0x.0098 - Exception reset control register */
+	u32	autorstsr;	/* 0x.009c - Automatic reset status register */
+	u32	pvr;		/* 0x.00a0 - Processor Version Register */
+	u32	svr;		/* 0x.00a4 - System Version Register */
 	u8	res0a8[0xb0 - 0xa8];
-	__be32	rstcr;		/* 0x.00b0 - Reset Control Register */
+	u32	rstcr;		/* 0x.00b0 - Reset Control Register */
 	u8	res0b4[0xc0 - 0xb4];
-	__be32  iovselsr;	/* 0x.00c0 - I/O voltage select status register
+	u32	iovselsr;	/* 0x.00c0 - I/O voltage select status register
 					     Called 'elbcvselcr' on 86xx SOCs */
 	u8	res0c4[0x100 - 0xc4];
-	__be32	rcwsr[16];	/* 0x.0100 - Reset Control Word Status registers
+	u32	rcwsr[16];	/* 0x.0100 - Reset Control Word Status registers
 					     There are 16 registers */
 	u8	res140[0x224 - 0x140];
-	__be32  iodelay1;	/* 0x.0224 - IO delay control register 1 */
-	__be32  iodelay2;	/* 0x.0228 - IO delay control register 2 */
+	u32	iodelay1;	/* 0x.0224 - IO delay control register 1 */
+	u32	iodelay2;	/* 0x.0228 - IO delay control register 2 */
 	u8	res22c[0x604 - 0x22c];
-	__be32	pamubypenr; 	/* 0x.604 - PAMU bypass enable register */
+	u32	pamubypenr;	/* 0x.604 - PAMU bypass enable register */
 	u8	res608[0x800 - 0x608];
-	__be32	clkdvdr;	/* 0x.0800 - Clock Divide Register */
+	u32	clkdvdr;	/* 0x.0800 - Clock Divide Register */
 	u8	res804[0x900 - 0x804];
-	__be32	ircr;		/* 0x.0900 - Infrared Control Register */
+	u32	ircr;		/* 0x.0900 - Infrared Control Register */
 	u8	res904[0x908 - 0x904];
-	__be32	dmacr;		/* 0x.0908 - DMA Control Register */
+	u32	dmacr;		/* 0x.0908 - DMA Control Register */
 	u8	res90c[0x914 - 0x90c];
-	__be32	elbccr;		/* 0x.0914 - eLBC Control Register */
+	u32	elbccr;		/* 0x.0914 - eLBC Control Register */
 	u8	res918[0xb20 - 0x918];
-	__be32	ddr1clkdr;	/* 0x.0b20 - DDR1 Clock Disable Register */
-	__be32	ddr2clkdr;	/* 0x.0b24 - DDR2 Clock Disable Register */
-	__be32	ddrclkdr;	/* 0x.0b28 - DDR Clock Disable Register */
+	u32	ddr1clkdr;	/* 0x.0b20 - DDR1 Clock Disable Register */
+	u32	ddr2clkdr;	/* 0x.0b24 - DDR2 Clock Disable Register */
+	u32	ddrclkdr;	/* 0x.0b28 - DDR Clock Disable Register */
 	u8	resb2c[0xe00 - 0xb2c];
-	__be32	clkocr;		/* 0x.0e00 - Clock Out Select Register */
+	u32	clkocr;		/* 0x.0e00 - Clock Out Select Register */
 	u8	rese04[0xe10 - 0xe04];
-	__be32	ddrdllcr;	/* 0x.0e10 - DDR DLL Control Register */
+	u32	ddrdllcr;	/* 0x.0e10 - DDR DLL Control Register */
 	u8	rese14[0xe20 - 0xe14];
-	__be32	lbcdllcr;	/* 0x.0e20 - LBC DLL Control Register */
-	__be32  cpfor;		/* 0x.0e24 - L2 charge pump fuse override register */
+	u32	lbcdllcr;	/* 0x.0e20 - LBC DLL Control Register */
+	u32	cpfor;		/* 0x.0e24 - L2 charge pump fuse override
+				 *           register
+				 */
 	u8	rese28[0xf04 - 0xe28];
-	__be32	srds1cr0;	/* 0x.0f04 - SerDes1 Control Register 0 */
-	__be32	srds1cr1;	/* 0x.0f08 - SerDes1 Control Register 0 */
+	u32	srds1cr0;	/* 0x.0f04 - SerDes1 Control Register 0 */
+	u32	srds1cr1;	/* 0x.0f08 - SerDes1 Control Register 0 */
 	u8	resf0c[0xf2c - 0xf0c];
-	__be32  itcr;		/* 0x.0f2c - Internal transaction control register */
+	u32	itcr;		/* 0x.0f2c - Internal transaction control
+				 *           register
+				 */
 	u8	resf30[0xf40 - 0xf30];
-	__be32	srds2cr0;	/* 0x.0f40 - SerDes2 Control Register 0 */
-	__be32	srds2cr1;	/* 0x.0f44 - SerDes2 Control Register 0 */
+	u32	srds2cr0;	/* 0x.0f40 - SerDes2 Control Register 0 */
+	u32	srds2cr1;	/* 0x.0f44 - SerDes2 Control Register 0 */
 } __attribute__ ((packed));
 
+u32 fsl_guts_get_svr(void);
 
 /* Alternate function signal multiplex control */
 #define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x))
-- 
cgit v1.2.3


From 937d3a0af521ece133a8716c1bf2d8044e15faa0 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Thu, 15 Sep 2016 14:44:03 +0200
Subject: mfd: Add support for Allwinner SoCs ADC

The Allwinner SoCs all have an ADC that can also act as a touchscreen
controller and a thermal sensor. For now, only the ADC and the thermal
sensor drivers are probed by the MFD, the touchscreen controller support
will be added later.

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig             |  15 ++++
 drivers/mfd/Makefile            |   1 +
 drivers/mfd/sun4i-gpadc.c       | 181 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/sun4i-gpadc.h |  94 +++++++++++++++++++++
 4 files changed, 291 insertions(+)
 create mode 100644 drivers/mfd/sun4i-gpadc.c
 create mode 100644 include/linux/mfd/sun4i-gpadc.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 1ed0584f494e..94db77307f62 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -40,6 +40,21 @@ config MFD_ACT8945A
 	  linear regulators, along with a complete ActivePath battery
 	  charger.
 
+config MFD_SUN4I_GPADC
+	tristate "Allwinner sunxi platforms' GPADC MFD driver"
+	select MFD_CORE
+	select REGMAP_MMIO
+	depends on ARCH_SUNXI || COMPILE_TEST
+	help
+	  Select this to get support for Allwinner SoCs (A10, A13 and A31) ADC.
+	  This driver will only map the hardware interrupt and registers, you
+	  have to select individual drivers based on this MFD to be able to use
+	  the ADC or the thermal sensor. This will try to probe the ADC driver
+	  sun4i-gpadc-iio and the hwmon driver iio_hwmon.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called sun4i-gpadc.
+
 config MFD_AS3711
 	bool "AMS AS3711"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 7bb5a50127cb..dda4d4f73ad7 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -211,3 +211,4 @@ obj-$(CONFIG_INTEL_SOC_PMIC)	+= intel-soc-pmic.o
 obj-$(CONFIG_MFD_MT6397)	+= mt6397-core.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR)	+= altera-a10sr.o
+obj-$(CONFIG_MFD_SUN4I_GPADC)	+= sun4i-gpadc.o
diff --git a/drivers/mfd/sun4i-gpadc.c b/drivers/mfd/sun4i-gpadc.c
new file mode 100644
index 000000000000..8a7ee5b6beb1
--- /dev/null
+++ b/drivers/mfd/sun4i-gpadc.c
@@ -0,0 +1,181 @@
+/* ADC MFD core driver for sunxi platforms
+ *
+ * Copyright (c) 2016 Quentin Schulz <quentin.schulz@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/sun4i-gpadc.h>
+
+#define ARCH_SUN4I_A10 0
+#define ARCH_SUN5I_A13 1
+#define ARCH_SUN6I_A31 2
+
+static struct resource adc_resources[] = {
+	DEFINE_RES_IRQ_NAMED(SUN4I_GPADC_IRQ_FIFO_DATA, "FIFO_DATA_PENDING"),
+	DEFINE_RES_IRQ_NAMED(SUN4I_GPADC_IRQ_TEMP_DATA, "TEMP_DATA_PENDING"),
+};
+
+static const struct regmap_irq sun4i_gpadc_regmap_irq[] = {
+	REGMAP_IRQ_REG(SUN4I_GPADC_IRQ_FIFO_DATA, 0,
+		       SUN4I_GPADC_INT_FIFOC_TP_DATA_IRQ_EN),
+	REGMAP_IRQ_REG(SUN4I_GPADC_IRQ_TEMP_DATA, 0,
+		       SUN4I_GPADC_INT_FIFOC_TEMP_IRQ_EN),
+};
+
+static const struct regmap_irq_chip sun4i_gpadc_regmap_irq_chip = {
+	.name = "sun4i_gpadc_irq_chip",
+	.status_base = SUN4I_GPADC_INT_FIFOS,
+	.ack_base = SUN4I_GPADC_INT_FIFOS,
+	.mask_base = SUN4I_GPADC_INT_FIFOC,
+	.init_ack_masked = true,
+	.mask_invert = true,
+	.irqs = sun4i_gpadc_regmap_irq,
+	.num_irqs = ARRAY_SIZE(sun4i_gpadc_regmap_irq),
+	.num_regs = 1,
+};
+
+static struct mfd_cell sun4i_gpadc_cells[] = {
+	{
+		.name	= "sun4i-a10-gpadc-iio",
+		.resources = adc_resources,
+		.num_resources = ARRAY_SIZE(adc_resources),
+	},
+	{ .name = "iio_hwmon" }
+};
+
+static struct mfd_cell sun5i_gpadc_cells[] = {
+	{
+		.name	= "sun5i-a13-gpadc-iio",
+		.resources = adc_resources,
+		.num_resources = ARRAY_SIZE(adc_resources),
+	},
+	{ .name = "iio_hwmon" },
+};
+
+static struct mfd_cell sun6i_gpadc_cells[] = {
+	{
+		.name	= "sun6i-a31-gpadc-iio",
+		.resources = adc_resources,
+		.num_resources = ARRAY_SIZE(adc_resources),
+	},
+	{ .name = "iio_hwmon" },
+};
+
+static const struct regmap_config sun4i_gpadc_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.fast_io = true,
+};
+
+static const struct of_device_id sun4i_gpadc_of_match[] = {
+	{
+		.compatible = "allwinner,sun4i-a10-ts",
+		.data = (void *)ARCH_SUN4I_A10,
+	}, {
+		.compatible = "allwinner,sun5i-a13-ts",
+		.data = (void *)ARCH_SUN5I_A13,
+	}, {
+		.compatible = "allwinner,sun6i-a31-ts",
+		.data = (void *)ARCH_SUN6I_A31,
+	}, { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, sun4i_gpadc_of_match);
+
+static int sun4i_gpadc_probe(struct platform_device *pdev)
+{
+	struct sun4i_gpadc_dev *dev;
+	struct resource *mem;
+	const struct of_device_id *of_id;
+	const struct mfd_cell *cells;
+	unsigned int irq, size;
+	int ret;
+
+	of_id = of_match_node(sun4i_gpadc_of_match, pdev->dev.of_node);
+	if (!of_id)
+		return -EINVAL;
+
+	switch ((int)of_id->data) {
+	case ARCH_SUN4I_A10:
+		cells = sun4i_gpadc_cells;
+		size = ARRAY_SIZE(sun4i_gpadc_cells);
+		break;
+	case ARCH_SUN5I_A13:
+		cells = sun5i_gpadc_cells;
+		size = ARRAY_SIZE(sun5i_gpadc_cells);
+		break;
+	case ARCH_SUN6I_A31:
+		cells = sun6i_gpadc_cells;
+		size = ARRAY_SIZE(sun6i_gpadc_cells);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dev->base = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dev->base))
+		return PTR_ERR(dev->base);
+
+	dev->dev = &pdev->dev;
+	dev_set_drvdata(dev->dev, dev);
+
+	dev->regmap = devm_regmap_init_mmio(dev->dev, dev->base,
+					    &sun4i_gpadc_regmap_config);
+	if (IS_ERR(dev->regmap)) {
+		ret = PTR_ERR(dev->regmap);
+		dev_err(&pdev->dev, "failed to init regmap: %d\n", ret);
+		return ret;
+	}
+
+	/* Disable all interrupts */
+	regmap_write(dev->regmap, SUN4I_GPADC_INT_FIFOC, 0);
+
+	irq = platform_get_irq(pdev, 0);
+	ret = devm_regmap_add_irq_chip(&pdev->dev, dev->regmap, irq,
+				       IRQF_ONESHOT, 0,
+				       &sun4i_gpadc_regmap_irq_chip,
+				       &dev->regmap_irqc);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add irq chip: %d\n", ret);
+		return ret;
+	}
+
+	ret = devm_mfd_add_devices(dev->dev, 0, cells, size, NULL, 0, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add MFD devices: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct platform_driver sun4i_gpadc_driver = {
+	.driver = {
+		.name = "sun4i-gpadc",
+		.of_match_table = of_match_ptr(sun4i_gpadc_of_match),
+	},
+	.probe = sun4i_gpadc_probe,
+};
+
+module_platform_driver(sun4i_gpadc_driver);
+
+MODULE_DESCRIPTION("Allwinner sunxi platforms' GPADC MFD core driver");
+MODULE_AUTHOR("Quentin Schulz <quentin.schulz@free-electrons.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h
new file mode 100644
index 000000000000..d7a29f246d64
--- /dev/null
+++ b/include/linux/mfd/sun4i-gpadc.h
@@ -0,0 +1,94 @@
+/* Header of ADC MFD core driver for sunxi platforms
+ *
+ * Copyright (c) 2016 Quentin Schulz <quentin.schulz@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ */
+
+#ifndef __SUN4I_GPADC__H__
+#define __SUN4I_GPADC__H__
+
+#define SUN4I_GPADC_CTRL0				0x00
+
+#define SUN4I_GPADC_CTRL0_ADC_FIRST_DLY(x)		((GENMASK(7, 0) & (x)) << 24)
+#define SUN4I_GPADC_CTRL0_ADC_FIRST_DLY_MODE		BIT(23)
+#define SUN4I_GPADC_CTRL0_ADC_CLK_SELECT		BIT(22)
+#define SUN4I_GPADC_CTRL0_ADC_CLK_DIVIDER(x)		((GENMASK(1, 0) & (x)) << 20)
+#define SUN4I_GPADC_CTRL0_FS_DIV(x)			((GENMASK(3, 0) & (x)) << 16)
+#define SUN4I_GPADC_CTRL0_T_ACQ(x)			(GENMASK(15, 0) & (x))
+
+#define SUN4I_GPADC_CTRL1				0x04
+
+#define SUN4I_GPADC_CTRL1_STYLUS_UP_DEBOUNCE(x)		((GENMASK(7, 0) & (x)) << 12)
+#define SUN4I_GPADC_CTRL1_STYLUS_UP_DEBOUNCE_EN		BIT(9)
+#define SUN4I_GPADC_CTRL1_TOUCH_PAN_CALI_EN		BIT(6)
+#define SUN4I_GPADC_CTRL1_TP_DUAL_EN			BIT(5)
+#define SUN4I_GPADC_CTRL1_TP_MODE_EN			BIT(4)
+#define SUN4I_GPADC_CTRL1_TP_ADC_SELECT			BIT(3)
+#define SUN4I_GPADC_CTRL1_ADC_CHAN_SELECT(x)		(GENMASK(2, 0) & (x))
+
+/* TP_CTRL1 bits for sun6i SOCs */
+#define SUN6I_GPADC_CTRL1_TOUCH_PAN_CALI_EN		BIT(7)
+#define SUN6I_GPADC_CTRL1_TP_DUAL_EN			BIT(6)
+#define SUN6I_GPADC_CTRL1_TP_MODE_EN			BIT(5)
+#define SUN6I_GPADC_CTRL1_TP_ADC_SELECT			BIT(4)
+#define SUN6I_GPADC_CTRL1_ADC_CHAN_SELECT(x)		(GENMASK(3, 0) & BIT(x))
+
+#define SUN4I_GPADC_CTRL2				0x08
+
+#define SUN4I_GPADC_CTRL2_TP_SENSITIVE_ADJUST(x)	((GENMASK(3, 0) & (x)) << 28)
+#define SUN4I_GPADC_CTRL2_TP_MODE_SELECT(x)		((GENMASK(1, 0) & (x)) << 26)
+#define SUN4I_GPADC_CTRL2_PRE_MEA_EN			BIT(24)
+#define SUN4I_GPADC_CTRL2_PRE_MEA_THRE_CNT(x)		(GENMASK(23, 0) & (x))
+
+#define SUN4I_GPADC_CTRL3				0x0c
+
+#define SUN4I_GPADC_CTRL3_FILTER_EN			BIT(2)
+#define SUN4I_GPADC_CTRL3_FILTER_TYPE(x)		(GENMASK(1, 0) & (x))
+
+#define SUN4I_GPADC_TPR					0x18
+
+#define SUN4I_GPADC_TPR_TEMP_ENABLE			BIT(16)
+#define SUN4I_GPADC_TPR_TEMP_PERIOD(x)			(GENMASK(15, 0) & (x))
+
+#define SUN4I_GPADC_INT_FIFOC				0x10
+
+#define SUN4I_GPADC_INT_FIFOC_TEMP_IRQ_EN		BIT(18)
+#define SUN4I_GPADC_INT_FIFOC_TP_OVERRUN_IRQ_EN		BIT(17)
+#define SUN4I_GPADC_INT_FIFOC_TP_DATA_IRQ_EN		BIT(16)
+#define SUN4I_GPADC_INT_FIFOC_TP_DATA_XY_CHANGE		BIT(13)
+#define SUN4I_GPADC_INT_FIFOC_TP_FIFO_TRIG_LEVEL(x)	((GENMASK(4, 0) & (x)) << 8)
+#define SUN4I_GPADC_INT_FIFOC_TP_DATA_DRQ_EN		BIT(7)
+#define SUN4I_GPADC_INT_FIFOC_TP_FIFO_FLUSH		BIT(4)
+#define SUN4I_GPADC_INT_FIFOC_TP_UP_IRQ_EN		BIT(1)
+#define SUN4I_GPADC_INT_FIFOC_TP_DOWN_IRQ_EN		BIT(0)
+
+#define SUN4I_GPADC_INT_FIFOS				0x14
+
+#define SUN4I_GPADC_INT_FIFOS_TEMP_DATA_PENDING		BIT(18)
+#define SUN4I_GPADC_INT_FIFOS_FIFO_OVERRUN_PENDING	BIT(17)
+#define SUN4I_GPADC_INT_FIFOS_FIFO_DATA_PENDING		BIT(16)
+#define SUN4I_GPADC_INT_FIFOS_TP_IDLE_FLG		BIT(2)
+#define SUN4I_GPADC_INT_FIFOS_TP_UP_PENDING		BIT(1)
+#define SUN4I_GPADC_INT_FIFOS_TP_DOWN_PENDING		BIT(0)
+
+#define SUN4I_GPADC_CDAT				0x1c
+#define SUN4I_GPADC_TEMP_DATA				0x20
+#define SUN4I_GPADC_DATA				0x24
+
+#define SUN4I_GPADC_IRQ_FIFO_DATA			0
+#define SUN4I_GPADC_IRQ_TEMP_DATA			1
+
+/* 10s delay before suspending the IP */
+#define SUN4I_GPADC_AUTOSUSPEND_DELAY			10000
+
+struct sun4i_gpadc_dev {
+	struct device			*dev;
+	struct regmap			*regmap;
+	struct regmap_irq_chip_data	*regmap_irqc;
+	void __iomem			*base;
+};
+
+#endif
-- 
cgit v1.2.3


From 054814b863b32a19a5094edb78a14c3c441f57dd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Sep 2016 13:35:30 +0200
Subject: mfd: tps65912: Move regmap config into core driver

When building with extra warnings enabled, most files including
linux/mfd/tps65912.h warn about a static variable defined in the
header:

include/linux/mfd/tps65912.h:331:35: warning: 'tps65912_regmap_config' defined but not used [-Wunused-const-variable=]

We also duplicate the data structure between the i2c and spi front-end
drivers. Moving it into the driver code avoids the warning and
the duplication.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65912-core.c  | 17 +++++++++++++++++
 include/linux/mfd/tps65912.h | 16 +---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c
index a88cfa80dbc4..f33567bc428d 100644
--- a/drivers/mfd/tps65912-core.c
+++ b/drivers/mfd/tps65912-core.c
@@ -77,6 +77,23 @@ static struct regmap_irq_chip tps65912_irq_chip = {
 	.init_ack_masked = true,
 };
 
+static const struct regmap_range tps65912_yes_ranges[] = {
+	regmap_reg_range(TPS65912_INT_STS, TPS65912_GPIO5),
+};
+
+static const struct regmap_access_table tps65912_volatile_table = {
+	.yes_ranges = tps65912_yes_ranges,
+	.n_yes_ranges = ARRAY_SIZE(tps65912_yes_ranges),
+};
+
+const struct regmap_config tps65912_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_table = &tps65912_volatile_table,
+};
+EXPORT_SYMBOL_GPL(tps65912_regmap_config);
+
 int tps65912_device_init(struct tps65912 *tps)
 {
 	int ret;
diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h
index 1a603701550e..b25d0297ba88 100644
--- a/include/linux/mfd/tps65912.h
+++ b/include/linux/mfd/tps65912.h
@@ -319,21 +319,7 @@ struct tps65912 {
 	struct regmap_irq_chip_data *irq_data;
 };
 
-static const struct regmap_range tps65912_yes_ranges[] = {
-	regmap_reg_range(TPS65912_INT_STS, TPS65912_GPIO5),
-};
-
-static const struct regmap_access_table tps65912_volatile_table = {
-	.yes_ranges = tps65912_yes_ranges,
-	.n_yes_ranges = ARRAY_SIZE(tps65912_yes_ranges),
-};
-
-static const struct regmap_config tps65912_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-	.cache_type = REGCACHE_RBTREE,
-	.volatile_table = &tps65912_volatile_table,
-};
+extern const struct regmap_config tps65912_regmap_config;
 
 int tps65912_device_init(struct tps65912 *tps);
 int tps65912_device_exit(struct tps65912 *tps);
-- 
cgit v1.2.3


From b2e2c85091710159b305735d557f4ef4695f5dff Mon Sep 17 00:00:00 2001
From: Jianhong Chen <chenjh@rock-chips.com>
Date: Mon, 17 Oct 2016 17:03:10 +0800
Subject: mfd: rk808: RK818 uses DEV_OFF to power off supplies

DEV_OFF and DEV_OFF_RST functions for RK808 are designed error that
only DEV_OFF_RST can power off supplies. RK818 has been fixed this
issue, so that DEV_OFF is used to power off supplies.

Signed-off-by: Jianhong Chen <chenjh@rock-chips.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rk808.c       | 23 ++++++++++++++++++++++-
 include/linux/mfd/rk808.h |  1 +
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c
index 0f8acc5882a4..2c9acdba7c2d 100644
--- a/drivers/mfd/rk808.c
+++ b/drivers/mfd/rk808.c
@@ -290,6 +290,24 @@ static void rk808_device_shutdown(void)
 		dev_err(&rk808_i2c_client->dev, "power off error!\n");
 }
 
+static void rk818_device_shutdown(void)
+{
+	int ret;
+	struct rk808 *rk808 = i2c_get_clientdata(rk808_i2c_client);
+
+	if (!rk808) {
+		dev_warn(&rk808_i2c_client->dev,
+			 "have no rk818, so do nothing here\n");
+		return;
+	}
+
+	ret = regmap_update_bits(rk808->regmap,
+				 RK818_DEVCTRL_REG,
+				 DEV_OFF, DEV_OFF);
+	if (ret)
+		dev_err(&rk808_i2c_client->dev, "power off error!\n");
+}
+
 static const struct of_device_id rk808_of_match[] = {
 	{ .compatible = "rockchip,rk808" },
 	{ .compatible = "rockchip,rk818" },
@@ -304,6 +322,7 @@ static int rk808_probe(struct i2c_client *client,
 	struct rk808 *rk808;
 	const struct rk808_reg_data *pre_init_reg;
 	const struct mfd_cell *cells;
+	void (*pm_pwroff_fn)(void);
 	int nr_pre_init_regs;
 	int nr_cells;
 	int pm_off = 0;
@@ -331,6 +350,7 @@ static int rk808_probe(struct i2c_client *client,
 		nr_pre_init_regs = ARRAY_SIZE(rk808_pre_init_reg);
 		cells = rk808s;
 		nr_cells = ARRAY_SIZE(rk808s);
+		pm_pwroff_fn = rk808_device_shutdown;
 		break;
 	case RK818_ID:
 		rk808->regmap_cfg = &rk818_regmap_config;
@@ -339,6 +359,7 @@ static int rk808_probe(struct i2c_client *client,
 		nr_pre_init_regs = ARRAY_SIZE(rk818_pre_init_reg);
 		cells = rk818s;
 		nr_cells = ARRAY_SIZE(rk818s);
+		pm_pwroff_fn = rk818_device_shutdown;
 		break;
 	default:
 		dev_err(&client->dev, "Unsupported RK8XX ID %lu\n",
@@ -393,7 +414,7 @@ static int rk808_probe(struct i2c_client *client,
 				"rockchip,system-power-controller");
 	if (pm_off && !pm_power_off) {
 		rk808_i2c_client = client;
-		pm_power_off = rk808_device_shutdown;
+		pm_power_off = pm_pwroff_fn;
 	}
 
 	return 0;
diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h
index 6d435a3c06bc..83701ef7d3c7 100644
--- a/include/linux/mfd/rk808.h
+++ b/include/linux/mfd/rk808.h
@@ -290,6 +290,7 @@ enum rk818_reg {
 #define SWITCH2_EN	BIT(6)
 #define SWITCH1_EN	BIT(5)
 #define DEV_OFF_RST	BIT(3)
+#define DEV_OFF		BIT(0)
 
 #define VB_LO_ACT		BIT(4)
 #define VB_LO_SEL_3500MV	(7 << 0)
-- 
cgit v1.2.3


From 8ca9edc837932469b81b8b47ea43a074b6add970 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 27 Oct 2016 08:38:08 +0000
Subject: mfd: davinci_voicecodec: Tidyup header difinitions

mach/hardware.h is needed on C source code side, not header.
And struct davinci_vc is duplicated definition.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/davinci_voicecodec.c       | 1 +
 include/linux/mfd/davinci_voicecodec.h | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index dff2f19296b8..4d0a5f38038a 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -32,6 +32,7 @@
 #include <sound/pcm.h>
 
 #include <linux/mfd/davinci_voicecodec.h>
+#include <mach/hardware.h>
 
 static const struct regmap_config davinci_vc_regmap = {
 	.reg_bits = 32,
diff --git a/include/linux/mfd/davinci_voicecodec.h b/include/linux/mfd/davinci_voicecodec.h
index 8e1cdbef3dad..2c0127cb06c5 100644
--- a/include/linux/mfd/davinci_voicecodec.h
+++ b/include/linux/mfd/davinci_voicecodec.h
@@ -28,8 +28,6 @@
 #include <linux/mfd/core.h>
 #include <linux/platform_data/edma.h>
 
-#include <mach/hardware.h>
-
 struct regmap;
 
 /*
@@ -99,8 +97,6 @@ struct davinci_vcif {
 	dma_addr_t dma_rx_addr;
 };
 
-struct davinci_vc;
-
 struct davinci_vc {
 	/* Device data */
 	struct device *dev;
-- 
cgit v1.2.3


From 34d9030b5d06ec0072796b3ab6a3fa24e53ece3d Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Fri, 11 Nov 2016 11:29:52 +0800
Subject: mfd: axp20x: Add address extension registers for AXP806 regmap

The AXP806 supports either master/standalone or slave mode.
Slave mode allows sharing the serial bus, even with multiple
AXP806 which all have the same hardware address.

This is done with extra "serial interface address extension",
or AXP806_BUS_ADDR_EXT, and "register address extension", or
AXP806_REG_ADDR_EXT, registers. The former is read-only, with
1 bit customizable at the factory, and 1 bit depending on the
state of an external pin. The latter is writable. Only when
the these device addressing bits (in the upper 4 bits of the
registers) match, will the device respond to operations on
its other registers.

Add these 2 registers to the regmap so we can access them.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/axp20x.c       | 3 ++-
 include/linux/mfd/axp20x.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index 98377d29b783..ed918de84238 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -136,6 +136,7 @@ static const struct regmap_range axp806_writeable_ranges[] = {
 	regmap_reg_range(AXP806_PWR_OUT_CTRL1, AXP806_CLDO3_V_CTRL),
 	regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IRQ2_EN),
 	regmap_reg_range(AXP20X_IRQ1_STATE, AXP20X_IRQ2_STATE),
+	regmap_reg_range(AXP806_REG_ADDR_EXT, AXP806_REG_ADDR_EXT),
 };
 
 static const struct regmap_range axp806_volatile_ranges[] = {
@@ -306,7 +307,7 @@ static const struct regmap_config axp806_regmap_config = {
 	.val_bits	= 8,
 	.wr_table	= &axp806_writeable_table,
 	.volatile_table	= &axp806_volatile_table,
-	.max_register	= AXP806_VREF_TEMP_WARN_L,
+	.max_register	= AXP806_REG_ADDR_EXT,
 	.cache_type	= REGCACHE_RBTREE,
 };
 
diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 6349496f09fc..a4860bc9b73d 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -115,6 +115,8 @@ enum {
 #define AXP806_CLDO2_V_CTRL		0x25
 #define AXP806_CLDO3_V_CTRL		0x26
 #define AXP806_VREF_TEMP_WARN_L		0xf3
+#define AXP806_BUS_ADDR_EXT		0xfe
+#define AXP806_REG_ADDR_EXT		0xff
 
 /* Interrupt */
 #define AXP152_IRQ1_EN			0x40
-- 
cgit v1.2.3


From c5e589a171728c9f5c587f9254ec6b343153c2ce Mon Sep 17 00:00:00 2001
From: Pierre-Hugues Husson <phh@phh.me>
Date: Sat, 5 Nov 2016 17:19:24 +0100
Subject: mfd: rn5t618: Add Ricoh RC5T619 PMIC support

The Ricoh RN5T567 is from the same family as the Ricoh RN5T618 is,
the differences are:

+ DCDC4/DCDC5
+ LDO7-10
+ Slightly different output voltage/currents
+ 32kHz Output
+ RTC
+ USB Charger detection

Signed-off-by: Pierre-Hugues Husson <phh@phh.me>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/rn5t618.txt | 16 ++++++++++------
 drivers/mfd/Kconfig                               |  3 ++-
 drivers/mfd/rn5t618.c                             |  1 +
 include/linux/mfd/rn5t618.h                       |  9 +++++++++
 4 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/rn5t618.txt b/Documentation/devicetree/bindings/mfd/rn5t618.txt
index 9e6770b105c9..65c23263cc54 100644
--- a/Documentation/devicetree/bindings/mfd/rn5t618.txt
+++ b/Documentation/devicetree/bindings/mfd/rn5t618.txt
@@ -1,21 +1,25 @@
 * Ricoh RN5T567/RN5T618 PMIC
 
-Ricoh RN5T567/RN5T618 is a power management IC family which integrates
-3 to 4 step-down DCDC converters, 7 low-dropout regulators, GPIOs and
-a watchdog timer. The RN5T618 provides additionally a Li-ion battery
-charger, fuel gauge and an ADC. It can be controlled through an I2C
-interface.
+Ricoh RN5T567/RN5T618/RC5T619 is a power management IC family which
+integrates 3 to 5 step-down DCDC converters, 7 to 10 low-dropout regulators,
+GPIOs, and a watchdog timer. It can be controlled through an I2C interface.
+The RN5T618/RC5T619 provides additionally a Li-ion battery charger,
+fuel gauge, and an ADC.
+The RC5T619 additionnally includes USB charger detection and an RTC.
 
 Required properties:
  - compatible: must be one of
 		"ricoh,rn5t567"
 		"ricoh,rn5t618"
+		"ricoh,rc5t619"
  - reg: the I2C slave address of the device
 
 Sub-nodes:
  - regulators: the node is required if the regulator functionality is
    needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, DCDC4
-   (RN5T567), LDO1, LDO2, LDO3, LDO4, LDO5, LDORTC1 and LDORTC2.
+   (RN5T567/RC5T619), LDO1, LDO2, LDO3, LDO4, LDO5, LDO6, LDO7, LDO8,
+   LDO9, LDO10, LDORTC1 and LDORTC2.
+   LDO7-10 are specific to RC5T619.
    The common bindings for each individual regulator can be found in:
    Documentation/devicetree/bindings/regulator/regulator.txt
 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 0fde51ee633d..4ce3b6f11830 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -894,7 +894,8 @@ config MFD_RN5T618
 	select MFD_CORE
 	select REGMAP_I2C
 	help
-	  Say yes here to add support for the Ricoh RN5T567 or R5T618 PMIC.
+	  Say yes here to add support for the Ricoh RN5T567,
+          RN5T618, RC5T619 PMIC.
 	  This driver provides common support for accessing the device,
 	  additional drivers must be enabled in order to use the
 	  functionality of the device.
diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c
index ee94080e1cbb..8131d1975745 100644
--- a/drivers/mfd/rn5t618.c
+++ b/drivers/mfd/rn5t618.c
@@ -87,6 +87,7 @@ static int rn5t618_restart(struct notifier_block *this,
 static const struct of_device_id rn5t618_of_match[] = {
 	{ .compatible = "ricoh,rn5t567", .data = (void *)RN5T567 },
 	{ .compatible = "ricoh,rn5t618", .data = (void *)RN5T618 },
+	{ .compatible = "ricoh,rc5t619", .data = (void *)RC5T619 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, rn5t618_of_match);
diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h
index cadc6543909d..e5a6cdeb77db 100644
--- a/include/linux/mfd/rn5t618.h
+++ b/include/linux/mfd/rn5t618.h
@@ -58,10 +58,13 @@
 #define RN5T618_DC3CTL2			0x31
 #define RN5T618_DC4CTL			0x32
 #define RN5T618_DC4CTL2			0x33
+#define RN5T618_DC5CTL			0x34
+#define RN5T618_DC5CTL2			0x35
 #define RN5T618_DC1DAC			0x36
 #define RN5T618_DC2DAC			0x37
 #define RN5T618_DC3DAC			0x38
 #define RN5T618_DC4DAC			0x39
+#define RN5T618_DC5DAC			0x3a
 #define RN5T618_DC1DAC_SLP		0x3b
 #define RN5T618_DC2DAC_SLP		0x3c
 #define RN5T618_DC3DAC_SLP		0x3d
@@ -77,6 +80,11 @@
 #define RN5T618_LDO3DAC			0x4e
 #define RN5T618_LDO4DAC			0x4f
 #define RN5T618_LDO5DAC			0x50
+#define RN5T618_LDO6DAC			0x51
+#define RN5T618_LDO7DAC			0x52
+#define RN5T618_LDO8DAC			0x53
+#define RN5T618_LDO9DAC			0x54
+#define RN5T618_LDO10DAC		0x55
 #define RN5T618_LDORTCDAC		0x56
 #define RN5T618_LDORTC2DAC		0x57
 #define RN5T618_LDO1DAC_SLP		0x58
@@ -231,6 +239,7 @@ enum {
 enum {
 	RN5T567 = 0,
 	RN5T618,
+	RC5T619,
 };
 
 struct rn5t618 {
-- 
cgit v1.2.3


From 6d2c2b9f806a4ec81833af533d57395db856d5a3 Mon Sep 17 00:00:00 2001
From: Milo Kim <woogyom.kim@gmail.com>
Date: Tue, 15 Nov 2016 22:02:13 +0900
Subject: mfd: tps65217: Update register interrupt mask bits instead of writing
 operation

TPS65217 interrupt register includes read/writeable mask bits with
read-only status bits. (bit 4, 5, 6 are R/W, bit 0, 1, 2 are RO)
And reserved bit is not required.

Register update operation is preferred for disabling all interrupts during
the device initialisation.

Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65217.c       | 7 +++----
 include/linux/mfd/tps65217.h | 3 ++-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 77fb8122abd8..9d76de99b2e3 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -189,10 +189,9 @@ static int tps65217_irq_init(struct tps65217 *tps, int irq)
 	tps->irq = irq;
 
 	/* Mask all interrupt sources */
-	tps->irq_mask = (TPS65217_INT_RESERVEDM | TPS65217_INT_PBM
-			| TPS65217_INT_ACM | TPS65217_INT_USBM);
-	tps65217_reg_write(tps, TPS65217_REG_INT, tps->irq_mask,
-			TPS65217_PROTECT_NONE);
+	tps->irq_mask = TPS65217_INT_MASK;
+	tps65217_set_bits(tps, TPS65217_REG_INT, TPS65217_INT_MASK,
+			  TPS65217_INT_MASK, TPS65217_PROTECT_NONE);
 
 	tps->irq_domain = irq_domain_add_linear(tps->dev->of_node,
 		TPS65217_NUM_IRQ, &tps65217_irq_domain_ops, tps);
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 4ccda8969639..dfa9f0d0ae2b 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -73,13 +73,14 @@
 #define TPS65217_PPATH_AC_CURRENT_MASK	0x0C
 #define TPS65217_PPATH_USB_CURRENT_MASK	0x03
 
-#define TPS65217_INT_RESERVEDM		BIT(7)
 #define TPS65217_INT_PBM		BIT(6)
 #define TPS65217_INT_ACM		BIT(5)
 #define TPS65217_INT_USBM		BIT(4)
 #define TPS65217_INT_PBI		BIT(2)
 #define TPS65217_INT_ACI		BIT(1)
 #define TPS65217_INT_USBI		BIT(0)
+#define TPS65217_INT_MASK		(TPS65217_INT_PBM | TPS65217_INT_ACM | \
+					TPS65217_INT_USBM)
 
 #define TPS65217_CHGCONFIG0_TREG	BIT(7)
 #define TPS65217_CHGCONFIG0_DPPM	BIT(6)
-- 
cgit v1.2.3


From fa9170522b362aefb4fef58b5cbff45dbefec258 Mon Sep 17 00:00:00 2001
From: Milo Kim <woogyom.kim@gmail.com>
Date: Tue, 15 Nov 2016 22:02:14 +0900
Subject: mfd: tps65217: Make an interrupt handler simpler

Rework the IRQ handler by using HW IRQ number and status bit.

Each HW IRQ number is matched with TPS65217 register layout[*].
(USB IRQ number is 0, AC is 1, Push button is 2)

When an interrupt is enabled, mask bit should be cleared (unmasked).
If an interrupt is disabled, then mask bit should be set (masked).
This mask value is updated into the TPS65217 register in irq_sync_unlock().

Mask bit and interrupt status bit can be handled with HW IRQ number.
Eventually, additional IRQ data, 'tps65217_irqs[]' and the function,
'irq_to_tps65217_irq()' are not necessary.

[*] TPS65217 interrupt register layout

 Bit7  6     5     4     3    2     1     0
----------------------------------------------
| x | PBM | ACM | USBM | x | PBI | ACI | USBI

PBM:  Push button status change interrupt mask
ACM:  AC interrupt mask
USBM: USB power status change interrupt mask
PBI:  Push button status change interrupt
ACI:  AC power status change interrupt
USBI: USB power status change interrupt
x:    Not used

Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65217.c       | 44 +++++++++-----------------------------------
 include/linux/mfd/tps65217.h |  1 +
 2 files changed, 10 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 9d76de99b2e3..73760906681c 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -42,26 +42,6 @@ static struct resource pb_resources[] = {
 	DEFINE_RES_IRQ_NAMED(TPS65217_IRQ_PB, "PB"),
 };
 
-struct tps65217_irq {
-	int mask;
-	int interrupt;
-};
-
-static const struct tps65217_irq tps65217_irqs[] = {
-	[TPS65217_IRQ_PB] = {
-		.mask = TPS65217_INT_PBM,
-		.interrupt = TPS65217_INT_PBI,
-	},
-	[TPS65217_IRQ_AC] = {
-		.mask = TPS65217_INT_ACM,
-		.interrupt = TPS65217_INT_ACI,
-	},
-	[TPS65217_IRQ_USB] = {
-		.mask = TPS65217_INT_USBM,
-		.interrupt = TPS65217_INT_USBI,
-	},
-};
-
 static void tps65217_irq_lock(struct irq_data *data)
 {
 	struct tps65217 *tps = irq_data_get_irq_chip_data(data);
@@ -74,34 +54,28 @@ static void tps65217_irq_sync_unlock(struct irq_data *data)
 	struct tps65217 *tps = irq_data_get_irq_chip_data(data);
 	int ret;
 
-	ret = tps65217_reg_write(tps, TPS65217_REG_INT, tps->irq_mask,
-				TPS65217_PROTECT_NONE);
+	ret = tps65217_set_bits(tps, TPS65217_REG_INT, TPS65217_INT_MASK,
+				tps->irq_mask, TPS65217_PROTECT_NONE);
 	if (ret != 0)
 		dev_err(tps->dev, "Failed to sync IRQ masks\n");
 
 	mutex_unlock(&tps->irq_lock);
 }
 
-static inline const struct tps65217_irq *
-irq_to_tps65217_irq(struct tps65217 *tps, struct irq_data *data)
-{
-	return &tps65217_irqs[data->hwirq];
-}
-
 static void tps65217_irq_enable(struct irq_data *data)
 {
 	struct tps65217 *tps = irq_data_get_irq_chip_data(data);
-	const struct tps65217_irq *irq_data = irq_to_tps65217_irq(tps, data);
+	u8 mask = BIT(data->hwirq) << TPS65217_INT_SHIFT;
 
-	tps->irq_mask &= ~irq_data->mask;
+	tps->irq_mask &= ~mask;
 }
 
 static void tps65217_irq_disable(struct irq_data *data)
 {
 	struct tps65217 *tps = irq_data_get_irq_chip_data(data);
-	const struct tps65217_irq *irq_data = irq_to_tps65217_irq(tps, data);
+	u8 mask = BIT(data->hwirq) << TPS65217_INT_SHIFT;
 
-	tps->irq_mask |= irq_data->mask;
+	tps->irq_mask |= mask;
 }
 
 static struct irq_chip tps65217_irq_chip = {
@@ -150,8 +124,8 @@ static irqreturn_t tps65217_irq_thread(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tps65217_irqs); i++) {
-		if (status & tps65217_irqs[i].interrupt) {
+	for (i = 0; i < TPS65217_NUM_IRQ; i++) {
+		if (status & BIT(i)) {
 			handle_nested_irq(irq_find_mapping(tps->irq_domain, i));
 			handled = true;
 		}
@@ -430,7 +404,7 @@ static int tps65217_remove(struct i2c_client *client)
 	unsigned int virq;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(tps65217_irqs); i++) {
+	for (i = 0; i < TPS65217_NUM_IRQ; i++) {
 		virq = irq_find_mapping(tps->irq_domain, i);
 		if (virq)
 			irq_dispose_mapping(virq);
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index dfa9f0d0ae2b..20d9dd3d74f1 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -79,6 +79,7 @@
 #define TPS65217_INT_PBI		BIT(2)
 #define TPS65217_INT_ACI		BIT(1)
 #define TPS65217_INT_USBI		BIT(0)
+#define TPS65217_INT_SHIFT		4
 #define TPS65217_INT_MASK		(TPS65217_INT_PBM | TPS65217_INT_ACM | \
 					TPS65217_INT_USBM)
 
-- 
cgit v1.2.3


From b11283eb89b0697984cadee6016dabbcf511af27 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Wed, 2 Nov 2016 11:54:03 +0000
Subject: irqchip/gic-v3-its: Change unsigned types for AArch32 compatibility

Make sure that constants which are supposed to be applied on 64-bit
data is actually unsigned long long, so they won't be truncated when
used in 32-bit mode.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 28 ++++++++++++++--------------
 include/linux/irqchip/arm-gic-v3.h |  4 ++--
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c5dee300e8a3..bca125e6c8e8 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -196,7 +196,7 @@ typedef struct its_collection *(*its_cmd_builder_t)(struct its_cmd_block *,
 
 static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
 {
-	cmd->raw_cmd[0] &= ~0xffUL;
+	cmd->raw_cmd[0] &= ~0xffULL;
 	cmd->raw_cmd[0] |= cmd_nr;
 }
 
@@ -208,43 +208,43 @@ static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
 
 static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
 {
-	cmd->raw_cmd[1] &= ~0xffffffffUL;
+	cmd->raw_cmd[1] &= ~0xffffffffULL;
 	cmd->raw_cmd[1] |= id;
 }
 
 static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
 {
-	cmd->raw_cmd[1] &= 0xffffffffUL;
+	cmd->raw_cmd[1] &= 0xffffffffULL;
 	cmd->raw_cmd[1] |= ((u64)phys_id) << 32;
 }
 
 static void its_encode_size(struct its_cmd_block *cmd, u8 size)
 {
-	cmd->raw_cmd[1] &= ~0x1fUL;
+	cmd->raw_cmd[1] &= ~0x1fULL;
 	cmd->raw_cmd[1] |= size & 0x1f;
 }
 
 static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
 {
-	cmd->raw_cmd[2] &= ~0xffffffffffffUL;
-	cmd->raw_cmd[2] |= itt_addr & 0xffffffffff00UL;
+	cmd->raw_cmd[2] &= ~0xffffffffffffULL;
+	cmd->raw_cmd[2] |= itt_addr & 0xffffffffff00ULL;
 }
 
 static void its_encode_valid(struct its_cmd_block *cmd, int valid)
 {
-	cmd->raw_cmd[2] &= ~(1UL << 63);
+	cmd->raw_cmd[2] &= ~(1ULL << 63);
 	cmd->raw_cmd[2] |= ((u64)!!valid) << 63;
 }
 
 static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
 {
-	cmd->raw_cmd[2] &= ~(0xffffffffUL << 16);
-	cmd->raw_cmd[2] |= (target_addr & (0xffffffffUL << 16));
+	cmd->raw_cmd[2] &= ~(0xffffffffULL << 16);
+	cmd->raw_cmd[2] |= (target_addr & (0xffffffffULL << 16));
 }
 
 static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
 {
-	cmd->raw_cmd[2] &= ~0xffffUL;
+	cmd->raw_cmd[2] &= ~0xffffULL;
 	cmd->raw_cmd[2] |= col;
 }
 
@@ -657,8 +657,8 @@ static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
 	its = its_dev->its;
 	addr = its->phys_base + GITS_TRANSLATER;
 
-	msg->address_lo		= addr & ((1UL << 32) - 1);
-	msg->address_hi		= addr >> 32;
+	msg->address_lo		= lower_32_bits(addr);
+	msg->address_hi		= upper_32_bits(addr);
 	msg->data		= its_get_event_id(d);
 
 	iommu_dma_map_msi_msg(d->irq, msg);
@@ -935,9 +935,9 @@ retry_baser:
 	}
 
 	if (val != tmp) {
-		pr_err("ITS@%pa: %s doesn't stick: %lx %lx\n",
+		pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n",
 		       &its->phys_base, its_base_type_string[type],
-		       (unsigned long) val, (unsigned long) tmp);
+		       val, tmp);
 		free_pages((unsigned long)base, order);
 		return -ENXIO;
 	}
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index b7e34313cdfe..5118d3a0c9ca 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -239,7 +239,7 @@
 #define GITS_TYPER_PTA			(1UL << 19)
 #define GITS_TYPER_HWCOLLCNT_SHIFT	24
 
-#define GITS_CBASER_VALID			(1UL << 63)
+#define GITS_CBASER_VALID			(1ULL << 63)
 #define GITS_CBASER_SHAREABILITY_SHIFT		(10)
 #define GITS_CBASER_INNER_CACHEABILITY_SHIFT	(59)
 #define GITS_CBASER_OUTER_CACHEABILITY_SHIFT	(53)
@@ -265,7 +265,7 @@
 
 #define GITS_BASER_NR_REGS		8
 
-#define GITS_BASER_VALID			(1UL << 63)
+#define GITS_BASER_VALID			(1ULL << 63)
 #define GITS_BASER_INDIRECT			(1ULL << 62)
 
 #define GITS_BASER_INNER_CACHEABILITY_SHIFT	(59)
-- 
cgit v1.2.3


From bb8313b603eb8fd52de48a079bfcd72dcab2ef1e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Mon, 28 Nov 2016 23:03:04 -0800
Subject: cpuidle: Allow enforcing deepest idle state selection

When idle injection is used to cap power, we need to override the
governor's choice of idle states.

For this reason, make it possible the deepest idle state selection to
be enforced by setting a flag on a given CPU to achieve the maximum
potential power draw reduction.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 13 ++++++++++++-
 include/linux/cpuidle.h   |  7 ++++++-
 kernel/sched/idle.c       | 13 ++++++++-----
 3 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c73207abb5a4..afc005b917fe 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -97,7 +97,17 @@ static int find_deepest_state(struct cpuidle_driver *drv,
 	return ret;
 }
 
-#ifdef CONFIG_SUSPEND
+/* Set the current cpu to use the deepest idle state, override governors */
+void cpuidle_use_deepest_state(bool enable)
+{
+	struct cpuidle_device *dev;
+
+	preempt_disable();
+	dev = cpuidle_get_device();
+	dev->use_deepest_state = enable;
+	preempt_enable();
+}
+
 /**
  * cpuidle_find_deepest_state - Find the deepest available idle state.
  * @drv: cpuidle driver for the given CPU.
@@ -109,6 +119,7 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
 }
 
+#ifdef CONFIG_SUSPEND
 static void enter_freeze_proper(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 15deea449edc..da346f2817a8 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -74,6 +74,7 @@ struct cpuidle_driver_kobj;
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
+	unsigned int		use_deepest_state:1;
 	unsigned int		cpu;
 
 	int			last_residency;
@@ -192,11 +193,12 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
 #endif
 
-#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND)
+#ifdef CONFIG_CPU_IDLE
 extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 				      struct cpuidle_device *dev);
 extern int cpuidle_enter_freeze(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev);
+extern void cpuidle_use_deepest_state(bool enable);
 #else
 static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 					     struct cpuidle_device *dev)
@@ -204,6 +206,9 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv,
 				       struct cpuidle_device *dev)
 {return -ENODEV; }
+static inline void cpuidle_use_deepest_state(bool enable)
+{
+}
 #endif
 
 /* kernel/sched/idle.c */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 1d8718d5300d..513e4dfeeae7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -164,11 +164,14 @@ static void cpuidle_idle_call(void)
 	 * timekeeping to prevent timer interrupts from kicking us out of idle
 	 * until a proper wakeup interrupt happens.
 	 */
-	if (idle_should_freeze()) {
-		entered_state = cpuidle_enter_freeze(drv, dev);
-		if (entered_state > 0) {
-			local_irq_enable();
-			goto exit_idle;
+
+	if (idle_should_freeze() || dev->use_deepest_state) {
+		if (idle_should_freeze()) {
+			entered_state = cpuidle_enter_freeze(drv, dev);
+			if (entered_state > 0) {
+				local_irq_enable();
+				goto exit_idle;
+			}
 		}
 
 		next_state = cpuidle_find_deepest_state(drv, dev);
-- 
cgit v1.2.3


From c1de45ca831acee9b72c9320dde447edafadb43f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 Nov 2016 23:03:05 -0800
Subject: sched/idle: Add support for tasks that inject idle

Idle injection drivers such as Intel powerclamp and ACPI PAD drivers use
realtime tasks to take control of CPU then inject idle. There are two
issues with this approach:

 1. Low efficiency: injected idle task is treated as busy so sched ticks
    do not stop during injected idle period, the result of these
    unwanted wakeups can be ~20% loss in power savings.

 2. Idle accounting: injected idle time is presented to user as busy.

This patch addresses the issues by introducing a new PF_IDLE flag which
allows any given task to be treated as idle task while the flag is set.
Therefore, idle injection tasks can run through the normal flow of NOHZ
idle enter/exit to get the correct accounting as well as tick stop when
possible.

The implication is that idle task is then no longer limited to PID == 0.

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpu.h   |   2 +
 include/linux/sched.h |   3 +-
 kernel/fork.c         |   2 +-
 kernel/sched/core.c   |   1 +
 kernel/sched/idle.c   | 162 +++++++++++++++++++++++++++++++-------------------
 5 files changed, 107 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b886dc17f2f3..ac0efae38072 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -245,6 +245,8 @@ void arch_cpu_idle_dead(void);
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
+void play_idle(unsigned long duration_ms);
+
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..114c7fcb6af6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2254,6 +2254,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 /*
  * Per process flags
  */
+#define PF_IDLE		0x00000002	/* I am an IDLE thread */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
@@ -2609,7 +2610,7 @@ extern struct task_struct *idle_task(int cpu);
  */
 static inline bool is_idle_task(const struct task_struct *p)
 {
-	return p->pid == 0;
+	return !!(p->flags & PF_IDLE);
 }
 extern struct task_struct *curr_task(int cpu);
 extern void ia64_set_curr_task(int cpu, struct task_struct *p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259fc794d..5074b2f0827b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1537,7 +1537,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cleanup_count;
 
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
-	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
+	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
 	p->flags |= PF_FORKNOEXEC;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..63b3a8a49884 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5285,6 +5285,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
+	idle->flags |= PF_IDLE;
 
 	kasan_unpoison_task_stack(idle);
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 513e4dfeeae7..6a4bae0a649d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -205,76 +205,65 @@ exit_idle:
  *
  * Called with polling cleared.
  */
-static void cpu_idle_loop(void)
+static void do_idle(void)
 {
-	int cpu = smp_processor_id();
-
-	while (1) {
-		/*
-		 * If the arch has a polling bit, we maintain an invariant:
-		 *
-		 * Our polling bit is clear if we're not scheduled (i.e. if
-		 * rq->curr != rq->idle).  This means that, if rq->idle has
-		 * the polling bit set, then setting need_resched is
-		 * guaranteed to cause the cpu to reschedule.
-		 */
-
-		__current_set_polling();
-		quiet_vmstat();
-		tick_nohz_idle_enter();
+	/*
+	 * If the arch has a polling bit, we maintain an invariant:
+	 *
+	 * Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
+	 * rq->idle). This means that, if rq->idle has the polling bit set,
+	 * then setting need_resched is guaranteed to cause the CPU to
+	 * reschedule.
+	 */
 
-		while (!need_resched()) {
-			check_pgt_cache();
-			rmb();
+	__current_set_polling();
+	tick_nohz_idle_enter();
 
-			if (cpu_is_offline(cpu)) {
-				cpuhp_report_idle_dead();
-				arch_cpu_idle_dead();
-			}
+	while (!need_resched()) {
+		check_pgt_cache();
+		rmb();
 
-			local_irq_disable();
-			arch_cpu_idle_enter();
-
-			/*
-			 * In poll mode we reenable interrupts and spin.
-			 *
-			 * Also if we detected in the wakeup from idle
-			 * path that the tick broadcast device expired
-			 * for us, we don't want to go deep idle as we
-			 * know that the IPI is going to arrive right
-			 * away
-			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired())
-				cpu_idle_poll();
-			else
-				cpuidle_idle_call();
-
-			arch_cpu_idle_exit();
+		if (cpu_is_offline(smp_processor_id())) {
+			cpuhp_report_idle_dead();
+			arch_cpu_idle_dead();
 		}
 
-		/*
-		 * Since we fell out of the loop above, we know
-		 * TIF_NEED_RESCHED must be set, propagate it into
-		 * PREEMPT_NEED_RESCHED.
-		 *
-		 * This is required because for polling idle loops we will
-		 * not have had an IPI to fold the state for us.
-		 */
-		preempt_set_need_resched();
-		tick_nohz_idle_exit();
-		__current_clr_polling();
+		local_irq_disable();
+		arch_cpu_idle_enter();
 
 		/*
-		 * We promise to call sched_ttwu_pending and reschedule
-		 * if need_resched is set while polling is set.  That
-		 * means that clearing polling needs to be visible
-		 * before doing these things.
+		 * In poll mode we reenable interrupts and spin. Also if we
+		 * detected in the wakeup from idle path that the tick
+		 * broadcast device expired for us, we don't want to go deep
+		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		smp_mb__after_atomic();
-
-		sched_ttwu_pending();
-		schedule_preempt_disabled();
+		if (cpu_idle_force_poll || tick_check_broadcast_expired())
+			cpu_idle_poll();
+		else
+			cpuidle_idle_call();
+		arch_cpu_idle_exit();
 	}
+
+	/*
+	 * Since we fell out of the loop above, we know TIF_NEED_RESCHED must
+	 * be set, propagate it into PREEMPT_NEED_RESCHED.
+	 *
+	 * This is required because for polling idle loops we will not have had
+	 * an IPI to fold the state for us.
+	 */
+	preempt_set_need_resched();
+	tick_nohz_idle_exit();
+	__current_clr_polling();
+
+	/*
+	 * We promise to call sched_ttwu_pending() and reschedule if
+	 * need_resched() is set while polling is set. That means that clearing
+	 * polling needs to be visible before doing these things.
+	 */
+	smp_mb__after_atomic();
+
+	sched_ttwu_pending();
+	schedule_preempt_disabled();
 }
 
 bool cpu_in_idle(unsigned long pc)
@@ -283,6 +272,56 @@ bool cpu_in_idle(unsigned long pc)
 		pc < (unsigned long)__cpuidle_text_end;
 }
 
+struct idle_timer {
+	struct hrtimer timer;
+	int done;
+};
+
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+	struct idle_timer *it = container_of(timer, struct idle_timer, timer);
+
+	WRITE_ONCE(it->done, 1);
+	set_tsk_need_resched(current);
+
+	return HRTIMER_NORESTART;
+}
+
+void play_idle(unsigned long duration_ms)
+{
+	struct idle_timer it;
+
+	/*
+	 * Only FIFO tasks can disable the tick since they don't need the forced
+	 * preemption.
+	 */
+	WARN_ON_ONCE(current->policy != SCHED_FIFO);
+	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
+	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
+	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
+	WARN_ON_ONCE(!duration_ms);
+
+	rcu_sleep_check();
+	preempt_disable();
+	current->flags |= PF_IDLE;
+	cpuidle_use_deepest_state(true);
+
+	it.done = 0;
+	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	it.timer.function = idle_inject_timer_fn;
+	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+
+	while (!READ_ONCE(it.done))
+		do_idle();
+
+	cpuidle_use_deepest_state(false);
+	current->flags &= ~PF_IDLE;
+
+	preempt_fold_need_resched();
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(play_idle);
+
 void cpu_startup_entry(enum cpuhp_state state)
 {
 	/*
@@ -302,5 +341,6 @@ void cpu_startup_entry(enum cpuhp_state state)
 #endif
 	arch_cpu_idle_prepare();
 	cpuhp_online_idle(state);
-	cpu_idle_loop();
+	while (1)
+		do_idle();
 }
-- 
cgit v1.2.3


From 027b25b26447aaf597c8b7729dd3b1fbebc6d5e8 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:33 +0000
Subject: ACPI: Add FWNODE_ACPI_STATIC fwnode type

On systems booting with a device tree, every struct device is associated
with a struct device_node, that provides its DT firmware representation.
The device node can be used in generic kernel contexts (eg IRQ
translation, IOMMU streamid mapping), to retrieve the properties
associated with the device and carry out kernel operations accordingly.
Owing to the 1:1 relationship between the device and its device_node,
the device_node can also be used as a look-up token for the device (eg
looking up a device through its device_node), to retrieve the device in
kernel paths where the device_node is available.

On systems booting with ACPI, the same abstraction provided by
the device_node is required to provide look-up functionality.

The struct acpi_device, that represents firmware objects in the
ACPI namespace already includes a struct fwnode_handle of
type FWNODE_ACPI as their member; the same abstraction is missing
though for devices that are instantiated out of static ACPI tables
entries (eg ARM SMMU devices).

Add a new fwnode_handle type to associate devices created out
of static ACPI table entries to the respective firmware components
and create a simple ACPI core layer interface to dynamically allocate
and free the corresponding firmware nodes so that kernel subsystems
can use it to instantiate the nodes and associate them with the
respective devices.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/acpi.h   | 21 +++++++++++++++++++++
 include/linux/fwnode.h |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 61a3d90f32b3..996a29cdaccd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -56,6 +56,27 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
+{
+	struct fwnode_handle *fwnode;
+
+	fwnode = kzalloc(sizeof(struct fwnode_handle), GFP_KERNEL);
+	if (!fwnode)
+		return NULL;
+
+	fwnode->type = FWNODE_ACPI_STATIC;
+
+	return fwnode;
+}
+
+static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode)
+{
+	if (WARN_ON(!fwnode || fwnode->type != FWNODE_ACPI_STATIC))
+		return;
+
+	kfree(fwnode);
+}
+
 /**
  * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with
  * the PCI-defined class-code information
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 851671742790..8bd28ce6d76e 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -17,8 +17,9 @@ enum fwnode_type {
 	FWNODE_OF,
 	FWNODE_ACPI,
 	FWNODE_ACPI_DATA,
+	FWNODE_ACPI_STATIC,
 	FWNODE_PDATA,
-	FWNODE_IRQCHIP,
+	FWNODE_IRQCHIP
 };
 
 struct fwnode_handle {
-- 
cgit v1.2.3


From 34ceea275f626ae624b55f2b388a07f806988a55 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:34 +0000
Subject: ACPI/IORT: Introduce linker section for IORT entries probing

Since commit e647b532275b ("ACPI: Add early device probing
infrastructure") the kernel has gained the infrastructure that allows
adding linker script section entries to execute ACPI driver callbacks
(ie probe routines) for all subsystems that register a table entry
in the respective kernel section (eg clocksource, irqchip).

Since ARM IOMMU devices data is described through IORT tables when
booting with ACPI, the ARM IOMMU drivers must be made able to hook ACPI
callback routines that are called to probe IORT entries and initialize
the respective IOMMU devices.

To avoid adding driver specific hooks into IORT table initialization
code (breaking therefore code modularity - ie ACPI IORT code must be made
aware of ARM SMMU drivers ACPI init callbacks), this patch adds code
that allows ARM SMMU drivers to take advantage of the ACPI early probing
infrastructure, so that they can add linker script section entries
containing drivers callback to be executed on IORT tables detection.

Since IORT nodes are differentiated by a type, the callback routines
can easily parse the IORT table entries, check the IORT nodes and
carry out some actions whenever the IORT node type associated with
the driver specific callback is matched.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c         | 13 ++++++++++---
 include/asm-generic/vmlinux.lds.h |  1 +
 include/linux/acpi_iort.h         |  3 +++
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 6b81746cd13c..2c46ebca01af 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -361,8 +361,15 @@ void __init acpi_iort_init(void)
 	acpi_status status;
 
 	status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table);
-	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
-		const char *msg = acpi_format_exception(status);
-		pr_err("Failed to get table, %s\n", msg);
+	if (ACPI_FAILURE(status)) {
+		if (status != AE_NOT_FOUND) {
+			const char *msg = acpi_format_exception(status);
+
+			pr_err("Failed to get table, %s\n", msg);
+		}
+
+		return;
 	}
+
+	acpi_probe_device_table(iort);
 }
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 31e1d639abed..9e3aa34341f4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -566,6 +566,7 @@
 	IRQCHIP_OF_MATCH_TABLE()					\
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(clksrc)					\
+	ACPI_PROBE_TABLE(iort)						\
 	EARLYCON_TABLE()
 
 #define INIT_TEXT							\
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 0e32dac8fd03..d16fddaf230e 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -39,4 +39,7 @@ static inline struct irq_domain *iort_get_device_domain(struct device *dev,
 { return NULL; }
 #endif
 
+#define IORT_ACPI_DECLARE(name, table_id, fn)		\
+	ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn)
+
 #endif /* __ACPI_IORT_H__ */
-- 
cgit v1.2.3


From e4f10ffe4c9b500e545b874b816ffea5e8659b05 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:36 +0000
Subject: iommu: Make of_iommu_set/get_ops() DT agnostic

The of_iommu_{set/get}_ops() API is used to associate a device
tree node with a specific set of IOMMU operations. The same
kernel interface is required on systems booting with ACPI, where
devices are not associated with a device tree node, therefore
the interface requires generalization.

The struct device fwnode member represents the fwnode token associated
with the device and the struct it points at is firmware specific;
regardless, it is initialized on both ACPI and DT systems and makes an
ideal candidate to use it to associate a set of IOMMU operations to a
given device, through its struct device.fwnode member pointer, paving
the way for representing per-device iommu_ops (ie an iommu instance
associated with a device).

Convert the DT specific of_iommu_{set/get}_ops() interface to
use struct device.fwnode as a look-up token, making the interface
usable on ACPI systems and rename the data structures and the
registration API so that they are made to represent their usage
more clearly.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/iommu.c    | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/of_iommu.c | 39 ---------------------------------------
 include/linux/iommu.h    | 14 ++++++++++++++
 include/linux/of_iommu.h | 12 ++++++++++--
 4 files changed, 64 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9a2f1960873b..8d3e847d4845 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1615,6 +1615,46 @@ out:
 	return ret;
 }
 
+struct iommu_instance {
+	struct list_head list;
+	struct fwnode_handle *fwnode;
+	const struct iommu_ops *ops;
+};
+static LIST_HEAD(iommu_instance_list);
+static DEFINE_SPINLOCK(iommu_instance_lock);
+
+void iommu_register_instance(struct fwnode_handle *fwnode,
+			     const struct iommu_ops *ops)
+{
+	struct iommu_instance *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+
+	if (WARN_ON(!iommu))
+		return;
+
+	of_node_get(to_of_node(fwnode));
+	INIT_LIST_HEAD(&iommu->list);
+	iommu->fwnode = fwnode;
+	iommu->ops = ops;
+	spin_lock(&iommu_instance_lock);
+	list_add_tail(&iommu->list, &iommu_instance_list);
+	spin_unlock(&iommu_instance_lock);
+}
+
+const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode)
+{
+	struct iommu_instance *instance;
+	const struct iommu_ops *ops = NULL;
+
+	spin_lock(&iommu_instance_lock);
+	list_for_each_entry(instance, &iommu_instance_list, list)
+		if (instance->fwnode == fwnode) {
+			ops = instance->ops;
+			break;
+		}
+	spin_unlock(&iommu_instance_lock);
+	return ops;
+}
+
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
 		      const struct iommu_ops *ops)
 {
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 5b82862f571f..0f57ddc4ecc2 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -96,45 +96,6 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
 
-struct of_iommu_node {
-	struct list_head list;
-	struct device_node *np;
-	const struct iommu_ops *ops;
-};
-static LIST_HEAD(of_iommu_list);
-static DEFINE_SPINLOCK(of_iommu_lock);
-
-void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops)
-{
-	struct of_iommu_node *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-
-	if (WARN_ON(!iommu))
-		return;
-
-	of_node_get(np);
-	INIT_LIST_HEAD(&iommu->list);
-	iommu->np = np;
-	iommu->ops = ops;
-	spin_lock(&of_iommu_lock);
-	list_add_tail(&iommu->list, &of_iommu_list);
-	spin_unlock(&of_iommu_lock);
-}
-
-const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
-{
-	struct of_iommu_node *node;
-	const struct iommu_ops *ops = NULL;
-
-	spin_lock(&of_iommu_lock);
-	list_for_each_entry(node, &of_iommu_list, list)
-		if (node->np == np) {
-			ops = node->ops;
-			break;
-		}
-	spin_unlock(&of_iommu_lock);
-	return ops;
-}
-
 static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 {
 	struct of_phandle_args *iommu_spec = data;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 436dc21318af..f2960e4de344 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -351,6 +351,9 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
 		      const struct iommu_ops *ops);
 void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
+void iommu_register_instance(struct fwnode_handle *fwnode,
+			     const struct iommu_ops *ops);
+const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode);
 
 #else /* CONFIG_IOMMU_API */
 
@@ -580,6 +583,17 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
 	return -ENODEV;
 }
 
+static inline void iommu_register_instance(struct fwnode_handle *fwnode,
+					   const struct iommu_ops *ops)
+{
+}
+
+static inline
+const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index e80b9c762a03..6a7fc5051099 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -31,8 +31,16 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops);
-const struct iommu_ops *of_iommu_get_ops(struct device_node *np);
+static inline void of_iommu_set_ops(struct device_node *np,
+				    const struct iommu_ops *ops)
+{
+	iommu_register_instance(&np->fwnode, ops);
+}
+
+static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
+{
+	return iommu_get_instance(&np->fwnode);
+}
 
 extern struct of_device_id __iommu_of_table;
 
-- 
cgit v1.2.3


From d760a1baf20e067d3a063aa134834ddd3d183e2f Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:39 +0000
Subject: ACPI: Implement acpi_dma_configure

On DT based systems, the of_dma_configure() API implements DMA
configuration for a given device. On ACPI systems an API equivalent to
of_dma_configure() is missing which implies that it is currently not
possible to set-up DMA operations for devices through the ACPI generic
kernel layer.

This patch fills the gap by introducing acpi_dma_configure/deconfigure()
calls that for now are just wrappers around arch_setup_dma_ops() and
arch_teardown_dma_ops() and also updates ACPI and PCI core code to use
the newly introduced acpi_dma_configure/acpi_dma_deconfigure functions.

Since acpi_dma_configure() is used to configure DMA operations, the
function initializes the dma/coherent_dma masks to sane default values
if the current masks are uninitialized (also to keep the default values
consistent with DT systems) to make sure the device has a complete
default DMA set-up.

The DMA range size passed to arch_setup_dma_ops() is sized according
to the device coherent_dma_mask (starting at address 0x0), mirroring the
DT probing path behaviour when a dma-ranges property is not provided
for the device being probed; this changes the current arch_setup_dma_ops()
call parameters in the ACPI probing case, but since arch_setup_dma_ops()
is a NOP on all architectures but ARM/ARM64 this patch does not change
the current kernel behaviour on them.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> [pci]
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/glue.c     |  4 ++--
 drivers/acpi/scan.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c     |  3 +--
 include/acpi/acpi_bus.h |  2 ++
 include/linux/acpi.h    |  5 +++++
 5 files changed, 50 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 5ea5dc219f56..f8d65647ea79 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -227,8 +227,7 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev)
 
 	attr = acpi_get_dma_attr(acpi_dev);
 	if (attr != DEV_DMA_NOT_SUPPORTED)
-		arch_setup_dma_ops(dev, 0, 0, NULL,
-				   attr == DEV_DMA_COHERENT);
+		acpi_dma_configure(dev, attr);
 
 	acpi_physnode_link_name(physical_node_name, node_id);
 	retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -251,6 +250,7 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev)
 	return 0;
 
  err:
+	acpi_dma_deconfigure(dev);
 	ACPI_COMPANION_SET(dev, NULL);
 	put_device(dev);
 	put_device(&acpi_dev->dev);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 3d1856f1f4d0..45b5710a0404 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1370,6 +1370,46 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
 		return DEV_DMA_NON_COHERENT;
 }
 
+/**
+ * acpi_dma_configure - Set-up DMA configuration for the device.
+ * @dev: The pointer to the device
+ * @attr: device dma attributes
+ */
+void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
+{
+	/*
+	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
+	 * setup the correct supported mask.
+	 */
+	if (!dev->coherent_dma_mask)
+		dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	/*
+	 * Set it to coherent_dma_mask by default if the architecture
+	 * code has not set it.
+	 */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+
+	/*
+	 * Assume dma valid range starts at 0 and covers the whole
+	 * coherent_dma_mask.
+	 */
+	arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, NULL,
+			   attr == DEV_DMA_COHERENT);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_configure);
+
+/**
+ * acpi_dma_deconfigure - Tear-down DMA configuration for the device.
+ * @dev: The pointer to the device
+ */
+void acpi_dma_deconfigure(struct device *dev)
+{
+	arch_teardown_dma_ops(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_deconfigure);
+
 static void acpi_init_coherency(struct acpi_device *adev)
 {
 	unsigned long long cca = 0;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ab002671fa60..c29e07ad5a7f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1738,8 +1738,7 @@ static void pci_dma_configure(struct pci_dev *dev)
 		if (attr == DEV_DMA_NOT_SUPPORTED)
 			dev_warn(&dev->dev, "DMA not supported.\n");
 		else
-			arch_setup_dma_ops(&dev->dev, 0, 0, NULL,
-					   attr == DEV_DMA_COHERENT);
+			acpi_dma_configure(&dev->dev, attr);
 	}
 
 	pci_put_host_bridge_device(bridge);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c1a524de67c5..4242c31ffaee 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -573,6 +573,8 @@ struct acpi_pci_root {
 
 bool acpi_dma_supported(struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr);
+void acpi_dma_deconfigure(struct device *dev);
 
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
 					   u64 address, bool check_children);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 996a29cdaccd..8d15fc59719f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -765,6 +765,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
 	return DEV_DMA_NOT_SUPPORTED;
 }
 
+static inline void acpi_dma_configure(struct device *dev,
+				      enum dev_dma_attr attr) { }
+
+static inline void acpi_dma_deconfigure(struct device *dev) { }
+
 #define ACPI_PTR(_ptr)	(NULL)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
-- 
cgit v1.2.3


From bdca0c077fc5c2a7bb405281263270070c67f917 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:40 +0000
Subject: ACPI/IORT: Add node match function

Device drivers (eg ARM SMMU) need to know if a specific component
is part of the IORT table, so that kernel data structures are not
initialized at initcalls time if the respective component is not
part of the IORT table.

To this end, this patch adds a trivial function that allows detecting
if a given IORT node type is present or not in the ACPI table, providing
an ACPI IORT equivalent for of_find_matching_node().

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Acked-by: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c | 15 +++++++++++++++
 include/linux/acpi_iort.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 1ac2720da96d..4bb6acbf881f 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -227,6 +227,21 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
 	return NULL;
 }
 
+static acpi_status
+iort_match_type_callback(struct acpi_iort_node *node, void *context)
+{
+	return AE_OK;
+}
+
+bool iort_node_match(u8 type)
+{
+	struct acpi_iort_node *node;
+
+	node = iort_scan_node(type, iort_match_type_callback, NULL);
+
+	return node != NULL;
+}
+
 static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
 					    void *context)
 {
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index d16fddaf230e..17bb078073de 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -28,10 +28,12 @@ void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
 void acpi_iort_init(void);
+bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
 #else
 static inline void acpi_iort_init(void) { }
+static inline bool iort_node_match(u8 type) { return false; }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 { return req_id; }
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
-- 
cgit v1.2.3


From d6fcd3b149f3eab3b94cc107ca846bea8461cc2f Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:45 +0000
Subject: iommu/arm-smmu: Add IORT configuration

In ACPI based systems, in order to be able to create platform
devices and initialize them for ARM SMMU components, the IORT
kernel implementation requires a set of static functions to be
used by the IORT kernel layer to configure platform devices for
ARM SMMU components.

Add static configuration functions to the IORT kernel layer for
the ARM SMMU components, so that the ARM SMMU driver can
initialize its respective platform device by relying on the IORT
kernel infrastructure and by adding a corresponding ACPI device
early probe section entry.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c | 71 +++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/arm-smmu.c  | 77 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi_iort.h |  3 ++
 3 files changed, 150 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index fd52e4c05a26..8a8ae5ed05d5 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -548,6 +548,68 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
 	return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
 }
 
+static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
+{
+	struct acpi_iort_smmu *smmu;
+
+	/* Retrieve SMMU specific data */
+	smmu = (struct acpi_iort_smmu *)node->node_data;
+
+	/*
+	 * Only consider the global fault interrupt and ignore the
+	 * configuration access interrupt.
+	 *
+	 * MMIO address and global fault interrupt resources are always
+	 * present so add them to the context interrupt count as a static
+	 * value.
+	 */
+	return smmu->context_interrupt_count + 2;
+}
+
+static void __init arm_smmu_init_resources(struct resource *res,
+					   struct acpi_iort_node *node)
+{
+	struct acpi_iort_smmu *smmu;
+	int i, hw_irq, trigger, num_res = 0;
+	u64 *ctx_irq, *glb_irq;
+
+	/* Retrieve SMMU specific data */
+	smmu = (struct acpi_iort_smmu *)node->node_data;
+
+	res[num_res].start = smmu->base_address;
+	res[num_res].end = smmu->base_address + smmu->span - 1;
+	res[num_res].flags = IORESOURCE_MEM;
+	num_res++;
+
+	glb_irq = ACPI_ADD_PTR(u64, node, smmu->global_interrupt_offset);
+	/* Global IRQs */
+	hw_irq = IORT_IRQ_MASK(glb_irq[0]);
+	trigger = IORT_IRQ_TRIGGER_MASK(glb_irq[0]);
+
+	acpi_iort_register_irq(hw_irq, "arm-smmu-global", trigger,
+				     &res[num_res++]);
+
+	/* Context IRQs */
+	ctx_irq = ACPI_ADD_PTR(u64, node, smmu->context_interrupt_offset);
+	for (i = 0; i < smmu->context_interrupt_count; i++) {
+		hw_irq = IORT_IRQ_MASK(ctx_irq[i]);
+		trigger = IORT_IRQ_TRIGGER_MASK(ctx_irq[i]);
+
+		acpi_iort_register_irq(hw_irq, "arm-smmu-context", trigger,
+				       &res[num_res++]);
+	}
+}
+
+static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node)
+{
+	struct acpi_iort_smmu *smmu;
+
+	/* Retrieve SMMU specific data */
+	smmu = (struct acpi_iort_smmu *)node->node_data;
+
+	return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK;
+}
+
 struct iort_iommu_config {
 	const char *name;
 	int (*iommu_init)(struct acpi_iort_node *node);
@@ -564,12 +626,21 @@ static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
 	.iommu_init_resources = arm_smmu_v3_init_resources
 };
 
+static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
+	.name = "arm-smmu",
+	.iommu_is_coherent = arm_smmu_is_coherent,
+	.iommu_count_resources = arm_smmu_count_resources,
+	.iommu_init_resources = arm_smmu_init_resources
+};
+
 static __init
 const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
 {
 	switch (node->type) {
 	case ACPI_IORT_NODE_SMMU_V3:
 		return &iort_arm_smmu_v3_cfg;
+	case ACPI_IORT_NODE_SMMU:
+		return &iort_arm_smmu_cfg;
 	default:
 		return NULL;
 	}
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e068c6f4d1d6..41b67ce999bf 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -28,6 +28,8 @@
 
 #define pr_fmt(fmt) "arm-smmu: " fmt
 
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
 #include <linux/atomic.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
@@ -1911,6 +1913,64 @@ static const struct of_device_id arm_smmu_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
+#ifdef CONFIG_ACPI
+static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
+{
+	int ret = 0;
+
+	switch (model) {
+	case ACPI_IORT_SMMU_V1:
+	case ACPI_IORT_SMMU_CORELINK_MMU400:
+		smmu->version = ARM_SMMU_V1;
+		smmu->model = GENERIC_SMMU;
+		break;
+	case ACPI_IORT_SMMU_V2:
+		smmu->version = ARM_SMMU_V2;
+		smmu->model = GENERIC_SMMU;
+		break;
+	case ACPI_IORT_SMMU_CORELINK_MMU500:
+		smmu->version = ARM_SMMU_V2;
+		smmu->model = ARM_MMU500;
+		break;
+	default:
+		ret = -ENODEV;
+	}
+
+	return ret;
+}
+
+static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+				      struct arm_smmu_device *smmu)
+{
+	struct device *dev = smmu->dev;
+	struct acpi_iort_node *node =
+		*(struct acpi_iort_node **)dev_get_platdata(dev);
+	struct acpi_iort_smmu *iort_smmu;
+	int ret;
+
+	/* Retrieve SMMU1/2 specific data */
+	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
+
+	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
+	if (ret < 0)
+		return ret;
+
+	/* Ignore the configuration access interrupt */
+	smmu->num_global_irqs = 1;
+
+	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
+		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
+
+	return 0;
+}
+#else
+static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+					     struct arm_smmu_device *smmu)
+{
+	return -ENODEV;
+}
+#endif
+
 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 				    struct arm_smmu_device *smmu)
 {
@@ -1962,7 +2022,11 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	}
 	smmu->dev = dev;
 
-	err = arm_smmu_device_dt_probe(pdev, smmu);
+	if (dev->of_node)
+		err = arm_smmu_device_dt_probe(pdev, smmu);
+	else
+		err = arm_smmu_device_acpi_probe(pdev, smmu);
+
 	if (err)
 		return err;
 
@@ -2110,6 +2174,17 @@ IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init);
 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init);
 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init);
 
+#ifdef CONFIG_ACPI
+static int __init arm_smmu_acpi_init(struct acpi_table_header *table)
+{
+	if (iort_node_match(ACPI_IORT_NODE_SMMU))
+		return arm_smmu_init();
+
+	return 0;
+}
+IORT_ACPI_DECLARE(arm_smmu, ACPI_SIG_IORT, arm_smmu_acpi_init);
+#endif
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 17bb078073de..79ba1bb50950 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -23,6 +23,9 @@
 #include <linux/fwnode.h>
 #include <linux/irqdomain.h>
 
+#define IORT_IRQ_MASK(irq)		(irq & 0xffffffffULL)
+#define IORT_IRQ_TRIGGER_MASK(irq)	((irq >> 32) & 0xffffffffULL)
+
 int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
 void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
-- 
cgit v1.2.3


From 643b8e4d86f8b1a62cf5cd9ea221e9bc0d531d18 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Nov 2016 10:01:48 +0000
Subject: ACPI/IORT: Introduce iort_iommu_configure

DT based systems have a generic kernel API to configure IOMMUs
for devices (ie of_iommu_configure()).

On ARM based ACPI systems, the of_iommu_configure() equivalent can
be implemented atop ACPI IORT kernel API, with the corresponding
functions to map device identifiers to IOMMUs and retrieve the
corresponding IOMMU operations necessary for DMA operations set-up.

By relying on the iommu_fwspec generic kernel infrastructure,
implement the IORT based IOMMU configuration for ARM ACPI systems
and hook it up in the ACPI kernel layer that implements DMA
configuration for a device.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [ACPI core]
Reviewed-by: Tomasz Nowicki <tn@semihalf.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Tomasz Nowicki <tn@semihalf.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/scan.c       |  7 +++-
 include/linux/acpi_iort.h |  6 +++
 3 files changed, 110 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 6aae49c35a95..47bace8eafb6 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -28,6 +28,8 @@
 
 #define IORT_TYPE_MASK(type)	(1 << (type))
 #define IORT_MSI_TYPE		(1 << ACPI_IORT_NODE_ITS_GROUP)
+#define IORT_IOMMU_TYPE		((1 << ACPI_IORT_NODE_SMMU) |	\
+				(1 << ACPI_IORT_NODE_SMMU_V3))
 
 struct iort_its_msi_chip {
 	struct list_head	list;
@@ -501,6 +503,102 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
 	return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
 }
 
+static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
+{
+	u32 *rid = data;
+
+	*rid = alias;
+	return 0;
+}
+
+static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
+			       struct fwnode_handle *fwnode,
+			       const struct iommu_ops *ops)
+{
+	int ret = iommu_fwspec_init(dev, fwnode, ops);
+
+	if (!ret)
+		ret = iommu_fwspec_add_ids(dev, &streamid, 1);
+
+	return ret;
+}
+
+static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
+					struct acpi_iort_node *node,
+					u32 streamid)
+{
+	const struct iommu_ops *ops = NULL;
+	int ret = -ENODEV;
+	struct fwnode_handle *iort_fwnode;
+
+	if (node) {
+		iort_fwnode = iort_get_fwnode(node);
+		if (!iort_fwnode)
+			return NULL;
+
+		ops = iommu_get_instance(iort_fwnode);
+		if (!ops)
+			return NULL;
+
+		ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
+	}
+
+	return ret ? NULL : ops;
+}
+
+/**
+ * iort_iommu_configure - Set-up IOMMU configuration for a device.
+ *
+ * @dev: device to configure
+ *
+ * Returns: iommu_ops pointer on configuration success
+ *          NULL on configuration failure
+ */
+const struct iommu_ops *iort_iommu_configure(struct device *dev)
+{
+	struct acpi_iort_node *node, *parent;
+	const struct iommu_ops *ops = NULL;
+	u32 streamid = 0;
+
+	if (dev_is_pci(dev)) {
+		struct pci_bus *bus = to_pci_dev(dev)->bus;
+		u32 rid;
+
+		pci_for_each_dma_alias(to_pci_dev(dev), __get_pci_rid,
+				       &rid);
+
+		node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
+				      iort_match_node_callback, &bus->dev);
+		if (!node)
+			return NULL;
+
+		parent = iort_node_map_rid(node, rid, &streamid,
+					   IORT_IOMMU_TYPE);
+
+		ops = iort_iommu_xlate(dev, parent, streamid);
+
+	} else {
+		int i = 0;
+
+		node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+				      iort_match_node_callback, dev);
+		if (!node)
+			return NULL;
+
+		parent = iort_node_get_id(node, &streamid,
+					  IORT_IOMMU_TYPE, i++);
+
+		while (parent) {
+			ops = iort_iommu_xlate(dev, parent, streamid);
+
+			parent = iort_node_get_id(node, &streamid,
+						  IORT_IOMMU_TYPE, i++);
+		}
+	}
+
+	return ops;
+}
+
 static void __init acpi_iort_register_irq(int hwirq, const char *name,
 					  int trigger,
 					  struct resource *res)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 45b5710a0404..80698d3c5feb 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/acpi.h>
+#include <linux/acpi_iort.h>
 #include <linux/signal.h>
 #include <linux/kthread.h>
 #include <linux/dmi.h>
@@ -1377,6 +1378,8 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
  */
 void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 {
+	const struct iommu_ops *iommu;
+
 	/*
 	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
 	 * setup the correct supported mask.
@@ -1391,11 +1394,13 @@ void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 	if (!dev->dma_mask)
 		dev->dma_mask = &dev->coherent_dma_mask;
 
+	iommu = iort_iommu_configure(dev);
+
 	/*
 	 * Assume dma valid range starts at 0 and covers the whole
 	 * coherent_dma_mask.
 	 */
-	arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, NULL,
+	arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, iommu,
 			   attr == DEV_DMA_COHERENT);
 }
 EXPORT_SYMBOL_GPL(acpi_dma_configure);
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 79ba1bb50950..dcb2b601e152 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -34,6 +34,8 @@ void acpi_iort_init(void);
 bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+/* IOMMU interface */
+const struct iommu_ops *iort_iommu_configure(struct device *dev);
 #else
 static inline void acpi_iort_init(void) { }
 static inline bool iort_node_match(u8 type) { return false; }
@@ -42,6 +44,10 @@ static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
 							u32 req_id)
 { return NULL; }
+/* IOMMU interface */
+static inline
+const struct iommu_ops *iort_iommu_configure(struct device *dev)
+{ return NULL; }
 #endif
 
 #define IORT_ACPI_DECLARE(name, table_id, fn)		\
-- 
cgit v1.2.3


From ba58d1020a54933c6b087a3107661c8513556cb8 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Mon, 28 Nov 2016 14:35:19 -0800
Subject: timekeeping: Ignore the bogus sleep time if pm_trace is enabled

Power management suspend/resume tracing (ab)uses the RTC to store
suspend/resume information persistently. As a consequence the RTC value is
clobbered when timekeeping is resumed and tries to inject the sleep time.

Commit a4f8f6667f09 ("timekeeping: Cap array access in timekeeping_debug")
plugged a out of bounds array access in the timekeeping debug code which
was caused by the clobbered RTC value, but we still use the clobbered RTC
value for sleep time injection into kernel timekeeping, which will result
in random adjustments depending on the stored "hash" value.

To prevent this keep track of the RTC clobbering and ignore the invalid RTC
timestamp at resume. If the system resumed successfully clear the flag,
which marks the RTC as unusable, warn the user about the RTC clobber and
recommend to adjust the RTC with 'ntpdate' or 'rdate'.

[jstultz: Fixed up pr_warn formating, and implemented suggestions from Ingo]
[ tglx: Rewrote changelog ]

Originally-from: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Xunlei Pang <xlpang@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/1480372524-15181-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/rtc.c       |  9 +++++++++
 drivers/base/power/trace.c  | 27 +++++++++++++++++++++++++++
 drivers/rtc/rtc-cmos.c      |  7 +++++++
 include/linux/mc146818rtc.h |  1 +
 include/linux/pm-trace.h    |  9 ++++++++-
 5 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 79c6311cd912..5b21cb7d84d6 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -64,6 +64,15 @@ void mach_get_cmos_time(struct timespec *now)
 	unsigned int status, year, mon, day, hour, min, sec, century = 0;
 	unsigned long flags;
 
+	/*
+	 * If pm_trace abused the RTC as storage, set the timespec to 0,
+	 * which tells the caller that this RTC value is unusable.
+	 */
+	if (!pm_trace_rtc_valid()) {
+		now->tv_sec = now->tv_nsec = 0;
+		return;
+	}
+
 	spin_lock_irqsave(&rtc_lock, flags);
 
 	/*
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index efec10b49d59..1cda505d6a85 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -10,6 +10,7 @@
 #include <linux/pm-trace.h>
 #include <linux/export.h>
 #include <linux/rtc.h>
+#include <linux/suspend.h>
 
 #include <linux/mc146818rtc.h>
 
@@ -74,6 +75,9 @@
 
 #define DEVSEED (7919)
 
+bool pm_trace_rtc_abused __read_mostly;
+EXPORT_SYMBOL_GPL(pm_trace_rtc_abused);
+
 static unsigned int dev_hash_value;
 
 static int set_magic_time(unsigned int user, unsigned int file, unsigned int device)
@@ -104,6 +108,7 @@ static int set_magic_time(unsigned int user, unsigned int file, unsigned int dev
 	time.tm_min = (n % 20) * 3;
 	n /= 20;
 	mc146818_set_time(&time);
+	pm_trace_rtc_abused = true;
 	return n ? -1 : 0;
 }
 
@@ -239,9 +244,31 @@ int show_trace_dev_match(char *buf, size_t size)
 	return ret;
 }
 
+static int
+pm_trace_notify(struct notifier_block *nb, unsigned long mode, void *_unused)
+{
+	switch (mode) {
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		if (pm_trace_rtc_abused) {
+			pm_trace_rtc_abused = false;
+			pr_warn("Possible incorrect RTC due to pm_trace, please use 'ntpdate' or 'rdate' to reset it.\n");
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block pm_trace_nb = {
+	.notifier_call = pm_trace_notify,
+};
+
 static int early_resume_init(void)
 {
 	hash_value_early_read = read_magic_time();
+	register_pm_notifier(&pm_trace_nb);
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index dd3d59806ffa..b27a18968a1a 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -191,6 +191,13 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
 
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
+	/*
+	 * If pm_trace abused the RTC for storage, set the timespec to 0,
+	 * which tells the caller that this RTC value is unusable.
+	 */
+	if (!pm_trace_rtc_valid())
+		return -EIO;
+
 	/* REVISIT:  if the clock has a "century" register, use
 	 * that instead of the heuristic in mc146818_get_time().
 	 * That'll make Y3K compatility (year > 2070) easy!
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index a585b4b5fa0e..0661af17a758 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -16,6 +16,7 @@
 #include <asm/mc146818rtc.h>		/* register access macros */
 #include <linux/bcd.h>
 #include <linux/delay.h>
+#include <linux/pm-trace.h>
 
 #ifdef __KERNEL__
 #include <linux/spinlock.h>		/* spinlock_t */
diff --git a/include/linux/pm-trace.h b/include/linux/pm-trace.h
index ecbde7a5548e..7b78793f07d7 100644
--- a/include/linux/pm-trace.h
+++ b/include/linux/pm-trace.h
@@ -1,11 +1,17 @@
 #ifndef PM_TRACE_H
 #define PM_TRACE_H
 
+#include <linux/types.h>
 #ifdef CONFIG_PM_TRACE
 #include <asm/pm-trace.h>
-#include <linux/types.h>
 
 extern int pm_trace_enabled;
+extern bool pm_trace_rtc_abused;
+
+static inline bool pm_trace_rtc_valid(void)
+{
+	return !pm_trace_rtc_abused;
+}
 
 static inline int pm_trace_is_enabled(void)
 {
@@ -24,6 +30,7 @@ extern int show_trace_dev_match(char *buf, size_t size);
 
 #else
 
+static inline bool pm_trace_rtc_valid(void) { return true; }
 static inline int pm_trace_is_enabled(void) { return 0; }
 
 #define TRACE_DEVICE(dev) do { } while (0)
-- 
cgit v1.2.3


From ec4101e8903e318b9fd4e3bbf72b1eaba53c64e1 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@mellanox.com>
Date: Mon, 28 Nov 2016 14:35:20 -0800
Subject: timekeeping/clocksource_cyc2ns: Document intended range limitation

The "cycles" argument should not be an absolute clocksource cycle
value, as the implementation's arithmetic will overflow relatively
easily with wide (64 bit) clocksource counters.

For performance, the implementation is simple and fast, since the
function is intended for only relatively small delta values of
clocksource cycles.

[jstultz: Fixed up to merge against HEAD & commit message tweaks,
 also included rewording suggestion by Ingo]
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Link: http://lkml.kernel.org/r/1480372524-15181-4-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 08398182f56e..65602d395a52 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -169,7 +169,10 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
  * @mult:	cycle to nanosecond multiplier
  * @shift:	cycle to nanosecond divisor (power of two)
  *
- * Converts cycles to nanoseconds, using the given mult and shift.
+ * Converts clocksource cycles to nanoseconds, using the given @mult and @shift.
+ * The code is optimized for performance and is not intended to work
+ * with absolute clocksource cycles (as those will easily overflow),
+ * but is only intended to be used with relative (delta) clocksource cycles.
  *
  * XXX - This could use some mult_lxl_ll() asm optimization
  */
-- 
cgit v1.2.3


From 948a5312f41658f7b76a598a139ef1f4dea09ca9 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Mon, 28 Nov 2016 14:35:22 -0800
Subject: timekeeping: Add a fast and NMI safe boot clock

This boot clock can be used as a tracing clock and will account for
suspend time.

To keep it NMI safe since we're accessing from tracing, we're not using a
separate timekeeper with updates to monotonic clock and boot offset
protected with seqlocks. This has the following minor side effects:

(1) Its possible that a timestamp be taken after the boot offset is updated
but before the timekeeper is updated. If this happens, the new boot offset
is added to the old timekeeping making the clock appear to update slightly
earlier:
   CPU 0                                        CPU 1
   timekeeping_inject_sleeptime64()
   __timekeeping_inject_sleeptime(tk, delta);
                                                timestamp();
   timekeeping_update(tk, TK_CLEAR_NTP...);

(2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
partially updated.  Since the tk->offs_boot update is a rare event, this
should be a rare occurrence which postprocessing should be able to handle.

Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1480372524-15181-6-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timekeeping.h |  1 +
 kernel/time/timekeeping.c   | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 09168c52ab64..361f8bf1429d 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -249,6 +249,7 @@ static inline u64 ktime_get_raw_ns(void)
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
+extern u64 ktime_get_boot_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e3db43..b2286e94c934 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -425,6 +425,35 @@ u64 ktime_get_raw_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) Its possible that a timestamp be taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping making the clock appear to update slightly
+ * earlier:
+ *    CPU 0                                        CPU 1
+ *    timekeeping_inject_sleeptime64()
+ *    __timekeeping_inject_sleeptime(tk, delta);
+ *                                                 timestamp();
+ *    timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated.  Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
-- 
cgit v1.2.3


From 3dc87dd048dc442bab633e85bfb96c893612d765 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 28 Nov 2016 22:38:53 +0100
Subject: nvme: lightnvm: attach lightnvm sysfs to nvme block device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, LBA read and write were not supported in the lightnvm
specification. Now that it supports it, lets use the traditional
NVMe gendisk, and attach the lightnvm sysfs geometry export.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/Makefile    |   2 +-
 drivers/lightnvm/core.c      |  15 +---
 drivers/lightnvm/lightnvm.h  |  35 --------
 drivers/lightnvm/sysfs.c     | 198 -------------------------------------------
 drivers/nvme/host/core.c     |  43 +++++-----
 drivers/nvme/host/lightnvm.c | 175 +++++++++++++++++++++++++++++++++++---
 drivers/nvme/host/nvme.h     |  25 +++---
 include/linux/lightnvm.h     |   2 -
 8 files changed, 197 insertions(+), 298 deletions(-)
 delete mode 100644 drivers/lightnvm/lightnvm.h
 delete mode 100644 drivers/lightnvm/sysfs.c

(limited to 'include/linux')

diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index 1f6b6521016a..a7a0a22cf1a5 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Open-Channel SSDs.
 #
 
-obj-$(CONFIG_NVM)		:= core.o sysblk.o sysfs.o
+obj-$(CONFIG_NVM)		:= core.o sysblk.o
 obj-$(CONFIG_NVM_GENNVM) 	+= gennvm.o
 obj-$(CONFIG_NVM_RRPC)		+= rrpc.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 1cac0f8bc0dc..11117404b5f5 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -27,8 +27,6 @@
 #include <linux/lightnvm.h>
 #include <linux/sched/sysctl.h>
 
-#include "lightnvm.h"
-
 static LIST_HEAD(nvm_tgt_types);
 static DECLARE_RWSEM(nvm_tgtt_lock);
 static LIST_HEAD(nvm_mgrs);
@@ -657,11 +655,6 @@ err:
 	return ret;
 }
 
-static void nvm_exit(struct nvm_dev *dev)
-{
-	nvm_sysfs_unregister_dev(dev);
-}
-
 struct nvm_dev *nvm_alloc_dev(int node)
 {
 	return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
@@ -691,10 +684,6 @@ int nvm_register(struct nvm_dev *dev)
 		}
 	}
 
-	ret = nvm_sysfs_register_dev(dev);
-	if (ret)
-		goto err_ppalist;
-
 	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
 		ret = nvm_get_sysblock(dev, &dev->sb);
 		if (!ret)
@@ -711,8 +700,6 @@ int nvm_register(struct nvm_dev *dev)
 	up_write(&nvm_lock);
 
 	return 0;
-err_ppalist:
-	dev->ops->destroy_dma_pool(dev->dma_pool);
 err_init:
 	kfree(dev->lun_map);
 	return ret;
@@ -725,7 +712,7 @@ void nvm_unregister(struct nvm_dev *dev)
 	list_del(&dev->devices);
 	up_write(&nvm_lock);
 
-	nvm_exit(dev);
+	nvm_free(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);
 
diff --git a/drivers/lightnvm/lightnvm.h b/drivers/lightnvm/lightnvm.h
deleted file mode 100644
index 305c181509a6..000000000000
--- a/drivers/lightnvm/lightnvm.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2016 CNEX Labs. All rights reserved.
- * Initial release: Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING.  If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- *
- */
-
-#ifndef LIGHTNVM_H
-#define LIGHTNVM_H
-
-#include <linux/lightnvm.h>
-
-/* core -> sysfs.c */
-int __must_check nvm_sysfs_register_dev(struct nvm_dev *);
-void nvm_sysfs_unregister_dev(struct nvm_dev *);
-int nvm_sysfs_register(void);
-void nvm_sysfs_unregister(void);
-
-/* sysfs > core */
-void nvm_free(struct nvm_dev *);
-
-#endif
diff --git a/drivers/lightnvm/sysfs.c b/drivers/lightnvm/sysfs.c
deleted file mode 100644
index 0338c27ab95a..000000000000
--- a/drivers/lightnvm/sysfs.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/lightnvm.h>
-#include <linux/miscdevice.h>
-#include <linux/kobject.h>
-#include <linux/blk-mq.h>
-
-#include "lightnvm.h"
-
-static ssize_t nvm_dev_attr_show(struct device *dev,
-				 struct device_attribute *dattr, char *page)
-{
-	struct nvm_dev *ndev = container_of(dev, struct nvm_dev, dev);
-	struct nvm_id *id = &ndev->identity;
-	struct nvm_id_group *grp = &id->groups[0];
-	struct attribute *attr = &dattr->attr;
-
-	if (strcmp(attr->name, "version") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
-	} else if (strcmp(attr->name, "vendor_opcode") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
-	} else if (strcmp(attr->name, "capabilities") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
-	} else if (strcmp(attr->name, "device_mode") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
-	} else if (strcmp(attr->name, "media_manager") == 0) {
-		if (!ndev->mt)
-			return scnprintf(page, PAGE_SIZE, "%s\n", "none");
-		return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name);
-	} else if (strcmp(attr->name, "ppa_format") == 0) {
-		return scnprintf(page, PAGE_SIZE,
-			"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-			id->ppaf.ch_offset, id->ppaf.ch_len,
-			id->ppaf.lun_offset, id->ppaf.lun_len,
-			id->ppaf.pln_offset, id->ppaf.pln_len,
-			id->ppaf.blk_offset, id->ppaf.blk_len,
-			id->ppaf.pg_offset, id->ppaf.pg_len,
-			id->ppaf.sect_offset, id->ppaf.sect_len);
-	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
-	} else if (strcmp(attr->name, "flash_media_type") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
-	} else if (strcmp(attr->name, "num_channels") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
-	} else if (strcmp(attr->name, "num_luns") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
-	} else if (strcmp(attr->name, "num_planes") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
-	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk);
-	} else if (strcmp(attr->name, "num_pages") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
-	} else if (strcmp(attr->name, "page_size") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
-	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
-	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
-	} else if (strcmp(attr->name, "read_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
-	} else if (strcmp(attr->name, "read_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
-	} else if (strcmp(attr->name, "prog_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
-	} else if (strcmp(attr->name, "prog_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
-	} else if (strcmp(attr->name, "erase_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
-	} else if (strcmp(attr->name, "erase_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
-	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
-		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
-	} else if (strcmp(attr->name, "media_capabilities") == 0) {
-		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
-	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n",
-				ndev->ops->max_phys_sect);
-	} else {
-		return scnprintf(page,
-				 PAGE_SIZE,
-				 "Unhandled attr(%s) in `nvm_dev_attr_show`\n",
-				 attr->name);
-	}
-}
-
-#define NVM_DEV_ATTR_RO(_name)						\
-	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
-
-static NVM_DEV_ATTR_RO(version);
-static NVM_DEV_ATTR_RO(vendor_opcode);
-static NVM_DEV_ATTR_RO(capabilities);
-static NVM_DEV_ATTR_RO(device_mode);
-static NVM_DEV_ATTR_RO(ppa_format);
-static NVM_DEV_ATTR_RO(media_manager);
-
-static NVM_DEV_ATTR_RO(media_type);
-static NVM_DEV_ATTR_RO(flash_media_type);
-static NVM_DEV_ATTR_RO(num_channels);
-static NVM_DEV_ATTR_RO(num_luns);
-static NVM_DEV_ATTR_RO(num_planes);
-static NVM_DEV_ATTR_RO(num_blocks);
-static NVM_DEV_ATTR_RO(num_pages);
-static NVM_DEV_ATTR_RO(page_size);
-static NVM_DEV_ATTR_RO(hw_sector_size);
-static NVM_DEV_ATTR_RO(oob_sector_size);
-static NVM_DEV_ATTR_RO(read_typ);
-static NVM_DEV_ATTR_RO(read_max);
-static NVM_DEV_ATTR_RO(prog_typ);
-static NVM_DEV_ATTR_RO(prog_max);
-static NVM_DEV_ATTR_RO(erase_typ);
-static NVM_DEV_ATTR_RO(erase_max);
-static NVM_DEV_ATTR_RO(multiplane_modes);
-static NVM_DEV_ATTR_RO(media_capabilities);
-static NVM_DEV_ATTR_RO(max_phys_secs);
-
-#define NVM_DEV_ATTR(_name) (dev_attr_##_name##)
-
-static struct attribute *nvm_dev_attrs[] = {
-	&dev_attr_version.attr,
-	&dev_attr_vendor_opcode.attr,
-	&dev_attr_capabilities.attr,
-	&dev_attr_device_mode.attr,
-	&dev_attr_media_manager.attr,
-
-	&dev_attr_ppa_format.attr,
-	&dev_attr_media_type.attr,
-	&dev_attr_flash_media_type.attr,
-	&dev_attr_num_channels.attr,
-	&dev_attr_num_luns.attr,
-	&dev_attr_num_planes.attr,
-	&dev_attr_num_blocks.attr,
-	&dev_attr_num_pages.attr,
-	&dev_attr_page_size.attr,
-	&dev_attr_hw_sector_size.attr,
-	&dev_attr_oob_sector_size.attr,
-	&dev_attr_read_typ.attr,
-	&dev_attr_read_max.attr,
-	&dev_attr_prog_typ.attr,
-	&dev_attr_prog_max.attr,
-	&dev_attr_erase_typ.attr,
-	&dev_attr_erase_max.attr,
-	&dev_attr_multiplane_modes.attr,
-	&dev_attr_media_capabilities.attr,
-	&dev_attr_max_phys_secs.attr,
-	NULL,
-};
-
-static struct attribute_group nvm_dev_attr_group = {
-	.name = "lightnvm",
-	.attrs = nvm_dev_attrs,
-};
-
-static const struct attribute_group *nvm_dev_attr_groups[] = {
-	&nvm_dev_attr_group,
-	NULL,
-};
-
-static void nvm_dev_release(struct device *device)
-{
-	struct nvm_dev *dev = container_of(device, struct nvm_dev, dev);
-	struct request_queue *q = dev->q;
-
-	pr_debug("nvm/sysfs: `nvm_dev_release`\n");
-
-	blk_mq_unregister_dev(device, q);
-
-	nvm_free(dev);
-}
-
-static struct device_type nvm_type = {
-	.name		= "lightnvm",
-	.groups		= nvm_dev_attr_groups,
-	.release	= nvm_dev_release,
-};
-
-int nvm_sysfs_register_dev(struct nvm_dev *dev)
-{
-	int ret;
-
-	if (!dev->parent_dev)
-		return 0;
-
-	dev->dev.parent = dev->parent_dev;
-	dev_set_name(&dev->dev, "%s", dev->name);
-	dev->dev.type = &nvm_type;
-	device_initialize(&dev->dev);
-	ret = device_add(&dev->dev);
-
-	if (!ret)
-		blk_mq_register_dev(&dev->dev, dev->q);
-
-	return ret;
-}
-
-void nvm_sysfs_unregister_dev(struct nvm_dev *dev)
-{
-	if (dev && dev->parent_dev)
-		kobject_put(&dev->dev.kobj);
-}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e54bb10ead9c..8c644838252e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1673,27 +1673,24 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (nvme_revalidate_ns(ns, &id))
 		goto out_free_queue;
 
-	if (nvme_nvm_ns_supported(ns, id)) {
-		if (nvme_nvm_register(ns, disk_name, node,
-							&nvme_ns_attr_group)) {
-			dev_warn(ctrl->dev, "%s: LightNVM init failure\n",
-								__func__);
-			goto out_free_id;
-		}
-	} else {
-		disk = alloc_disk_node(0, node);
-		if (!disk)
-			goto out_free_id;
+	if (nvme_nvm_ns_supported(ns, id) &&
+				nvme_nvm_register(ns, disk_name, node)) {
+		dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+		goto out_free_id;
+	}
 
-		disk->fops = &nvme_fops;
-		disk->private_data = ns;
-		disk->queue = ns->queue;
-		disk->flags = GENHD_FL_EXT_DEVT;
-		memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
-		ns->disk = disk;
+	disk = alloc_disk_node(0, node);
+	if (!disk)
+		goto out_free_id;
 
-		__nvme_revalidate_disk(disk, id);
-	}
+	disk->fops = &nvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	disk->flags = GENHD_FL_EXT_DEVT;
+	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
+	ns->disk = disk;
+
+	__nvme_revalidate_disk(disk, id);
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_add_tail(&ns->list, &ctrl->namespaces);
@@ -1703,14 +1700,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	kfree(id);
 
-	if (ns->ndev)
-		return;
-
 	device_add_disk(ctrl->device, ns->disk);
 	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_attr_group))
 		pr_warn("%s: failed to create sysfs group for identification\n",
 			ns->disk->disk_name);
+	if (ns->ndev && nvme_nvm_register_sysfs(ns))
+		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
+			ns->disk->disk_name);
 	return;
  out_free_id:
 	kfree(id);
@@ -1732,6 +1729,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 			blk_integrity_unregister(ns->disk);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_attr_group);
+		if (ns->ndev)
+			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index de3dc87226bb..f23e6fefd262 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -575,12 +575,10 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.max_phys_sect		= 64,
 };
 
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node,
-		      const struct attribute_group *attrs)
+int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
 {
 	struct request_queue *q = ns->queue;
 	struct nvm_dev *dev;
-	int ret;
 
 	dev = nvm_alloc_dev(node);
 	if (!dev)
@@ -589,18 +587,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node,
 	dev->q = q;
 	memcpy(dev->name, disk_name, DISK_NAME_LEN);
 	dev->ops = &nvme_nvm_dev_ops;
-	dev->parent_dev = ns->ctrl->device;
 	dev->private_data = ns;
 	ns->ndev = dev;
 
-	ret = nvm_register(dev);
-
-	ns->lba_shift = ilog2(dev->sec_size) - 9;
-
-	if (sysfs_create_group(&dev->dev.kobj, attrs))
-		pr_warn("%s: failed to create sysfs group for identification\n",
-			disk_name);
-	return ret;
+	return nvm_register(dev);
 }
 
 void nvme_nvm_unregister(struct nvme_ns *ns)
@@ -608,6 +598,167 @@ void nvme_nvm_unregister(struct nvme_ns *ns)
 	nvm_unregister(ns->ndev);
 }
 
+static ssize_t nvm_dev_attr_show(struct device *dev,
+				 struct device_attribute *dattr, char *page)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_id *id;
+	struct nvm_id_group *grp;
+	struct attribute *attr;
+
+	if (!ndev)
+		return 0;
+
+	id = &ndev->identity;
+	grp = &id->groups[0];
+	attr = &dattr->attr;
+
+	if (strcmp(attr->name, "version") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
+	} else if (strcmp(attr->name, "vendor_opcode") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
+	} else if (strcmp(attr->name, "capabilities") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
+	} else if (strcmp(attr->name, "device_mode") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
+	} else if (strcmp(attr->name, "media_manager") == 0) {
+		if (!ndev->mt)
+			return scnprintf(page, PAGE_SIZE, "%s\n", "none");
+		return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name);
+	} else if (strcmp(attr->name, "ppa_format") == 0) {
+		return scnprintf(page, PAGE_SIZE,
+			"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+			id->ppaf.ch_offset, id->ppaf.ch_len,
+			id->ppaf.lun_offset, id->ppaf.lun_len,
+			id->ppaf.pln_offset, id->ppaf.pln_len,
+			id->ppaf.blk_offset, id->ppaf.blk_len,
+			id->ppaf.pg_offset, id->ppaf.pg_len,
+			id->ppaf.sect_offset, id->ppaf.sect_len);
+	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
+	} else if (strcmp(attr->name, "flash_media_type") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
+	} else if (strcmp(attr->name, "num_channels") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
+	} else if (strcmp(attr->name, "num_luns") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
+	} else if (strcmp(attr->name, "num_planes") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
+	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk);
+	} else if (strcmp(attr->name, "num_pages") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
+	} else if (strcmp(attr->name, "page_size") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
+	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
+	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
+	} else if (strcmp(attr->name, "read_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
+	} else if (strcmp(attr->name, "read_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
+	} else if (strcmp(attr->name, "prog_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
+	} else if (strcmp(attr->name, "prog_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
+	} else if (strcmp(attr->name, "erase_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
+	} else if (strcmp(attr->name, "erase_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
+	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
+		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
+	} else if (strcmp(attr->name, "media_capabilities") == 0) {
+		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
+	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n",
+				ndev->ops->max_phys_sect);
+	} else {
+		return scnprintf(page,
+				 PAGE_SIZE,
+				 "Unhandled attr(%s) in `nvm_dev_attr_show`\n",
+				 attr->name);
+	}
+}
+
+#define NVM_DEV_ATTR_RO(_name)						\
+	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
+
+static NVM_DEV_ATTR_RO(version);
+static NVM_DEV_ATTR_RO(vendor_opcode);
+static NVM_DEV_ATTR_RO(capabilities);
+static NVM_DEV_ATTR_RO(device_mode);
+static NVM_DEV_ATTR_RO(ppa_format);
+static NVM_DEV_ATTR_RO(media_manager);
+
+static NVM_DEV_ATTR_RO(media_type);
+static NVM_DEV_ATTR_RO(flash_media_type);
+static NVM_DEV_ATTR_RO(num_channels);
+static NVM_DEV_ATTR_RO(num_luns);
+static NVM_DEV_ATTR_RO(num_planes);
+static NVM_DEV_ATTR_RO(num_blocks);
+static NVM_DEV_ATTR_RO(num_pages);
+static NVM_DEV_ATTR_RO(page_size);
+static NVM_DEV_ATTR_RO(hw_sector_size);
+static NVM_DEV_ATTR_RO(oob_sector_size);
+static NVM_DEV_ATTR_RO(read_typ);
+static NVM_DEV_ATTR_RO(read_max);
+static NVM_DEV_ATTR_RO(prog_typ);
+static NVM_DEV_ATTR_RO(prog_max);
+static NVM_DEV_ATTR_RO(erase_typ);
+static NVM_DEV_ATTR_RO(erase_max);
+static NVM_DEV_ATTR_RO(multiplane_modes);
+static NVM_DEV_ATTR_RO(media_capabilities);
+static NVM_DEV_ATTR_RO(max_phys_secs);
+
+static struct attribute *nvm_dev_attrs[] = {
+	&dev_attr_version.attr,
+	&dev_attr_vendor_opcode.attr,
+	&dev_attr_capabilities.attr,
+	&dev_attr_device_mode.attr,
+	&dev_attr_media_manager.attr,
+
+	&dev_attr_ppa_format.attr,
+	&dev_attr_media_type.attr,
+	&dev_attr_flash_media_type.attr,
+	&dev_attr_num_channels.attr,
+	&dev_attr_num_luns.attr,
+	&dev_attr_num_planes.attr,
+	&dev_attr_num_blocks.attr,
+	&dev_attr_num_pages.attr,
+	&dev_attr_page_size.attr,
+	&dev_attr_hw_sector_size.attr,
+	&dev_attr_oob_sector_size.attr,
+	&dev_attr_read_typ.attr,
+	&dev_attr_read_max.attr,
+	&dev_attr_prog_typ.attr,
+	&dev_attr_prog_max.attr,
+	&dev_attr_erase_typ.attr,
+	&dev_attr_erase_max.attr,
+	&dev_attr_multiplane_modes.attr,
+	&dev_attr_media_capabilities.attr,
+	&dev_attr_max_phys_secs.attr,
+	NULL,
+};
+
+static const struct attribute_group nvm_dev_attr_group = {
+	.name		= "lightnvm",
+	.attrs		= nvm_dev_attrs,
+};
+
+int nvme_nvm_register_sysfs(struct nvme_ns *ns)
+{
+	return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group);
+}
+
+void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
+{
+	sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group);
+}
+
 /* move to shared place when used in multiple places. */
 #define PCI_VENDOR_ID_CNEX 0x1d1d
 #define PCI_DEVICE_ID_CNEX_WL 0x2807
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 468fc445bf35..a3d6ffd874af 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -321,36 +321,33 @@ int nvme_sg_get_version_num(int __user *ip);
 
 #ifdef CONFIG_NVM
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node,
-		      const struct attribute_group *attrs);
+int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
 void nvme_nvm_unregister(struct nvme_ns *ns);
-
-static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
-{
-	if (dev->type->devnode)
-		return dev_to_disk(dev)->private_data;
-
-	return (container_of(dev, struct nvm_dev, dev))->private_data;
-}
+int nvme_nvm_register_sysfs(struct nvme_ns *ns);
+void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
 #else
 static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
-				    int node,
-				    const struct attribute_group *attrs)
+				    int node)
 {
 	return 0;
 }
 
 static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
-
+static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
+{
+	return 0;
+}
+static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
 static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	return 0;
 }
+#endif /* CONFIG_NVM */
+
 static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 {
 	return dev_to_disk(dev)->private_data;
 }
-#endif /* CONFIG_NVM */
 
 int __init nvme_core_init(void);
 void nvme_core_exit(void);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d190786e4ad8..fb2e601d674e 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -352,8 +352,6 @@ struct nvm_dev {
 
 	/* Backend device */
 	struct request_queue *q;
-	struct device dev;
-	struct device *parent_dev;
 	char name[DISK_NAME_LEN];
 	void *private_data;
 
-- 
cgit v1.2.3


From bb3149792e0ed52cf5f457dda4c9bf9c5bda1542 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:38:54 +0100
Subject: lightnvm: enable to send hint to erase command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Erases might be subject to host hints. An example is multi-plane
programming to erase blocks in parallel. Enable targets to specify this
hint.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c      | 9 ++++++---
 drivers/lightnvm/gennvm.c    | 5 ++---
 drivers/lightnvm/rrpc.c      | 2 +-
 drivers/lightnvm/sysblk.c    | 4 ++--
 drivers/nvme/host/lightnvm.c | 1 +
 include/linux/lightnvm.h     | 7 +++----
 6 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 11117404b5f5..8664fe09cc82 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -202,9 +202,9 @@ int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
-int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
+int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
 {
-	return dev->mt->erase_blk(dev, blk, 0);
+	return dev->mt->erase_blk(dev, blk, flags);
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
@@ -285,7 +285,8 @@ void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_free_rqd_ppalist);
 
-int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas)
+int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
+								int flags)
 {
 	struct nvm_rq rqd;
 	int ret;
@@ -301,6 +302,8 @@ int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas)
 
 	nvm_generic_to_addr_mode(dev, &rqd);
 
+	rqd.flags = flags;
+
 	ret = dev->ops->erase_block(dev, &rqd);
 
 	nvm_free_rqd_ppalist(dev, &rqd);
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index b74174c6d021..730d7361f870 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -593,12 +593,11 @@ static int gen_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return dev->ops->submit_io(dev, rqd);
 }
 
-static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
-							unsigned long flags)
+static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
 {
 	struct ppa_addr addr = block_to_ppa(dev, blk);
 
-	return nvm_erase_ppa(dev, &addr, 1);
+	return nvm_erase_ppa(dev, &addr, 1, flags);
 }
 
 static int gen_reserve_lun(struct nvm_dev *dev, int lunid)
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 37fcaadbf80c..067e890ae2bf 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -404,7 +404,7 @@ static void rrpc_block_gc(struct work_struct *work)
 	if (rrpc_move_valid_pages(rrpc, rblk))
 		goto put_back;
 
-	if (nvm_erase_blk(dev, rblk->parent))
+	if (nvm_erase_blk(dev, rblk->parent, 0))
 		goto put_back;
 
 	rrpc_put_blk(rrpc, rblk);
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
index a75bd28aaca3..d22906757415 100644
--- a/drivers/lightnvm/sysblk.c
+++ b/drivers/lightnvm/sysblk.c
@@ -379,7 +379,7 @@ static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
 		ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
 		ppa->g.pg = ppa_to_slc(dev, 0);
 
-		ret = nvm_erase_ppa(dev, ppa, 1);
+		ret = nvm_erase_ppa(dev, ppa, 1, 0);
 		if (ret)
 			return ret;
 
@@ -725,7 +725,7 @@ int nvm_dev_factory(struct nvm_dev *dev, int flags)
 	/* continue to erase until list of blks until empty */
 	while ((ppa_cnt =
 			nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0)
-		nvm_erase_ppa(dev, ppas, ppa_cnt);
+		nvm_erase_ppa(dev, ppas, ppa_cnt, 0);
 
 	/* mark host reserved blocks free */
 	if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f23e6fefd262..037dff5951d4 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -526,6 +526,7 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
 	c.erase.nsid = cpu_to_le32(ns->ns_id);
 	c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
 	c.erase.length = cpu_to_le16(rqd->nr_ppas - 1);
+	c.erase.control = cpu_to_le16(rqd->flags);
 
 	return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
 }
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index fb2e601d674e..d87be02edc39 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -470,8 +470,7 @@ typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
-								unsigned long);
+typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
 typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
@@ -537,8 +536,8 @@ extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int);
-extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
+extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *, int);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
 								void *, int);
-- 
cgit v1.2.3


From a24ba4644b7ae5af3cd2eb6992c237cb4548c45e Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:38:56 +0100
Subject: lightnvm: export set bad block table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bad blocks should be managed by block owners. This would be either
targets for data blocks or sysblk for system blocks.

In order to support this, export two functions: One to mark a block as
an specific type (e.g., bad block) and another to update the bad block
table on the device.

Move bad block management to rrpc.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c   | 27 +++++++++++++++++++++++++++
 drivers/lightnvm/gennvm.c | 24 ------------------------
 drivers/lightnvm/rrpc.c   | 34 +++++++++++++++++++++++++++++++++-
 drivers/lightnvm/sysblk.c | 29 +++++------------------------
 include/linux/lightnvm.h  | 13 ++++++++++++-
 5 files changed, 77 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 8664fe09cc82..6527cf6862fa 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -196,6 +196,33 @@ void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 }
 EXPORT_SYMBOL(nvm_mark_blk);
 
+int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
+								int type)
+{
+	struct nvm_rq rqd;
+	int ret;
+
+	if (nr_ppas > dev->ops->max_phys_sect) {
+		pr_err("nvm: unable to update all sysblocks atomically\n");
+		return -EINVAL;
+	}
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
+	nvm_generic_to_addr_mode(dev, &rqd);
+
+	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
+	nvm_free_rqd_ppalist(dev, &rqd);
+	if (ret) {
+		pr_err("nvm: sysblk failed bb mark\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nvm_set_bb_tbl);
+
 int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
 	return dev->mt->submit_io(dev, rqd);
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index a7e17faea2cd..e969e3a801c4 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -543,34 +543,10 @@ static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 	blk->state = type;
 }
 
-/*
- * mark block bad in gen. It is expected that the target recovers separately
- */
-static void gen_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-	int bit = -1;
-	int max_secs = dev->ops->max_phys_sect;
-	void *comp_bits = &rqd->ppa_status;
-
-	nvm_addr_to_generic_mode(dev, rqd);
-
-	/* look up blocks and mark them as bad */
-	if (rqd->nr_ppas == 1) {
-		gen_mark_blk(dev, rqd->ppa_addr, NVM_BLK_ST_BAD);
-		return;
-	}
-
-	while ((bit = find_next_bit(comp_bits, max_secs, bit + 1)) < max_secs)
-		gen_mark_blk(dev, rqd->ppa_list[bit], NVM_BLK_ST_BAD);
-}
-
 static void gen_end_io(struct nvm_rq *rqd)
 {
 	struct nvm_tgt_instance *ins = rqd->ins;
 
-	if (rqd->error == NVM_RSP_ERR_FAILWRITE)
-		gen_mark_blk_bad(rqd->dev, rqd);
-
 	ins->tt->end_io(rqd);
 }
 
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 067e890ae2bf..2b71b7e59dac 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -675,6 +675,34 @@ static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
 	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
 }
 
+static void __rrpc_mark_bad_block(struct nvm_dev *dev, struct ppa_addr *ppa)
+{
+		nvm_mark_blk(dev, *ppa, NVM_BLK_ST_BAD);
+		nvm_set_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+}
+
+static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	void *comp_bits = &rqd->ppa_status;
+	struct ppa_addr ppa, prev_ppa;
+	int nr_ppas = rqd->nr_ppas;
+	int bit;
+
+	if (rqd->nr_ppas == 1)
+		__rrpc_mark_bad_block(dev, &rqd->ppa_addr);
+
+	ppa_set_empty(&prev_ppa);
+	bit = -1;
+	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
+		ppa = rqd->ppa_list[bit];
+		if (ppa_cmp_blk(ppa, prev_ppa))
+			continue;
+
+		__rrpc_mark_bad_block(dev, &ppa);
+	}
+}
+
 static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 						sector_t laddr, uint8_t npages)
 {
@@ -701,8 +729,12 @@ static void rrpc_end_io(struct nvm_rq *rqd)
 	uint8_t npages = rqd->nr_ppas;
 	sector_t laddr = rrpc_get_laddr(rqd->bio) - npages;
 
-	if (bio_data_dir(rqd->bio) == WRITE)
+	if (bio_data_dir(rqd->bio) == WRITE) {
+		if (rqd->error == NVM_RSP_ERR_FAILWRITE)
+			rrpc_mark_bad_block(rrpc, rqd);
+
 		rrpc_end_io_write(rrpc, rrqd, laddr, npages);
+	}
 
 	bio_put(rqd->bio);
 
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
index d22906757415..fa644afb25de 100644
--- a/drivers/lightnvm/sysblk.c
+++ b/drivers/lightnvm/sysblk.c
@@ -267,29 +267,10 @@ static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
 	return found;
 }
 
-static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type)
+static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s,
+								int type)
 {
-	struct nvm_rq rqd;
-	int ret;
-
-	if (s->nr_ppas > dev->ops->max_phys_sect) {
-		pr_err("nvm: unable to update all sysblocks atomically\n");
-		return -EINVAL;
-	}
-
-	memset(&rqd, 0, sizeof(struct nvm_rq));
-
-	nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas, 1);
-	nvm_generic_to_addr_mode(dev, &rqd);
-
-	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
-	nvm_free_rqd_ppalist(dev, &rqd);
-	if (ret) {
-		pr_err("nvm: sysblk failed bb mark\n");
-		return -EINVAL;
-	}
-
-	return 0;
+	return nvm_set_bb_tbl(dev, s->ppas, s->nr_ppas, type);
 }
 
 static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
@@ -573,7 +554,7 @@ int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
 	if (ret)
 		goto err_mark;
 
-	ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
+	ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
 	if (ret)
 		goto err_mark;
 
@@ -733,7 +714,7 @@ int nvm_dev_factory(struct nvm_dev *dev, int flags)
 		mutex_lock(&dev->mlock);
 		ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
 		if (!ret)
-			ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
+			ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
 		mutex_unlock(&dev->mlock);
 	}
 err_ppas:
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d87be02edc39..4480d1c6a1a5 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -423,6 +423,15 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
 	return ppa;
 }
 
+static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
+{
+	if (ppa_empty(ppa1) || ppa_empty(ppa2))
+		return 0;
+
+	return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) &&
+					(ppa1.g.blk == ppa2.g.blk));
+}
+
 static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
 {
 	return dev->lptbl[slc_pg];
@@ -528,7 +537,9 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type);
+extern void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type);
+extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas,
+							int nr_ppas, int type);
 
 extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
 extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
-- 
cgit v1.2.3


From 402ab9a89d7b5bab08a5534027b39d80085ec19b Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:38:57 +0100
Subject: lightnvm: add ECC error codes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add ECC error codes to enable the appropriate handling in the target.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 4480d1c6a1a5..6b26a3289bce 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -107,6 +107,8 @@ enum {
 	NVM_RSP_NOT_CHANGEABLE	= 0x1,
 	NVM_RSP_ERR_FAILWRITE	= 0x40ff,
 	NVM_RSP_ERR_EMPTYPAGE	= 0x42ff,
+	NVM_RSP_ERR_FAILECC	= 0x4281,
+	NVM_RSP_WARN_HIGHECC	= 0x4700,
 
 	/* Device opcodes */
 	NVM_OP_HBREAD		= 0x02,
-- 
cgit v1.2.3


From 7e4f64a9b3004ce592f21653c3b7781628862232 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:00 +0100
Subject: lightnvm: cleanup unused target operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cleanup definition leftovers from old gennvm interface

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 6b26a3289bce..e598308882aa 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -477,9 +477,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
 typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
 					      struct nvm_lun *, unsigned long);
 typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
@@ -504,9 +501,6 @@ struct nvmm_type {
 	/* Block administration callbacks */
 	nvmm_get_blk_fn *get_blk;
 	nvmm_put_blk_fn *put_blk;
-	nvmm_open_blk_fn *open_blk;
-	nvmm_close_blk_fn *close_blk;
-	nvmm_flush_blk_fn *flush_blk;
 
 	nvmm_submit_io_fn *submit_io;
 	nvmm_erase_blk_fn *erase_blk;
-- 
cgit v1.2.3


From 0e5c3246dbb96b6870634e7d51b2490f05c976cf Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:01 +0100
Subject: lightnvm: make address conversion functions global
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Targets are assumed to used the same generic ppa format, where the
address is partitioned on ch:lun:block:pg:pl:sec. Thus, make the
function in charge of transforming the ppa address from a linear format
to the generic one available to all targets.

This function will be needed by the media manager in order to do target
mapping translations when targets are divided on different physical
partitions.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/rrpc.c  | 30 ------------------------------
 include/linux/lightnvm.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 60ca9d4bd6e9..9253acc80714 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -136,36 +136,6 @@ static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 	return blk->id * rrpc->dev->sec_per_blk;
 }
 
-static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-	int secs, pgs, blks, luns;
-	sector_t ppa = r.ppa;
-
-	l.ppa = 0;
-
-	div_u64_rem(ppa, dev->sec_per_pg, &secs);
-	l.g.sec = secs;
-
-	sector_div(ppa, dev->sec_per_pg);
-	div_u64_rem(ppa, dev->pgs_per_blk, &pgs);
-	l.g.pg = pgs;
-
-	sector_div(ppa, dev->pgs_per_blk);
-	div_u64_rem(ppa, dev->blks_per_lun, &blks);
-	l.g.blk = blks;
-
-	sector_div(ppa, dev->blks_per_lun);
-	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
-	l.g.lun = luns;
-
-	sector_div(ppa, dev->luns_per_chnl);
-	l.g.ch = ppa;
-
-	return l;
-}
-
 static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
 {
 	struct ppa_addr paddr;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e598308882aa..98278a9fcb1f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -361,6 +361,36 @@ struct nvm_dev {
 	spinlock_t lock;
 };
 
+static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+							struct ppa_addr r)
+{
+	struct ppa_addr l;
+	int secs, pgs, blks, luns;
+	sector_t ppa = r.ppa;
+
+	l.ppa = 0;
+
+	div_u64_rem(ppa, dev->sec_per_pg, &secs);
+	l.g.sec = secs;
+
+	sector_div(ppa, dev->sec_per_pg);
+	div_u64_rem(ppa, dev->pgs_per_blk, &pgs);
+	l.g.pg = pgs;
+
+	sector_div(ppa, dev->pgs_per_blk);
+	div_u64_rem(ppa, dev->blks_per_lun, &blks);
+	l.g.blk = blks;
+
+	sector_div(ppa, dev->blks_per_lun);
+	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+	l.g.lun = luns;
+
+	sector_div(ppa, dev->luns_per_chnl);
+	l.g.ch = ppa;
+
+	return l;
+}
+
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 						struct ppa_addr r)
 {
-- 
cgit v1.2.3


From de93434fcf74d41754a48e45365a5914e00bc0be Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:04 +0100
Subject: lightnvm: remove gen_lun abstraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gen_lun abstraction in the generic media manager was conceived on
the assumption that a single target would instantiated on top of it.
This has complicated target design to implement multi-instances. Remove
this abstraction and move its logic to nvm_lun, which manages physical
lun geometry and operations.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c | 85 +++++++++++++++++++++++------------------------
 drivers/lightnvm/gennvm.h | 16 +--------
 include/linux/lightnvm.h  | 10 ++++++
 3 files changed, 53 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index aee5b722ba63..3572ebbb50ce 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -223,13 +223,13 @@ static void gen_put_area(struct nvm_dev *dev, sector_t begin)
 static void gen_blocks_free(struct nvm_dev *dev)
 {
 	struct gen_dev *gn = dev->mp;
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	int i;
 
 	gen_for_each_lun(gn, lun, i) {
-		if (!lun->vlun.blocks)
+		if (!lun->blocks)
 			break;
-		vfree(lun->vlun.blocks);
+		vfree(lun->blocks);
 	}
 }
 
@@ -242,24 +242,24 @@ static void gen_luns_free(struct nvm_dev *dev)
 
 static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
 {
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	int i;
 
-	gn->luns = kcalloc(dev->nr_luns, sizeof(struct gen_lun), GFP_KERNEL);
+	gn->luns = kcalloc(dev->nr_luns, sizeof(struct nvm_lun), GFP_KERNEL);
 	if (!gn->luns)
 		return -ENOMEM;
 
 	gen_for_each_lun(gn, lun, i) {
-		spin_lock_init(&lun->vlun.lock);
 		INIT_LIST_HEAD(&lun->free_list);
 		INIT_LIST_HEAD(&lun->used_list);
 		INIT_LIST_HEAD(&lun->bb_list);
 
-		lun->reserved_blocks = 2; /* for GC only */
-		lun->vlun.id = i;
-		lun->vlun.lun_id = i % dev->luns_per_chnl;
-		lun->vlun.chnl_id = i / dev->luns_per_chnl;
-		lun->vlun.nr_free_blocks = dev->blks_per_lun;
+		spin_lock_init(&lun->lock);
+
+		lun->id = i;
+		lun->lun_id = i % dev->luns_per_chnl;
+		lun->chnl_id = i / dev->luns_per_chnl;
+		lun->nr_free_blocks = dev->blks_per_lun;
 	}
 	return 0;
 }
@@ -268,7 +268,7 @@ static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa,
 							u8 *blks, int nr_blks)
 {
 	struct nvm_dev *dev = gn->dev;
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	struct nvm_block *blk;
 	int i;
 
@@ -282,9 +282,10 @@ static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa,
 		if (blks[i] == NVM_BLK_T_FREE)
 			continue;
 
-		blk = &lun->vlun.blocks[i];
+		blk = &lun->blocks[i];
 		list_move_tail(&blk->list, &lun->bb_list);
-		lun->vlun.nr_free_blocks--;
+		blk->state = NVM_BLK_ST_BAD;
+		lun->nr_free_blocks--;
 	}
 
 	return 0;
@@ -295,7 +296,7 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 	struct nvm_dev *dev = private;
 	struct gen_dev *gn = dev->mp;
 	u64 elba = slba + nlb;
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	struct nvm_block *blk;
 	u64 i;
 	int lun_id;
@@ -326,7 +327,7 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 		/* Calculate block offset into lun */
 		pba = pba - (dev->sec_per_lun * lun_id);
-		blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
+		blk = &lun->blocks[div_u64(pba, dev->sec_per_blk)];
 
 		if (!blk->state) {
 			/* at this point, we don't know anything about the
@@ -335,7 +336,7 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 			 */
 			list_move_tail(&blk->list, &lun->used_list);
 			blk->state = NVM_BLK_ST_TGT;
-			lun->vlun.nr_free_blocks--;
+			lun->nr_free_blocks--;
 		}
 	}
 
@@ -344,7 +345,7 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn)
 {
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	struct nvm_block *block;
 	sector_t lun_iter, blk_iter, cur_block_id = 0;
 	int ret, nr_blks;
@@ -356,19 +357,19 @@ static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn)
 		return -ENOMEM;
 
 	gen_for_each_lun(gn, lun, lun_iter) {
-		lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) *
+		lun->blocks = vzalloc(sizeof(struct nvm_block) *
 							dev->blks_per_lun);
-		if (!lun->vlun.blocks) {
+		if (!lun->blocks) {
 			kfree(blks);
 			return -ENOMEM;
 		}
 
 		for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) {
-			block = &lun->vlun.blocks[blk_iter];
+			block = &lun->blocks[blk_iter];
 
 			INIT_LIST_HEAD(&block->list);
 
-			block->lun = &lun->vlun;
+			block->lun = lun;
 			block->id = cur_block_id++;
 
 			list_add_tail(&block->list, &lun->free_list);
@@ -378,8 +379,8 @@ static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn)
 			struct ppa_addr ppa;
 
 			ppa.ppa = 0;
-			ppa.g.ch = lun->vlun.chnl_id;
-			ppa.g.lun = lun->vlun.lun_id;
+			ppa.g.ch = lun->chnl_id;
+			ppa.g.lun = lun->lun_id;
 
 			ret = nvm_get_bb_tbl(dev, ppa, blks);
 			if (ret)
@@ -468,41 +469,39 @@ static void gen_unregister(struct nvm_dev *dev)
 }
 
 static struct nvm_block *gen_get_blk(struct nvm_dev *dev,
-				struct nvm_lun *vlun, unsigned long flags)
+				struct nvm_lun *lun, unsigned long flags)
 {
-	struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
 	struct nvm_block *blk = NULL;
 	int is_gc = flags & NVM_IOTYPE_GC;
 
-	spin_lock(&vlun->lock);
+	spin_lock(&lun->lock);
 	if (list_empty(&lun->free_list)) {
 		pr_err_ratelimited("gen: lun %u have no free pages available",
-								lun->vlun.id);
+								lun->id);
 		goto out;
 	}
 
-	if (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks)
+	if (!is_gc && lun->nr_free_blocks < lun->reserved_blocks)
 		goto out;
 
 	blk = list_first_entry(&lun->free_list, struct nvm_block, list);
 
 	list_move_tail(&blk->list, &lun->used_list);
 	blk->state = NVM_BLK_ST_TGT;
-	lun->vlun.nr_free_blocks--;
+	lun->nr_free_blocks--;
 out:
-	spin_unlock(&vlun->lock);
+	spin_unlock(&lun->lock);
 	return blk;
 }
 
 static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
 {
-	struct nvm_lun *vlun = blk->lun;
-	struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
+	struct nvm_lun *lun = blk->lun;
 
-	spin_lock(&vlun->lock);
+	spin_lock(&lun->lock);
 	if (blk->state & NVM_BLK_ST_TGT) {
 		list_move_tail(&blk->list, &lun->free_list);
-		lun->vlun.nr_free_blocks++;
+		lun->nr_free_blocks++;
 		blk->state = NVM_BLK_ST_FREE;
 	} else if (blk->state & NVM_BLK_ST_BAD) {
 		list_move_tail(&blk->list, &lun->bb_list);
@@ -513,13 +512,13 @@ static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
 							blk->id, blk->state);
 		list_move_tail(&blk->list, &lun->bb_list);
 	}
-	spin_unlock(&vlun->lock);
+	spin_unlock(&lun->lock);
 }
 
 static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 {
 	struct gen_dev *gn = dev->mp;
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	struct nvm_block *blk;
 
 	pr_debug("gen: ppa  (ch: %u lun: %u blk: %u pg: %u) -> %u\n",
@@ -537,7 +536,7 @@ static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 	}
 
 	lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
-	blk = &lun->vlun.blocks[ppa.g.blk];
+	blk = &lun->blocks[ppa.g.blk];
 
 	/* will be moved to bb list on put_blk from target */
 	blk->state = type;
@@ -587,23 +586,23 @@ static struct nvm_lun *gen_get_lun(struct nvm_dev *dev, int lunid)
 	if (unlikely(lunid >= dev->nr_luns))
 		return NULL;
 
-	return &gn->luns[lunid].vlun;
+	return &gn->luns[lunid];
 }
 
 static void gen_lun_info_print(struct nvm_dev *dev)
 {
 	struct gen_dev *gn = dev->mp;
-	struct gen_lun *lun;
+	struct nvm_lun *lun;
 	unsigned int i;
 
 
 	gen_for_each_lun(gn, lun, i) {
-		spin_lock(&lun->vlun.lock);
+		spin_lock(&lun->lock);
 
 		pr_info("%s: lun%8u\t%u\n", dev->name, i,
-						lun->vlun.nr_free_blocks);
+						lun->nr_free_blocks);
 
-		spin_unlock(&lun->vlun.lock);
+		spin_unlock(&lun->lock);
 	}
 }
 
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index 8ecfa817d21d..d167f391fbae 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -20,25 +20,11 @@
 
 #include <linux/lightnvm.h>
 
-struct gen_lun {
-	struct nvm_lun vlun;
-
-	int reserved_blocks;
-	/* lun block lists */
-	struct list_head used_list;	/* In-use blocks */
-	struct list_head free_list;	/* Not used blocks i.e. released
-					 * and ready for use
-					 */
-	struct list_head bb_list;	/* Bad blocks. Mutually exclusive with
-					 * free_list and used_list
-					 */
-};
-
 struct gen_dev {
 	struct nvm_dev *dev;
 
 	int nr_luns;
-	struct gen_lun *luns;
+	struct nvm_lun *luns;
 	struct list_head area_list;
 
 	struct mutex lock;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 98278a9fcb1f..33940bdc18a9 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -275,7 +275,17 @@ struct nvm_lun {
 
 	spinlock_t lock;
 
+	/* lun block lists */
+	struct list_head used_list;	/* In-use blocks */
+	struct list_head free_list;	/* Not used blocks i.e. released
+					 * and ready for use
+					 */
+	struct list_head bb_list;	/* Bad blocks. Mutually exclusive with
+					 * free_list and used_list
+					 */
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
+	int reserved_blocks;
+
 	struct nvm_block *blocks;
 };
 
-- 
cgit v1.2.3


From 8176117b82e49e043d045f214ba7a892fba6b827 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:05 +0100
Subject: lightnvm: manage lun partitions internally in mm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LUNs are exclusively owned by targets implementing a block device FTL.
Doing this reservation requires at the moment a 2-way callback gennvm
<-> target. The reason behind this is that LUNs were not assumed to
always be exclusively owned by targets. However, this design decision
goes against I/O determinism QoS (two targets would mix I/O on the same
parallel unit in the device).

This patch makes LUN reservation as part of the target creation on the
media manager. This makes that LUNs are always exclusively owned by the
target instantiated on top of them. LUN stripping and/or sharing should
be implemented on the target itself or the layers on top.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c | 62 +++++++++++++++++++++++++++++++++++++----------
 drivers/lightnvm/rrpc.c   |  7 ------
 include/linux/lightnvm.h  |  6 ++---
 3 files changed, 51 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 3572ebbb50ce..9671e11356be 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -35,6 +35,45 @@ static const struct block_device_operations gen_fops = {
 	.owner		= THIS_MODULE,
 };
 
+static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t,
+			    int lun_begin, int lun_end)
+{
+	struct gen_dev *gn = dev->mp;
+	struct nvm_lun *lun;
+	int i;
+
+	for (i = lun_begin; i <= lun_end; i++) {
+		if (test_and_set_bit(i, dev->lun_map)) {
+			pr_err("nvm: lun %d already allocated\n", i);
+			goto err;
+		}
+
+		lun = &gn->luns[i];
+		list_add_tail(&lun->list, &t->lun_list);
+	}
+
+	return 0;
+
+err:
+	while (--i > lun_begin) {
+		lun = &gn->luns[i];
+		clear_bit(i, dev->lun_map);
+		list_del(&lun->list);
+	}
+
+	return -EBUSY;
+}
+
+static void gen_release_luns(struct nvm_dev *dev, struct nvm_target *t)
+{
+	struct nvm_lun *lun, *tmp;
+
+	list_for_each_entry_safe(lun, tmp, &t->lun_list, list) {
+		WARN_ON(!test_and_clear_bit(lun->id, dev->lun_map));
+		list_del(&lun->list);
+	}
+}
+
 static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 {
 	struct gen_dev *gn = dev->mp;
@@ -64,9 +103,14 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	if (!t)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&t->lun_list);
+
+	if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end))
+		goto err_t;
+
 	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
 	if (!tqueue)
-		goto err_t;
+		goto err_reserve;
 	blk_queue_make_request(tqueue, tt->make_rq);
 
 	tdisk = alloc_disk(0);
@@ -105,6 +149,8 @@ err_init:
 	put_disk(tdisk);
 err_queue:
 	blk_cleanup_queue(tqueue);
+err_reserve:
+	gen_release_luns(dev, t);
 err_t:
 	kfree(t);
 	return -ENOMEM;
@@ -122,6 +168,7 @@ static void __gen_remove_target(struct nvm_target *t)
 	if (tt->exit)
 		tt->exit(tdisk->private_data);
 
+	gen_release_luns(t->dev, t);
 	put_disk(tdisk);
 
 	list_del(&t->list);
@@ -253,6 +300,7 @@ static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
 		INIT_LIST_HEAD(&lun->free_list);
 		INIT_LIST_HEAD(&lun->used_list);
 		INIT_LIST_HEAD(&lun->bb_list);
+		INIT_LIST_HEAD(&lun->list);
 
 		spin_lock_init(&lun->lock);
 
@@ -569,16 +617,6 @@ static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
 	return nvm_erase_ppa(dev, &addr, 1, flags);
 }
 
-static int gen_reserve_lun(struct nvm_dev *dev, int lunid)
-{
-	return test_and_set_bit(lunid, dev->lun_map);
-}
-
-static void gen_release_lun(struct nvm_dev *dev, int lunid)
-{
-	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
-}
-
 static struct nvm_lun *gen_get_lun(struct nvm_dev *dev, int lunid)
 {
 	struct gen_dev *gn = dev->mp;
@@ -625,8 +663,6 @@ static struct nvmm_type gen = {
 	.mark_blk		= gen_mark_blk,
 
 	.get_lun		= gen_get_lun,
-	.reserve_lun		= gen_reserve_lun,
-	.release_lun		= gen_release_lun,
 	.lun_info_print		= gen_lun_info_print,
 
 	.get_area		= gen_get_area,
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 34d2ebf90ed6..88e0d0677b09 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -1126,7 +1126,6 @@ static void rrpc_core_free(struct rrpc *rrpc)
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
-	struct nvm_dev *dev = rrpc->dev;
 	struct nvm_lun *lun;
 	struct rrpc_lun *rlun;
 	int i;
@@ -1139,7 +1138,6 @@ static void rrpc_luns_free(struct rrpc *rrpc)
 		lun = rlun->parent;
 		if (!lun)
 			break;
-		dev->mt->release_lun(dev, lun->id);
 		vfree(rlun->blocks);
 	}
 
@@ -1169,11 +1167,6 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 		int lunid = lun_begin + i;
 		struct nvm_lun *lun;
 
-		if (dev->mt->reserve_lun(dev, lunid)) {
-			pr_err("rrpc: lun %u is already allocated\n", lunid);
-			goto err;
-		}
-
 		lun = dev->mt->get_lun(dev, lunid);
 		if (!lun)
 			goto err;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 33940bdc18a9..89c695483d55 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -210,6 +210,7 @@ struct nvm_id {
 
 struct nvm_target {
 	struct list_head list;
+	struct list_head lun_list;
 	struct nvm_dev *dev;
 	struct nvm_tgt_type *type;
 	struct gendisk *disk;
@@ -273,6 +274,7 @@ struct nvm_lun {
 	int lun_id;
 	int chnl_id;
 
+	struct list_head list;
 	spinlock_t lock;
 
 	/* lun block lists */
@@ -521,8 +523,6 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
-typedef void (nvmm_release_lun)(struct nvm_dev *, int);
 typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
@@ -550,8 +550,6 @@ struct nvmm_type {
 
 	/* Configuration management */
 	nvmm_get_lun_fn *get_lun;
-	nvmm_reserve_lun *reserve_lun;
-	nvmm_release_lun *release_lun;
 
 	/* Statistics */
 	nvmm_lun_info_print_fn *lun_info_print;
-- 
cgit v1.2.3


From 8e79b5cb1d3b8eceaf6862995952dd4de431dd99 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:06 +0100
Subject: lightnvm: move block provisioning to targets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to naturally support multi-target instances on an Open-Channel
SSD, targets should own the LUNs they get blocks from and manage
provisioning internally. This is done in several steps.

This patch moves the block provisioning inside of the target and removes
the get/put block interface from the media manager.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c      |  97 ++++++++++-----------
 drivers/lightnvm/gennvm.c    | 145 +++++++++++++++-----------------
 drivers/lightnvm/rrpc.c      | 196 ++++++++++++++++++++++++++++---------------
 drivers/lightnvm/rrpc.h      |  15 +++-
 drivers/lightnvm/sysblk.c    |  65 +++++++-------
 drivers/nvme/host/lightnvm.c |  11 ++-
 include/linux/lightnvm.h     | 133 +++++++++++++++--------------
 7 files changed, 371 insertions(+), 291 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index d4433d3b4855..493ce034e687 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -176,20 +176,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 	return NULL;
 }
 
-struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
-							unsigned long flags)
-{
-	return dev->mt->get_blk(dev, lun, flags);
-}
-EXPORT_SYMBOL(nvm_get_blk);
-
-/* Assumes that all valid pages have already been moved on release to bm */
-void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
-{
-	return dev->mt->put_blk(dev, blk);
-}
-EXPORT_SYMBOL(nvm_put_blk);
-
 void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 {
 	return dev->mt->mark_blk(dev, ppa, type);
@@ -266,10 +252,11 @@ EXPORT_SYMBOL(nvm_generic_to_addr_mode);
 int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
 			const struct ppa_addr *ppas, int nr_ppas, int vblk)
 {
+	struct nvm_geo *geo = &dev->geo;
 	int i, plane_cnt, pl_idx;
 	struct ppa_addr ppa;
 
-	if ((!vblk || dev->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) {
+	if ((!vblk || geo->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) {
 		rqd->nr_ppas = nr_ppas;
 		rqd->ppa_addr = ppas[0];
 
@@ -287,7 +274,7 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
 		for (i = 0; i < nr_ppas; i++)
 			rqd->ppa_list[i] = ppas[i];
 	} else {
-		plane_cnt = dev->plane_mode;
+		plane_cnt = geo->plane_mode;
 		rqd->nr_ppas *= plane_cnt;
 
 		for (i = 0; i < nr_ppas; i++) {
@@ -465,17 +452,18 @@ EXPORT_SYMBOL(nvm_submit_ppa);
  */
 int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
 {
+	struct nvm_geo *geo = &dev->geo;
 	int blk, offset, pl, blktype;
 
-	if (nr_blks != dev->blks_per_lun * dev->plane_mode)
+	if (nr_blks != geo->blks_per_lun * geo->plane_mode)
 		return -EINVAL;
 
-	for (blk = 0; blk < dev->blks_per_lun; blk++) {
-		offset = blk * dev->plane_mode;
+	for (blk = 0; blk < geo->blks_per_lun; blk++) {
+		offset = blk * geo->plane_mode;
 		blktype = blks[offset];
 
 		/* Bad blocks on any planes take precedence over other types */
-		for (pl = 0; pl < dev->plane_mode; pl++) {
+		for (pl = 0; pl < geo->plane_mode; pl++) {
 			if (blks[offset + pl] &
 					(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
 				blktype = blks[offset + pl];
@@ -486,7 +474,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
 		blks[blk] = blktype;
 	}
 
-	return dev->blks_per_lun;
+	return geo->blks_per_lun;
 }
 EXPORT_SYMBOL(nvm_bb_tbl_fold);
 
@@ -500,9 +488,10 @@ EXPORT_SYMBOL(nvm_get_bb_tbl);
 
 static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
 {
+	struct nvm_geo *geo = &dev->geo;
 	int i;
 
-	dev->lps_per_blk = dev->pgs_per_blk;
+	dev->lps_per_blk = geo->pgs_per_blk;
 	dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
 	if (!dev->lptbl)
 		return -ENOMEM;
@@ -548,29 +537,32 @@ static int nvm_core_init(struct nvm_dev *dev)
 {
 	struct nvm_id *id = &dev->identity;
 	struct nvm_id_group *grp = &id->groups[0];
+	struct nvm_geo *geo = &dev->geo;
 	int ret;
 
-	/* device values */
-	dev->nr_chnls = grp->num_ch;
-	dev->luns_per_chnl = grp->num_lun;
-	dev->pgs_per_blk = grp->num_pg;
-	dev->blks_per_lun = grp->num_blk;
-	dev->nr_planes = grp->num_pln;
-	dev->fpg_size = grp->fpg_sz;
-	dev->pfpg_size = grp->fpg_sz * grp->num_pln;
-	dev->sec_size = grp->csecs;
-	dev->oob_size = grp->sos;
-	dev->sec_per_pg = grp->fpg_sz / grp->csecs;
-	dev->mccap = grp->mccap;
-	memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
-
-	dev->plane_mode = NVM_PLANE_SINGLE;
-	dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
+	/* Whole device values */
+	geo->nr_chnls = grp->num_ch;
+	geo->luns_per_chnl = grp->num_lun;
+
+	/* Generic device values */
+	geo->pgs_per_blk = grp->num_pg;
+	geo->blks_per_lun = grp->num_blk;
+	geo->nr_planes = grp->num_pln;
+	geo->fpg_size = grp->fpg_sz;
+	geo->pfpg_size = grp->fpg_sz * grp->num_pln;
+	geo->sec_size = grp->csecs;
+	geo->oob_size = grp->sos;
+	geo->sec_per_pg = grp->fpg_sz / grp->csecs;
+	geo->mccap = grp->mccap;
+	memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
+
+	geo->plane_mode = NVM_PLANE_SINGLE;
+	geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size;
 
 	if (grp->mpos & 0x020202)
-		dev->plane_mode = NVM_PLANE_DOUBLE;
+		geo->plane_mode = NVM_PLANE_DOUBLE;
 	if (grp->mpos & 0x040404)
-		dev->plane_mode = NVM_PLANE_QUAD;
+		geo->plane_mode = NVM_PLANE_QUAD;
 
 	if (grp->mtype != 0) {
 		pr_err("nvm: memory type not supported\n");
@@ -578,13 +570,13 @@ static int nvm_core_init(struct nvm_dev *dev)
 	}
 
 	/* calculated values */
-	dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes;
-	dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk;
-	dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun;
-	dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
+	geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes;
+	geo->sec_per_blk = geo->sec_per_pl * geo->pgs_per_blk;
+	geo->sec_per_lun = geo->sec_per_blk * geo->blks_per_lun;
+	geo->nr_luns = geo->luns_per_chnl * geo->nr_chnls;
 
-	dev->total_secs = dev->nr_luns * dev->sec_per_lun;
-	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+	dev->total_secs = geo->nr_luns * geo->sec_per_lun;
+	dev->lun_map = kcalloc(BITS_TO_LONGS(geo->nr_luns),
 					sizeof(unsigned long), GFP_KERNEL);
 	if (!dev->lun_map)
 		return -ENOMEM;
@@ -611,7 +603,7 @@ static int nvm_core_init(struct nvm_dev *dev)
 	mutex_init(&dev->mlock);
 	spin_lock_init(&dev->lock);
 
-	blk_queue_logical_block_size(dev->q, dev->sec_size);
+	blk_queue_logical_block_size(dev->q, geo->sec_size);
 
 	return 0;
 err_fmtype:
@@ -645,6 +637,7 @@ void nvm_free(struct nvm_dev *dev)
 
 static int nvm_init(struct nvm_dev *dev)
 {
+	struct nvm_geo *geo = &dev->geo;
 	int ret = -EINVAL;
 
 	if (!dev->q || !dev->ops)
@@ -676,9 +669,9 @@ static int nvm_init(struct nvm_dev *dev)
 	}
 
 	pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
-			dev->name, dev->sec_per_pg, dev->nr_planes,
-			dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns,
-			dev->nr_chnls);
+			dev->name, geo->sec_per_pg, geo->nr_planes,
+			geo->pgs_per_blk, geo->blks_per_lun,
+			geo->nr_luns, geo->nr_chnls);
 	return 0;
 err:
 	pr_err("nvm: failed to initialize nvm\n");
@@ -771,9 +764,9 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
 	}
 	s = &create->conf.s;
 
-	if (s->lun_begin > s->lun_end || s->lun_end > dev->nr_luns) {
+	if (s->lun_begin > s->lun_end || s->lun_end > dev->geo.nr_luns) {
 		pr_err("nvm: lun out of bound (%u:%u > %u)\n",
-			s->lun_begin, s->lun_end, dev->nr_luns);
+			s->lun_begin, s->lun_end, dev->geo.nr_luns);
 		return -EINVAL;
 	}
 
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 9671e11356be..8791a2aaa9e3 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -74,6 +74,36 @@ static void gen_release_luns(struct nvm_dev *dev, struct nvm_target *t)
 	}
 }
 
+static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
+{
+	kfree(tgt_dev);
+}
+
+static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev,
+					      int lun_begin, int lun_end)
+{
+	struct nvm_tgt_dev *tgt_dev = NULL;
+	int nr_luns = lun_end - lun_begin + 1;
+
+	tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
+	if (!tgt_dev)
+		goto out;
+
+	memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
+	tgt_dev->geo.nr_chnls = (nr_luns / (dev->geo.luns_per_chnl + 1)) + 1;
+	tgt_dev->geo.nr_luns = nr_luns;
+	tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
+	tgt_dev->q = dev->q;
+	tgt_dev->ops = dev->ops;
+	tgt_dev->mt = dev->mt;
+	memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
+
+	tgt_dev->parent = dev;
+
+out:
+	return tgt_dev;
+}
+
 static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 {
 	struct gen_dev *gn = dev->mp;
@@ -82,6 +112,7 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	struct gendisk *tdisk;
 	struct nvm_tgt_type *tt;
 	struct nvm_target *t;
+	struct nvm_tgt_dev *tgt_dev;
 	void *targetdata;
 
 	tt = nvm_find_target_type(create->tgttype, 1);
@@ -108,9 +139,13 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end))
 		goto err_t;
 
+	tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end);
+	if (!tgt_dev)
+		goto err_reserve;
+
 	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
 	if (!tqueue)
-		goto err_reserve;
+		goto err_dev;
 	blk_queue_make_request(tqueue, tt->make_rq);
 
 	tdisk = alloc_disk(0);
@@ -124,7 +159,7 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	tdisk->fops = &gen_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
+	targetdata = tt->init(tgt_dev, tdisk, s->lun_begin, s->lun_end);
 	if (IS_ERR(targetdata))
 		goto err_init;
 
@@ -138,7 +173,7 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 
 	t->type = tt;
 	t->disk = tdisk;
-	t->dev = dev;
+	t->dev = tgt_dev;
 
 	mutex_lock(&gn->lock);
 	list_add_tail(&t->list, &gn->targets);
@@ -149,6 +184,8 @@ err_init:
 	put_disk(tdisk);
 err_queue:
 	blk_cleanup_queue(tqueue);
+err_dev:
+	kfree(tgt_dev);
 err_reserve:
 	gen_release_luns(dev, t);
 err_t:
@@ -168,7 +205,8 @@ static void __gen_remove_target(struct nvm_target *t)
 	if (tt->exit)
 		tt->exit(tdisk->private_data);
 
-	gen_release_luns(t->dev, t);
+	gen_release_luns(t->dev->parent, t);
+	gen_remove_tgt_dev(t->dev);
 	put_disk(tdisk);
 
 	list_del(&t->list);
@@ -207,10 +245,11 @@ static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
 
 static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct gen_dev *gn = dev->mp;
 	struct gen_area *area, *prev, *next;
 	sector_t begin = 0;
-	sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9;
+	sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9;
 
 	if (len > max_sectors)
 		return -EINVAL;
@@ -289,10 +328,11 @@ static void gen_luns_free(struct nvm_dev *dev)
 
 static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct nvm_lun *lun;
 	int i;
 
-	gn->luns = kcalloc(dev->nr_luns, sizeof(struct nvm_lun), GFP_KERNEL);
+	gn->luns = kcalloc(geo->nr_luns, sizeof(struct nvm_lun), GFP_KERNEL);
 	if (!gn->luns)
 		return -ENOMEM;
 
@@ -305,9 +345,9 @@ static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
 		spin_lock_init(&lun->lock);
 
 		lun->id = i;
-		lun->lun_id = i % dev->luns_per_chnl;
-		lun->chnl_id = i / dev->luns_per_chnl;
-		lun->nr_free_blocks = dev->blks_per_lun;
+		lun->lun_id = i % geo->luns_per_chnl;
+		lun->chnl_id = i / geo->luns_per_chnl;
+		lun->nr_free_blocks = geo->blks_per_lun;
 	}
 	return 0;
 }
@@ -324,7 +364,7 @@ static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa,
 	if (nr_blks < 0)
 		return nr_blks;
 
-	lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
+	lun = &gn->luns[(dev->geo.luns_per_chnl * ppa.g.ch) + ppa.g.lun];
 
 	for (i = 0; i < nr_blks; i++) {
 		if (blks[i] == NVM_BLK_T_FREE)
@@ -342,6 +382,7 @@ static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa,
 static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 {
 	struct nvm_dev *dev = private;
+	struct nvm_geo *geo = &dev->geo;
 	struct gen_dev *gn = dev->mp;
 	u64 elba = slba + nlb;
 	struct nvm_lun *lun;
@@ -370,12 +411,12 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 			continue;
 
 		/* resolve block from physical address */
-		lun_id = div_u64(pba, dev->sec_per_lun);
+		lun_id = div_u64(pba, geo->sec_per_lun);
 		lun = &gn->luns[lun_id];
 
 		/* Calculate block offset into lun */
-		pba = pba - (dev->sec_per_lun * lun_id);
-		blk = &lun->blocks[div_u64(pba, dev->sec_per_blk)];
+		pba = pba - (geo->sec_per_lun * lun_id);
+		blk = &lun->blocks[div_u64(pba, geo->sec_per_blk)];
 
 		if (!blk->state) {
 			/* at this point, we don't know anything about the
@@ -393,26 +434,27 @@ static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct nvm_lun *lun;
 	struct nvm_block *block;
 	sector_t lun_iter, blk_iter, cur_block_id = 0;
 	int ret, nr_blks;
 	u8 *blks;
 
-	nr_blks = dev->blks_per_lun * dev->plane_mode;
+	nr_blks = geo->blks_per_lun * geo->plane_mode;
 	blks = kmalloc(nr_blks, GFP_KERNEL);
 	if (!blks)
 		return -ENOMEM;
 
 	gen_for_each_lun(gn, lun, lun_iter) {
 		lun->blocks = vzalloc(sizeof(struct nvm_block) *
-							dev->blks_per_lun);
+							geo->blks_per_lun);
 		if (!lun->blocks) {
 			kfree(blks);
 			return -ENOMEM;
 		}
 
-		for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) {
+		for (blk_iter = 0; blk_iter < geo->blks_per_lun; blk_iter++) {
 			block = &lun->blocks[blk_iter];
 
 			INIT_LIST_HEAD(&block->list);
@@ -474,7 +516,7 @@ static int gen_register(struct nvm_dev *dev)
 		return -ENOMEM;
 
 	gn->dev = dev;
-	gn->nr_luns = dev->nr_luns;
+	gn->nr_luns = dev->geo.nr_luns;
 	INIT_LIST_HEAD(&gn->area_list);
 	mutex_init(&gn->lock);
 	INIT_LIST_HEAD(&gn->targets);
@@ -506,7 +548,7 @@ static void gen_unregister(struct nvm_dev *dev)
 
 	mutex_lock(&gn->lock);
 	list_for_each_entry_safe(t, tmp, &gn->targets, list) {
-		if (t->dev != dev)
+		if (t->dev->parent != dev)
 			continue;
 		__gen_remove_target(t);
 	}
@@ -516,55 +558,9 @@ static void gen_unregister(struct nvm_dev *dev)
 	module_put(THIS_MODULE);
 }
 
-static struct nvm_block *gen_get_blk(struct nvm_dev *dev,
-				struct nvm_lun *lun, unsigned long flags)
-{
-	struct nvm_block *blk = NULL;
-	int is_gc = flags & NVM_IOTYPE_GC;
-
-	spin_lock(&lun->lock);
-	if (list_empty(&lun->free_list)) {
-		pr_err_ratelimited("gen: lun %u have no free pages available",
-								lun->id);
-		goto out;
-	}
-
-	if (!is_gc && lun->nr_free_blocks < lun->reserved_blocks)
-		goto out;
-
-	blk = list_first_entry(&lun->free_list, struct nvm_block, list);
-
-	list_move_tail(&blk->list, &lun->used_list);
-	blk->state = NVM_BLK_ST_TGT;
-	lun->nr_free_blocks--;
-out:
-	spin_unlock(&lun->lock);
-	return blk;
-}
-
-static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
-{
-	struct nvm_lun *lun = blk->lun;
-
-	spin_lock(&lun->lock);
-	if (blk->state & NVM_BLK_ST_TGT) {
-		list_move_tail(&blk->list, &lun->free_list);
-		lun->nr_free_blocks++;
-		blk->state = NVM_BLK_ST_FREE;
-	} else if (blk->state & NVM_BLK_ST_BAD) {
-		list_move_tail(&blk->list, &lun->bb_list);
-		blk->state = NVM_BLK_ST_BAD;
-	} else {
-		WARN_ON_ONCE(1);
-		pr_err("gen: erroneous block type (%lu -> %u)\n",
-							blk->id, blk->state);
-		list_move_tail(&blk->list, &lun->bb_list);
-	}
-	spin_unlock(&lun->lock);
-}
-
 static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct gen_dev *gn = dev->mp;
 	struct nvm_lun *lun;
 	struct nvm_block *blk;
@@ -572,18 +568,18 @@ static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
 	pr_debug("gen: ppa  (ch: %u lun: %u blk: %u pg: %u) -> %u\n",
 			ppa.g.ch, ppa.g.lun, ppa.g.blk, ppa.g.pg, type);
 
-	if (unlikely(ppa.g.ch > dev->nr_chnls ||
-					ppa.g.lun > dev->luns_per_chnl ||
-					ppa.g.blk > dev->blks_per_lun)) {
+	if (unlikely(ppa.g.ch > geo->nr_chnls ||
+					ppa.g.lun > geo->luns_per_chnl ||
+					ppa.g.blk > geo->blks_per_lun)) {
 		WARN_ON_ONCE(1);
 		pr_err("gen: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u",
-				ppa.g.ch, dev->nr_chnls,
-				ppa.g.lun, dev->luns_per_chnl,
-				ppa.g.blk, dev->blks_per_lun);
+				ppa.g.ch, geo->nr_chnls,
+				ppa.g.lun, geo->luns_per_chnl,
+				ppa.g.blk, geo->blks_per_lun);
 		return;
 	}
 
-	lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
+	lun = &gn->luns[(geo->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
 	blk = &lun->blocks[ppa.g.blk];
 
 	/* will be moved to bb list on put_blk from target */
@@ -621,7 +617,7 @@ static struct nvm_lun *gen_get_lun(struct nvm_dev *dev, int lunid)
 {
 	struct gen_dev *gn = dev->mp;
 
-	if (unlikely(lunid >= dev->nr_luns))
+	if (unlikely(lunid >= dev->geo.nr_luns))
 		return NULL;
 
 	return &gn->luns[lunid];
@@ -654,9 +650,6 @@ static struct nvmm_type gen = {
 	.create_tgt		= gen_create_tgt,
 	.remove_tgt		= gen_remove_tgt,
 
-	.get_blk		= gen_get_blk,
-	.put_blk		= gen_put_blk,
-
 	.submit_io		= gen_submit_io,
 	.erase_blk		= gen_erase_blk,
 
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 88e0d0677b09..5377c7a987aa 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -28,6 +28,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 
 static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_block *rblk = a->rblk;
 	unsigned int pg_offset;
 
@@ -38,7 +39,7 @@ static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
 
 	spin_lock(&rblk->lock);
 
-	div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
+	div_u64_rem(a->addr, dev->geo.sec_per_blk, &pg_offset);
 	WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
 	rblk->nr_invalid_pages++;
 
@@ -116,32 +117,36 @@ static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
 
 static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	return (rblk->next_page == rrpc->dev->sec_per_blk);
+	struct nvm_tgt_dev *dev = rrpc->dev;
+
+	return (rblk->next_page == dev->geo.sec_per_blk);
 }
 
 /* Calculate relative addr for the given block, considering instantiated LUNs */
 static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_block *blk = rblk->parent;
-	int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
+	int lun_blk = blk->id % (dev->geo.blks_per_lun * rrpc->nr_luns);
 
-	return lun_blk * rrpc->dev->sec_per_blk;
+	return lun_blk * dev->geo.sec_per_blk;
 }
 
 /* Calculate global addr for the given block */
 static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_block *blk = rblk->parent;
 
-	return blk->id * rrpc->dev->sec_per_blk;
+	return blk->id * dev->geo.sec_per_blk;
 }
 
-static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
+static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev, u64 addr)
 {
 	struct ppa_addr paddr;
 
 	paddr.ppa = addr;
-	return linear_to_generic_addr(dev, paddr);
+	return linear_to_generic_addr(&dev->geo, paddr);
 }
 
 /* requires lun->lock taken */
@@ -158,21 +163,52 @@ static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *new_rblk,
 	*cur_rblk = new_rblk;
 }
 
+static struct nvm_block *__rrpc_get_blk(struct rrpc *rrpc,
+							struct rrpc_lun *rlun)
+{
+	struct nvm_lun *lun = rlun->parent;
+	struct nvm_block *blk = NULL;
+
+	if (list_empty(&lun->free_list))
+		goto out;
+
+	blk = list_first_entry(&lun->free_list, struct nvm_block, list);
+
+	list_move_tail(&blk->list, &lun->used_list);
+	blk->state = NVM_BLK_ST_TGT;
+	lun->nr_free_blocks--;
+
+out:
+	return blk;
+}
+
 static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 							unsigned long flags)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
+	struct nvm_lun *lun = rlun->parent;
 	struct nvm_block *blk;
 	struct rrpc_block *rblk;
+	int is_gc = flags & NVM_IOTYPE_GC;
+
+	spin_lock(&rlun->lock);
+	if (!is_gc && lun->nr_free_blocks < rlun->reserved_blocks) {
+		pr_err("nvm: rrpc: cannot give block to non GC request\n");
+		spin_unlock(&rlun->lock);
+		return NULL;
+	}
 
-	blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
+	blk = __rrpc_get_blk(rrpc, rlun);
 	if (!blk) {
-		pr_err("nvm: rrpc: cannot get new block from media manager\n");
+		pr_err("nvm: rrpc: cannot get new block\n");
+		spin_unlock(&rlun->lock);
 		return NULL;
 	}
+	spin_unlock(&rlun->lock);
 
 	rblk = rrpc_get_rblk(rlun, blk->id);
 	blk->priv = rblk;
-	bitmap_zero(rblk->invalid_pages, rrpc->dev->sec_per_blk);
+	bitmap_zero(rblk->invalid_pages, dev->geo.sec_per_blk);
 	rblk->next_page = 0;
 	rblk->nr_invalid_pages = 0;
 	atomic_set(&rblk->data_cmnt_size, 0);
@@ -182,7 +218,25 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 
 static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	nvm_put_blk(rrpc->dev, rblk->parent);
+	struct nvm_block *blk = rblk->parent;
+	struct rrpc_lun *rlun = rblk->rlun;
+	struct nvm_lun *lun = rlun->parent;
+
+	spin_lock(&rlun->lock);
+	if (blk->state & NVM_BLK_ST_TGT) {
+		list_move_tail(&blk->list, &lun->free_list);
+		lun->nr_free_blocks++;
+		blk->state = NVM_BLK_ST_FREE;
+	} else if (blk->state & NVM_BLK_ST_BAD) {
+		list_move_tail(&blk->list, &lun->bb_list);
+		blk->state = NVM_BLK_ST_BAD;
+	} else {
+		WARN_ON_ONCE(1);
+		pr_err("rrpc: erroneous block type (%lu -> %u)\n",
+							blk->id, blk->state);
+		list_move_tail(&blk->list, &lun->bb_list);
+	}
+	spin_unlock(&rlun->lock);
 }
 
 static void rrpc_put_blks(struct rrpc *rrpc)
@@ -250,13 +304,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
  */
 static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	struct request_queue *q = rrpc->dev->q;
+	struct nvm_tgt_dev *dev = rrpc->dev;
+	struct request_queue *q = dev->q;
 	struct rrpc_rev_addr *rev;
 	struct nvm_rq *rqd;
 	struct bio *bio;
 	struct page *page;
 	int slot;
-	int nr_sec_per_blk = rrpc->dev->sec_per_blk;
+	int nr_sec_per_blk = dev->geo.sec_per_blk;
 	u64 phys_addr;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
@@ -366,7 +421,7 @@ static void rrpc_block_gc(struct work_struct *work)
 	struct rrpc *rrpc = gcb->rrpc;
 	struct rrpc_block *rblk = gcb->rblk;
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 
 	mempool_free(gcb, rrpc->gcb_pool);
 	pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
@@ -374,7 +429,7 @@ static void rrpc_block_gc(struct work_struct *work)
 	if (rrpc_move_valid_pages(rrpc, rblk))
 		goto put_back;
 
-	if (nvm_erase_blk(dev, rblk->parent, 0))
+	if (nvm_erase_blk(dev->parent, rblk->parent, 0))
 		goto put_back;
 
 	rrpc_put_blk(rrpc, rblk);
@@ -420,11 +475,12 @@ static void rrpc_lun_gc(struct work_struct *work)
 {
 	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
 	struct rrpc *rrpc = rlun->rrpc;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_lun *lun = rlun->parent;
 	struct rrpc_block_gc *gcb;
 	unsigned int nr_blocks_need;
 
-	nr_blocks_need = rrpc->dev->blks_per_lun / GC_LIMIT_INVERSE;
+	nr_blocks_need = dev->geo.blks_per_lun / GC_LIMIT_INVERSE;
 
 	if (nr_blocks_need < rrpc->nr_luns)
 		nr_blocks_need = rrpc->nr_luns;
@@ -645,15 +701,15 @@ static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
 	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
 }
 
-static void __rrpc_mark_bad_block(struct nvm_dev *dev, struct ppa_addr *ppa)
+static void __rrpc_mark_bad_block(struct nvm_tgt_dev *dev, struct ppa_addr *ppa)
 {
-		nvm_mark_blk(dev, *ppa, NVM_BLK_ST_BAD);
-		nvm_set_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+		nvm_mark_blk(dev->parent, *ppa, NVM_BLK_ST_BAD);
+		nvm_set_bb_tbl(dev->parent, ppa, 1, NVM_BLK_T_GRWN_BAD);
 }
 
 static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	void *comp_bits = &rqd->ppa_status;
 	struct ppa_addr ppa, prev_ppa;
 	int nr_ppas = rqd->nr_ppas;
@@ -676,6 +732,7 @@ static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
 static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 						sector_t laddr, uint8_t npages)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_addr *p;
 	struct rrpc_block *rblk;
 	struct nvm_lun *lun;
@@ -687,7 +744,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 		lun = rblk->parent->lun;
 
 		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
-		if (unlikely(cmnt_size == rrpc->dev->sec_per_blk))
+		if (unlikely(cmnt_size == dev->geo.sec_per_blk))
 			rrpc_run_gc(rrpc, rblk);
 	}
 }
@@ -695,6 +752,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 static void rrpc_end_io(struct nvm_rq *rqd)
 {
 	struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
 	uint8_t npages = rqd->nr_ppas;
 	sector_t laddr = rrpc_get_laddr(rqd->bio) - npages;
@@ -714,7 +772,7 @@ static void rrpc_end_io(struct nvm_rq *rqd)
 	rrpc_unlock_rq(rrpc, rqd);
 
 	if (npages > 1)
-		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
 
 	mempool_free(rqd, rrpc->rq_pool);
 }
@@ -722,6 +780,7 @@ static void rrpc_end_io(struct nvm_rq *rqd)
 static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 			struct nvm_rq *rqd, unsigned long flags, int npages)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
 	struct rrpc_addr *gp;
 	sector_t laddr = rrpc_get_laddr(bio);
@@ -729,7 +788,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 	int i;
 
 	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
-		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
 		return NVM_IO_REQUEUE;
 	}
 
@@ -739,12 +798,11 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 		gp = &rrpc->trans_map[laddr + i];
 
 		if (gp->rblk) {
-			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
-								gp->addr);
+			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, gp->addr);
 		} else {
 			BUG_ON(is_gc);
 			rrpc_unlock_laddr(rrpc, r);
-			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+			nvm_dev_dma_free(dev->parent, rqd->ppa_list,
 							rqd->dma_ppa_list);
 			return NVM_IO_DONE;
 		}
@@ -784,6 +842,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
 static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 			struct nvm_rq *rqd, unsigned long flags, int npages)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
 	struct rrpc_addr *p;
 	sector_t laddr = rrpc_get_laddr(bio);
@@ -791,7 +850,7 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 	int i;
 
 	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
-		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
 		return NVM_IO_REQUEUE;
 	}
 
@@ -801,14 +860,13 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 		if (!p) {
 			BUG_ON(is_gc);
 			rrpc_unlock_laddr(rrpc, r);
-			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+			nvm_dev_dma_free(dev->parent, rqd->ppa_list,
 							rqd->dma_ppa_list);
 			rrpc_gc_kick(rrpc);
 			return NVM_IO_REQUEUE;
 		}
 
-		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
-								p->addr);
+		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, p->addr);
 	}
 
 	rqd->opcode = NVM_OP_HBWRITE;
@@ -843,8 +901,10 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
 static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
 			struct nvm_rq *rqd, unsigned long flags, uint8_t npages)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
+
 	if (npages > 1) {
-		rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL,
+		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
 							&rqd->dma_ppa_list);
 		if (!rqd->ppa_list) {
 			pr_err("rrpc: not able to allocate ppa list\n");
@@ -867,14 +927,15 @@ static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
 static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 				struct nvm_rq *rqd, unsigned long flags)
 {
-	int err;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd);
 	uint8_t nr_pages = rrpc_get_pages(bio);
 	int bio_size = bio_sectors(bio) << 9;
+	int err;
 
-	if (bio_size < rrpc->dev->sec_size)
+	if (bio_size < dev->geo.sec_size)
 		return NVM_IO_ERR;
-	else if (bio_size > rrpc->dev->max_rq_size)
+	else if (bio_size > dev->geo.max_rq_size)
 		return NVM_IO_ERR;
 
 	err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages);
@@ -887,15 +948,15 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 	rqd->nr_ppas = nr_pages;
 	rrq->flags = flags;
 
-	err = nvm_submit_io(rrpc->dev, rqd);
+	err = nvm_submit_io(dev->parent, rqd);
 	if (err) {
 		pr_err("rrpc: I/O submission failed: %d\n", err);
 		bio_put(bio);
 		if (!(flags & NVM_IOTYPE_GC)) {
 			rrpc_unlock_rq(rrpc, rqd);
 			if (rqd->nr_ppas > 1)
-				nvm_dev_dma_free(rrpc->dev,
-			rqd->ppa_list, rqd->dma_ppa_list);
+				nvm_dev_dma_free(dev->parent,
+					rqd->ppa_list, rqd->dma_ppa_list);
 		}
 		return NVM_IO_ERR;
 	}
@@ -997,17 +1058,11 @@ static void rrpc_map_free(struct rrpc *rrpc)
 static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 {
 	struct rrpc *rrpc = (struct rrpc *)private;
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_addr *addr = rrpc->trans_map + slba;
 	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
-	u64 elba = slba + nlb;
 	u64 i;
 
-	if (unlikely(elba > dev->total_secs)) {
-		pr_err("nvm: L2P data from device is out of bounds!\n");
-		return -EINVAL;
-	}
-
 	for (i = 0; i < nlb; i++) {
 		u64 pba = le64_to_cpu(entries[i]);
 		unsigned int mod;
@@ -1037,7 +1092,7 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 static int rrpc_map_init(struct rrpc *rrpc)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	sector_t i;
 	int ret;
 
@@ -1062,7 +1117,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
 		return 0;
 
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev, rrpc->soffset, rrpc->nr_sects,
+	ret = dev->ops->get_l2p_tbl(dev->parent, rrpc->soffset, rrpc->nr_sects,
 					rrpc_l2p_update, rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1102,7 +1157,7 @@ static int rrpc_core_init(struct rrpc *rrpc)
 	if (!rrpc->page_pool)
 		return -ENOMEM;
 
-	rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->nr_luns,
+	rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->geo.nr_luns,
 								rrpc_gcb_cache);
 	if (!rrpc->gcb_pool)
 		return -ENOMEM;
@@ -1146,11 +1201,12 @@ static void rrpc_luns_free(struct rrpc *rrpc)
 
 static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
+	struct nvm_geo *geo = &dev->geo;
 	struct rrpc_lun *rlun;
 	int i, j, ret = -EINVAL;
 
-	if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
+	if (geo->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
 		pr_err("rrpc: number of pages per block too high.");
 		return -EINVAL;
 	}
@@ -1167,20 +1223,20 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 		int lunid = lun_begin + i;
 		struct nvm_lun *lun;
 
-		lun = dev->mt->get_lun(dev, lunid);
+		lun = dev->mt->get_lun(dev->parent, lunid);
 		if (!lun)
 			goto err;
 
 		rlun = &rrpc->luns[i];
 		rlun->parent = lun;
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
-						rrpc->dev->blks_per_lun);
+							geo->blks_per_lun);
 		if (!rlun->blocks) {
 			ret = -ENOMEM;
 			goto err;
 		}
 
-		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
+		for (j = 0; j < geo->blks_per_lun; j++) {
 			struct rrpc_block *rblk = &rlun->blocks[j];
 			struct nvm_block *blk = &lun->blocks[j];
 
@@ -1190,6 +1246,8 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 			spin_lock_init(&rblk->lock);
 		}
 
+		rlun->reserved_blocks = 2; /* for GC only */
+
 		rlun->rrpc = rrpc;
 		INIT_LIST_HEAD(&rlun->prio_list);
 		INIT_LIST_HEAD(&rlun->wblk_list);
@@ -1206,27 +1264,27 @@ err:
 /* returns 0 on success and stores the beginning address in *begin */
 static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvmm_type *mt = dev->mt;
-	sector_t size = rrpc->nr_sects * dev->sec_size;
+	sector_t size = rrpc->nr_sects * dev->geo.sec_size;
 	int ret;
 
 	size >>= 9;
 
-	ret = mt->get_area(dev, begin, size);
+	ret = mt->get_area(dev->parent, begin, size);
 	if (!ret)
-		*begin >>= (ilog2(dev->sec_size) - 9);
+		*begin >>= (ilog2(dev->geo.sec_size) - 9);
 
 	return ret;
 }
 
 static void rrpc_area_free(struct rrpc *rrpc)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvmm_type *mt = dev->mt;
-	sector_t begin = rrpc->soffset << (ilog2(dev->sec_size) - 9);
+	sector_t begin = rrpc->soffset << (ilog2(dev->geo.sec_size) - 9);
 
-	mt->put_area(dev, begin);
+	mt->put_area(dev->parent, begin);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
@@ -1255,11 +1313,11 @@ static void rrpc_exit(void *private)
 static sector_t rrpc_capacity(void *private)
 {
 	struct rrpc *rrpc = private;
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	sector_t reserved, provisioned;
 
 	/* cur, gc, and two emergency blocks for each lun */
-	reserved = rrpc->nr_luns * dev->sec_per_blk * 4;
+	reserved = rrpc->nr_luns * dev->geo.sec_per_blk * 4;
 	provisioned = rrpc->nr_sects - reserved;
 
 	if (reserved > rrpc->nr_sects) {
@@ -1278,13 +1336,13 @@ static sector_t rrpc_capacity(void *private)
  */
 static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	int offset;
 	struct rrpc_addr *laddr;
 	u64 bpaddr, paddr, pladdr;
 
 	bpaddr = block_to_rel_addr(rrpc, rblk);
-	for (offset = 0; offset < dev->sec_per_blk; offset++) {
+	for (offset = 0; offset < dev->geo.sec_per_blk; offset++) {
 		paddr = bpaddr + offset;
 
 		pladdr = rrpc->rev_trans_map[paddr].addr;
@@ -1304,6 +1362,7 @@ static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
 
 static int rrpc_blocks_init(struct rrpc *rrpc)
 {
+	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun;
 	struct rrpc_block *rblk;
 	int lun_iter, blk_iter;
@@ -1311,7 +1370,7 @@ static int rrpc_blocks_init(struct rrpc *rrpc)
 	for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) {
 		rlun = &rrpc->luns[lun_iter];
 
-		for (blk_iter = 0; blk_iter < rrpc->dev->blks_per_lun;
+		for (blk_iter = 0; blk_iter < dev->geo.blks_per_lun;
 								blk_iter++) {
 			rblk = &rlun->blocks[blk_iter];
 			rrpc_block_map_update(rrpc, rblk);
@@ -1350,11 +1409,12 @@ err:
 
 static struct nvm_tgt_type tt_rrpc;
 
-static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
+static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 						int lun_begin, int lun_end)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
+	struct nvm_geo *geo = &dev->geo;
 	struct rrpc *rrpc;
 	sector_t soffset;
 	int ret;
@@ -1377,8 +1437,8 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	spin_lock_init(&rrpc->bio_lock);
 	INIT_WORK(&rrpc->ws_requeue, rrpc_requeue);
 
-	rrpc->nr_luns = lun_end - lun_begin + 1;
-	rrpc->nr_sects = (unsigned long long)dev->sec_per_lun * rrpc->nr_luns;
+	rrpc->nr_luns = geo->nr_luns;
+	rrpc->nr_sects = (unsigned long long)geo->sec_per_lun * rrpc->nr_luns;
 
 	/* simple round-robin strategy */
 	atomic_set(&rrpc->next_lun, -1);
@@ -1396,7 +1456,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 		goto err;
 	}
 
-	rrpc->poffset = dev->sec_per_lun * lun_begin;
+	rrpc->poffset = geo->sec_per_lun * lun_begin;
 
 	ret = rrpc_core_init(rrpc);
 	if (ret) {
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index bed49baa7841..242d4c109eb6 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -52,9 +52,12 @@ struct rrpc_rq {
 };
 
 struct rrpc_block {
+	unsigned long id;
 	struct nvm_block *parent;
 	struct rrpc_lun *rlun;
-	struct list_head prio;
+
+	struct list_head prio;		/* LUN CG list */
+	struct list_head list;		/* LUN free, used, bb list */
 
 #define MAX_INVALID_PAGES_STORAGE 8
 	/* Bitmap for invalid page intries */
@@ -64,6 +67,8 @@ struct rrpc_block {
 	/* number of pages that are invalid, wrt host page size */
 	unsigned int nr_invalid_pages;
 
+	int state;
+
 	spinlock_t lock;
 	atomic_t data_cmnt_size; /* data pages committed to stable storage */
 };
@@ -71,6 +76,7 @@ struct rrpc_block {
 struct rrpc_lun {
 	struct rrpc *rrpc;
 	struct nvm_lun *parent;
+
 	struct rrpc_block *cur, *gc_cur;
 	struct rrpc_block *blocks;	/* Reference to block allocation */
 
@@ -79,6 +85,8 @@ struct rrpc_lun {
 
 	struct work_struct ws_gc;
 
+	int reserved_blocks;
+
 	spinlock_t lock;
 };
 
@@ -86,7 +94,7 @@ struct rrpc {
 	/* instance must be kept in top to resolve rrpc in unprep */
 	struct nvm_tgt_instance instance;
 
-	struct nvm_dev *dev;
+	struct nvm_tgt_dev *dev;
 	struct gendisk *disk;
 
 	sector_t soffset; /* logical sector offset */
@@ -151,7 +159,8 @@ static inline struct rrpc_block *rrpc_get_rblk(struct rrpc_lun *rlun,
 								int blk_id)
 {
 	struct rrpc *rrpc = rlun->rrpc;
-	int lun_blk = blk_id % rrpc->dev->blks_per_lun;
+	struct nvm_tgt_dev *dev = rrpc->dev;
+	int lun_blk = blk_id % dev->geo.blks_per_lun;
 
 	return &rlun->blocks[lun_blk];
 }
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
index fa644afb25de..12002bf4efc2 100644
--- a/drivers/lightnvm/sysblk.c
+++ b/drivers/lightnvm/sysblk.c
@@ -62,7 +62,8 @@ static void nvm_cpu_to_sysblk(struct nvm_system_block *sb,
 
 static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
 {
-	int nr_rows = min_t(int, MAX_SYSBLKS, dev->nr_chnls);
+	struct nvm_geo *geo = &dev->geo;
+	int nr_rows = min_t(int, MAX_SYSBLKS, geo->nr_chnls);
 	int i;
 
 	for (i = 0; i < nr_rows; i++)
@@ -71,7 +72,7 @@ static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
 	/* if possible, place sysblk at first channel, middle channel and last
 	 * channel of the device. If not, create only one or two sys blocks
 	 */
-	switch (dev->nr_chnls) {
+	switch (geo->nr_chnls) {
 	case 2:
 		sysblk_ppas[1].g.ch = 1;
 		/* fall-through */
@@ -80,8 +81,8 @@ static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
 		break;
 	default:
 		sysblk_ppas[0].g.ch = 0;
-		sysblk_ppas[1].g.ch = dev->nr_chnls / 2;
-		sysblk_ppas[2].g.ch = dev->nr_chnls - 1;
+		sysblk_ppas[1].g.ch = geo->nr_chnls / 2;
+		sysblk_ppas[2].g.ch = geo->nr_chnls - 1;
 		break;
 	}
 
@@ -162,11 +163,12 @@ static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa,
 static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
 				struct ppa_addr *ppas, int get_free)
 {
+	struct nvm_geo *geo = &dev->geo;
 	int i, nr_blks, ret = 0;
 	u8 *blks;
 
 	s->nr_ppas = 0;
-	nr_blks = dev->blks_per_lun * dev->plane_mode;
+	nr_blks = geo->blks_per_lun * geo->plane_mode;
 
 	blks = kmalloc(nr_blks, GFP_KERNEL);
 	if (!blks)
@@ -210,13 +212,14 @@ err_get:
 static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
 						struct nvm_system_block *sblk)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct nvm_system_block *cur;
 	int pg, ret, found = 0;
 
 	/* the full buffer for a flash page is allocated. Only the first of it
 	 * contains the system block information
 	 */
-	cur = kmalloc(dev->pfpg_size, GFP_KERNEL);
+	cur = kmalloc(geo->pfpg_size, GFP_KERNEL);
 	if (!cur)
 		return -ENOMEM;
 
@@ -225,7 +228,7 @@ static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
 		ppa->g.pg = ppa_to_slc(dev, pg);
 
 		ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
-							cur, dev->pfpg_size);
+							cur, geo->pfpg_size);
 		if (ret) {
 			if (ret == NVM_RSP_ERR_EMPTYPAGE) {
 				pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
@@ -276,6 +279,7 @@ static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s,
 static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
 							struct sysblk_scan *s)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct nvm_system_block nvmsb;
 	void *buf;
 	int i, sect, ret = 0;
@@ -283,12 +287,12 @@ static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
 
 	nvm_cpu_to_sysblk(&nvmsb, info);
 
-	buf = kzalloc(dev->pfpg_size, GFP_KERNEL);
+	buf = kzalloc(geo->pfpg_size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 	memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
 
-	ppas = kcalloc(dev->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
+	ppas = kcalloc(geo->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
 	if (!ppas) {
 		ret = -ENOMEM;
 		goto err;
@@ -305,15 +309,15 @@ static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
 							ppas[0].g.pg);
 
 		/* Expand to all sectors within a flash page */
-		if (dev->sec_per_pg > 1) {
-			for (sect = 1; sect < dev->sec_per_pg; sect++) {
+		if (geo->sec_per_pg > 1) {
+			for (sect = 1; sect < geo->sec_per_pg; sect++) {
 				ppas[sect].ppa = ppas[0].ppa;
 				ppas[sect].g.sec = sect;
 			}
 		}
 
-		ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE,
-					NVM_IO_SLC_MODE, buf, dev->pfpg_size);
+		ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PWRITE,
+					NVM_IO_SLC_MODE, buf, geo->pfpg_size);
 		if (ret) {
 			pr_err("nvm: sysblk failed program (%u %u %u)\n",
 							ppas[0].g.ch,
@@ -322,8 +326,8 @@ static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
 			break;
 		}
 
-		ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD,
-					NVM_IO_SLC_MODE, buf, dev->pfpg_size);
+		ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PREAD,
+					NVM_IO_SLC_MODE, buf, geo->pfpg_size);
 		if (ret) {
 			pr_err("nvm: sysblk failed read (%u %u %u)\n",
 							ppas[0].g.ch,
@@ -527,6 +531,7 @@ err_sysblk:
 
 int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
 	struct sysblk_scan s;
 	int ret;
@@ -541,7 +546,7 @@ int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
 	if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
 		return -EINVAL;
 
-	if (!(dev->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
+	if (!(geo->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
 		pr_err("nvm: memory does not support SLC access\n");
 		return -EINVAL;
 	}
@@ -571,11 +576,11 @@ static int factory_nblks(int nblks)
 	return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
 }
 
-static unsigned int factory_blk_offset(struct nvm_dev *dev, struct ppa_addr ppa)
+static unsigned int factory_blk_offset(struct nvm_geo *geo, struct ppa_addr ppa)
 {
-	int nblks = factory_nblks(dev->blks_per_lun);
+	int nblks = factory_nblks(geo->blks_per_lun);
 
-	return ((ppa.g.ch * dev->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) /
+	return ((ppa.g.ch * geo->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) /
 								BITS_PER_LONG;
 }
 
@@ -589,7 +594,7 @@ static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa,
 	if (nr_blks < 0)
 		return nr_blks;
 
-	lunoff = factory_blk_offset(dev, ppa);
+	lunoff = factory_blk_offset(&dev->geo, ppa);
 
 	/* non-set bits correspond to the block must be erased */
 	for (i = 0; i < nr_blks; i++) {
@@ -618,19 +623,19 @@ static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa,
 static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
 					int max_ppas, unsigned long *blk_bitmap)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr ppa;
 	int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
 	unsigned long *offset;
 
 	while (!done) {
 		done = 1;
-		nvm_for_each_lun_ppa(dev, ppa, ch, lun) {
-			idx = factory_blk_offset(dev, ppa);
+		nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
+			idx = factory_blk_offset(geo, ppa);
 			offset = &blk_bitmap[idx];
 
-			blkid = find_first_zero_bit(offset,
-						dev->blks_per_lun);
-			if (blkid >= dev->blks_per_lun)
+			blkid = find_first_zero_bit(offset, geo->blks_per_lun);
+			if (blkid >= geo->blks_per_lun)
 				continue;
 			set_bit(blkid, offset);
 
@@ -655,16 +660,17 @@ static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
 static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap,
 								int flags)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr ppa;
 	int ch, lun, nr_blks, ret = 0;
 	u8 *blks;
 
-	nr_blks = dev->blks_per_lun * dev->plane_mode;
+	nr_blks = geo->blks_per_lun * geo->plane_mode;
 	blks = kmalloc(nr_blks, GFP_KERNEL);
 	if (!blks)
 		return -ENOMEM;
 
-	nvm_for_each_lun_ppa(dev, ppa, ch, lun) {
+	nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
 		ret = nvm_get_bb_tbl(dev, ppa, blks);
 		if (ret)
 			pr_err("nvm: failed bb tbl for ch%u lun%u\n",
@@ -682,14 +688,15 @@ static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap,
 
 int nvm_dev_factory(struct nvm_dev *dev, int flags)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr *ppas;
 	int ppa_cnt, ret = -ENOMEM;
-	int max_ppas = dev->ops->max_phys_sect / dev->nr_planes;
+	int max_ppas = dev->ops->max_phys_sect / geo->nr_planes;
 	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
 	struct sysblk_scan s;
 	unsigned long *blk_bitmap;
 
-	blk_bitmap = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns,
+	blk_bitmap = kzalloc(factory_nblks(geo->blks_per_lun) * geo->nr_luns,
 								GFP_KERNEL);
 	if (!blk_bitmap)
 		return ret;
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 037dff5951d4..1cdc8124c8c0 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -352,6 +352,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
 	while (nlb) {
 		u32 cmd_nlb = min(nlb_pr_rq, nlb);
+		u64 elba = slba + cmd_nlb;
 
 		c.l2p.slba = cpu_to_le64(cmd_slba);
 		c.l2p.nlb = cpu_to_le32(cmd_nlb);
@@ -365,6 +366,11 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 			goto out;
 		}
 
+		if (unlikely(elba > nvmdev->total_secs)) {
+			pr_err("nvm: L2P data from device is out of bounds!\n");
+			return -EINVAL;
+		}
+
 		if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
 			ret = -EINTR;
 			goto out;
@@ -383,11 +389,12 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 								u8 *blks)
 {
 	struct request_queue *q = nvmdev->q;
+	struct nvm_geo *geo = &nvmdev->geo;
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_nvm_command c = {};
 	struct nvme_nvm_bb_tbl *bb_tbl;
-	int nr_blks = nvmdev->blks_per_lun * nvmdev->plane_mode;
+	int nr_blks = geo->blks_per_lun * geo->plane_mode;
 	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
 	int ret = 0;
 
@@ -428,7 +435,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 		goto out;
 	}
 
-	memcpy(blks, bb_tbl->blk, nvmdev->blks_per_lun * nvmdev->plane_mode);
+	memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode);
 out:
 	kfree(bb_tbl);
 	return ret;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 89c695483d55..1f1588c2557e 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -211,7 +211,7 @@ struct nvm_id {
 struct nvm_target {
 	struct list_head list;
 	struct list_head lun_list;
-	struct nvm_dev *dev;
+	struct nvm_tgt_dev *dev;
 	struct nvm_tgt_type *type;
 	struct gendisk *disk;
 };
@@ -286,7 +286,6 @@ struct nvm_lun {
 					 * free_list and used_list
 					 */
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
-	int reserved_blocks;
 
 	struct nvm_block *blocks;
 };
@@ -315,22 +314,12 @@ struct nvm_sb_info {
 	struct ppa_addr		fs_ppa;
 };
 
-struct nvm_dev {
-	struct nvm_dev_ops *ops;
-
-	struct list_head devices;
-
-	/* Media manager */
-	struct nvmm_type *mt;
-	void *mp;
-
-	/* System blocks */
-	struct nvm_sb_info sb;
-
-	/* Device information */
+/* Device generic information */
+struct nvm_geo {
 	int nr_chnls;
+	int nr_luns;
+	int luns_per_chnl; /* -1 if channels are not symmetric */
 	int nr_planes;
-	int luns_per_chnl;
 	int sec_per_pg; /* only sectors for a single page */
 	int pgs_per_blk;
 	int blks_per_lun;
@@ -350,14 +339,43 @@ struct nvm_dev {
 	int sec_per_pl; /* all sectors across planes */
 	int sec_per_blk;
 	int sec_per_lun;
+};
+
+struct nvm_tgt_dev {
+	/* Device information */
+	struct nvm_geo geo;
+
+	sector_t total_secs;
+
+	struct nvm_id identity;
+	struct request_queue *q;
+
+	struct nvmm_type *mt;
+	struct nvm_dev_ops *ops;
+
+	void *parent;
+};
+
+struct nvm_dev {
+	struct nvm_dev_ops *ops;
+
+	struct list_head devices;
+
+	/* Media manager */
+	struct nvmm_type *mt;
+	void *mp;
+
+	/* System blocks */
+	struct nvm_sb_info sb;
+
+	/* Device information */
+	struct nvm_geo geo;
 
 	/* lower page table */
 	int lps_per_blk;
 	int *lptbl;
 
-	unsigned long total_blocks;
 	unsigned long total_secs;
-	int nr_luns;
 
 	unsigned long *lun_map;
 	void *dma_pool;
@@ -373,7 +391,7 @@ struct nvm_dev {
 	spinlock_t lock;
 };
 
-static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
 							struct ppa_addr r)
 {
 	struct ppa_addr l;
@@ -382,22 +400,22 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
 
 	l.ppa = 0;
 
-	div_u64_rem(ppa, dev->sec_per_pg, &secs);
+	div_u64_rem(ppa, geo->sec_per_pg, &secs);
 	l.g.sec = secs;
 
-	sector_div(ppa, dev->sec_per_pg);
-	div_u64_rem(ppa, dev->pgs_per_blk, &pgs);
+	sector_div(ppa, geo->sec_per_pg);
+	div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
 	l.g.pg = pgs;
 
-	sector_div(ppa, dev->pgs_per_blk);
-	div_u64_rem(ppa, dev->blks_per_lun, &blks);
+	sector_div(ppa, geo->pgs_per_blk);
+	div_u64_rem(ppa, geo->blks_per_lun, &blks);
 	l.g.blk = blks;
 
-	sector_div(ppa, dev->blks_per_lun);
-	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+	sector_div(ppa, geo->blks_per_lun);
+	div_u64_rem(ppa, geo->luns_per_chnl, &luns);
 	l.g.lun = luns;
 
-	sector_div(ppa, dev->luns_per_chnl);
+	sector_div(ppa, geo->luns_per_chnl);
 	l.g.ch = ppa;
 
 	return l;
@@ -406,14 +424,15 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 						struct ppa_addr r)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
-	l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
-	l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
-	l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
-	l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
-	l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
-	l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
+	l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
+	l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset;
+	l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset;
+	l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset;
+	l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset;
+	l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset;
 
 	return l;
 }
@@ -421,24 +440,25 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
 						struct ppa_addr r)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
 	l.ppa = 0;
 	/*
 	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
 	 */
-	l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
-					(((1 << dev->ppaf.blk_len) - 1));
-	l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
-					(((1 << dev->ppaf.pg_len) - 1));
-	l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
-					(((1 << dev->ppaf.sect_len) - 1));
-	l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
-					(((1 << dev->ppaf.pln_len) - 1));
-	l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
-					(((1 << dev->ppaf.lun_len) - 1));
-	l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
-					(((1 << dev->ppaf.ch_len) - 1));
+	l.g.blk = (r.ppa >> geo->ppaf.blk_offset) &
+					(((1 << geo->ppaf.blk_len) - 1));
+	l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) &
+					(((1 << geo->ppaf.pg_len) - 1));
+	l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) &
+					(((1 << geo->ppaf.sect_len) - 1));
+	l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) &
+					(((1 << geo->ppaf.pln_len) - 1));
+	l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) &
+					(((1 << geo->ppaf.lun_len) - 1));
+	l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) &
+					(((1 << geo->ppaf.ch_len) - 1));
 
 	return l;
 }
@@ -456,11 +476,12 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
 static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
 							struct nvm_block *blk)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr ppa;
 	struct nvm_lun *lun = blk->lun;
 
 	ppa.ppa = 0;
-	ppa.g.blk = blk->id % dev->blks_per_lun;
+	ppa.g.blk = blk->id % geo->blks_per_lun;
 	ppa.g.lun = lun->lun_id;
 	ppa.g.ch = lun->chnl_id;
 
@@ -483,7 +504,8 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int,
+				int);
 typedef void (nvm_tgt_exit_fn)(void *);
 
 struct nvm_tgt_type {
@@ -516,9 +538,6 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *);
 
 typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
 typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
-typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
-					      struct nvm_lun *, unsigned long);
-typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
@@ -538,10 +557,6 @@ struct nvmm_type {
 	nvmm_create_tgt_fn *create_tgt;
 	nvmm_remove_tgt_fn *remove_tgt;
 
-	/* Block administration callbacks */
-	nvmm_get_blk_fn *get_blk;
-	nvmm_put_blk_fn *put_blk;
-
 	nvmm_submit_io_fn *submit_io;
 	nvmm_erase_blk_fn *erase_blk;
 
@@ -563,10 +578,6 @@ struct nvmm_type {
 extern int nvm_register_mgr(struct nvmm_type *);
 extern void nvm_unregister_mgr(struct nvmm_type *);
 
-extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
-								unsigned long);
-extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
-
 extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
@@ -611,10 +622,10 @@ extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
 
 extern int nvm_dev_factory(struct nvm_dev *, int flags);
 
-#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid)			\
-	for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls;	\
+#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid)			\
+	for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls;	\
 					(chid)++, (ppa).g.ch = (chid))	\
-		for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl;	\
+		for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl;	\
 					(lunid)++, (ppa).g.lun = (lunid))
 
 #else /* CONFIG_NVM */
-- 
cgit v1.2.3


From 0ac4072eb10c9627415eb1ca511121156e20012c Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:07 +0100
Subject: lightnvm: remove get_lun operation on gennvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since LUNs are managed internally on the target, there is no need for
the media manager to implement a get_lun operation.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c | 13 +------------
 drivers/lightnvm/rrpc.c   | 23 +++++++++++------------
 include/linux/lightnvm.h  |  8 ++------
 3 files changed, 14 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 8791a2aaa9e3..3cf5d5947070 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -159,7 +159,7 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	tdisk->fops = &gen_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(tgt_dev, tdisk, s->lun_begin, s->lun_end);
+	targetdata = tt->init(tgt_dev, tdisk, &t->lun_list);
 	if (IS_ERR(targetdata))
 		goto err_init;
 
@@ -613,16 +613,6 @@ static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
 	return nvm_erase_ppa(dev, &addr, 1, flags);
 }
 
-static struct nvm_lun *gen_get_lun(struct nvm_dev *dev, int lunid)
-{
-	struct gen_dev *gn = dev->mp;
-
-	if (unlikely(lunid >= dev->geo.nr_luns))
-		return NULL;
-
-	return &gn->luns[lunid];
-}
-
 static void gen_lun_info_print(struct nvm_dev *dev)
 {
 	struct gen_dev *gn = dev->mp;
@@ -655,7 +645,6 @@ static struct nvmm_type gen = {
 
 	.mark_blk		= gen_mark_blk,
 
-	.get_lun		= gen_get_lun,
 	.lun_info_print		= gen_lun_info_print,
 
 	.get_area		= gen_get_area,
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 5377c7a987aa..165b9d396493 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -1199,10 +1199,11 @@ static void rrpc_luns_free(struct rrpc *rrpc)
 	kfree(rrpc->luns);
 }
 
-static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
+static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_geo *geo = &dev->geo;
+	struct nvm_lun *lun;
 	struct rrpc_lun *rlun;
 	int i, j, ret = -EINVAL;
 
@@ -1218,16 +1219,11 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 	if (!rrpc->luns)
 		return -ENOMEM;
 
-	/* 1:1 mapping */
-	for (i = 0; i < rrpc->nr_luns; i++) {
-		int lunid = lun_begin + i;
-		struct nvm_lun *lun;
+	i = 0;
 
-		lun = dev->mt->get_lun(dev->parent, lunid);
-		if (!lun)
-			goto err;
-
-		rlun = &rrpc->luns[i];
+	/* 1:1 mapping */
+	list_for_each_entry(lun, lun_list, list) {
+		rlun = &rrpc->luns[i++];
 		rlun->parent = lun;
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
 							geo->blks_per_lun);
@@ -1256,6 +1252,8 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 		spin_lock_init(&rlun->lock);
 	}
 
+	WARN_ON(i != rrpc->nr_luns);
+
 	return 0;
 err:
 	return ret;
@@ -1410,12 +1408,13 @@ err:
 static struct nvm_tgt_type tt_rrpc;
 
 static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
-						int lun_begin, int lun_end)
+						struct list_head *lun_list)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
 	struct nvm_geo *geo = &dev->geo;
 	struct rrpc *rrpc;
+	int lun_begin = (list_first_entry(lun_list, struct nvm_lun, list))->id;
 	sector_t soffset;
 	int ret;
 
@@ -1450,7 +1449,7 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	}
 	rrpc->soffset = soffset;
 
-	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
+	ret = rrpc_luns_init(rrpc, lun_list);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");
 		goto err;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 1f1588c2557e..e56c35227249 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -504,8 +504,8 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int,
-				int);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
+				struct list_head *lun_list);
 typedef void (nvm_tgt_exit_fn)(void *);
 
 struct nvm_tgt_type {
@@ -541,7 +541,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
-typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
 typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
@@ -563,9 +562,6 @@ struct nvmm_type {
 	/* Bad block mgmt */
 	nvmm_mark_blk_fn *mark_blk;
 
-	/* Configuration management */
-	nvmm_get_lun_fn *get_lun;
-
 	/* Statistics */
 	nvmm_lun_info_print_fn *lun_info_print;
 
-- 
cgit v1.2.3


From eec44565e9ab13bbf5b48864a68871eabf1115c1 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:08 +0100
Subject: lightnvm: remove debug lun statistics from gennvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since LUNs are managed internally on targets, the media manager has no
access to the free LUN lists. Thus, debug functions that show LUN
information on the device should not be implemented on the media
manager, but rather on the target in itself.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c | 19 -------------------
 include/linux/lightnvm.h  |  5 -----
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 3cf5d5947070..dd9afd7ade16 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -613,23 +613,6 @@ static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
 	return nvm_erase_ppa(dev, &addr, 1, flags);
 }
 
-static void gen_lun_info_print(struct nvm_dev *dev)
-{
-	struct gen_dev *gn = dev->mp;
-	struct nvm_lun *lun;
-	unsigned int i;
-
-
-	gen_for_each_lun(gn, lun, i) {
-		spin_lock(&lun->lock);
-
-		pr_info("%s: lun%8u\t%u\n", dev->name, i,
-						lun->nr_free_blocks);
-
-		spin_unlock(&lun->lock);
-	}
-}
-
 static struct nvmm_type gen = {
 	.name			= "gennvm",
 	.version		= {0, 1, 0},
@@ -645,8 +628,6 @@ static struct nvmm_type gen = {
 
 	.mark_blk		= gen_mark_blk,
 
-	.lun_info_print		= gen_lun_info_print,
-
 	.get_area		= gen_get_area,
 	.put_area		= gen_put_area,
 
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e56c35227249..ed04fa642371 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -541,8 +541,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
 typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
-typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
-
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
 
@@ -562,9 +560,6 @@ struct nvmm_type {
 	/* Bad block mgmt */
 	nvmm_mark_blk_fn *mark_blk;
 
-	/* Statistics */
-	nvmm_lun_info_print_fn *lun_info_print;
-
 	nvmm_get_area_fn *get_area;
 	nvmm_put_area_fn *put_area;
 
-- 
cgit v1.2.3


From 2a02e627c245bfa987b97707123d7747d7b0e486 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:09 +0100
Subject: lightnvm: eliminate nvm_block abstraction on mm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to naturally support multi-target instances on an Open-Channel
SSD, targets should own the LUNs they get blocks from and manage
provisioning internally. This is done in several steps.

A part of this transformation is that targets manage their blocks
internally. This patch eliminates the nvm_block abstraction and moves
block management to the target logic. The rrpc target is transformed.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c   |  10 +-
 drivers/lightnvm/gennvm.c | 205 +---------------------------------------
 drivers/lightnvm/rrpc.c   | 234 +++++++++++++++++++++++++++++++++-------------
 drivers/lightnvm/rrpc.h   |  23 +++--
 include/linux/lightnvm.h  |  48 +---------
 5 files changed, 191 insertions(+), 329 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 493ce034e687..691b16ffda88 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -176,12 +176,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 	return NULL;
 }
 
-void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
-{
-	return dev->mt->mark_blk(dev, ppa, type);
-}
-EXPORT_SYMBOL(nvm_mark_blk);
-
 int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
 								int type)
 {
@@ -215,9 +209,9 @@ int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
-int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
+int nvm_erase_blk(struct nvm_dev *dev, struct ppa_addr *p, int flags)
 {
-	return dev->mt->erase_blk(dev, blk, flags);
+	return dev->mt->erase_blk(dev, p, flags);
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index dd9afd7ade16..19c3924ffa10 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -306,19 +306,6 @@ static void gen_put_area(struct nvm_dev *dev, sector_t begin)
 	spin_unlock(&dev->lock);
 }
 
-static void gen_blocks_free(struct nvm_dev *dev)
-{
-	struct gen_dev *gn = dev->mp;
-	struct nvm_lun *lun;
-	int i;
-
-	gen_for_each_lun(gn, lun, i) {
-		if (!lun->blocks)
-			break;
-		vfree(lun->blocks);
-	}
-}
-
 static void gen_luns_free(struct nvm_dev *dev)
 {
 	struct gen_dev *gn = dev->mp;
@@ -337,167 +324,17 @@ static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
 		return -ENOMEM;
 
 	gen_for_each_lun(gn, lun, i) {
-		INIT_LIST_HEAD(&lun->free_list);
-		INIT_LIST_HEAD(&lun->used_list);
-		INIT_LIST_HEAD(&lun->bb_list);
 		INIT_LIST_HEAD(&lun->list);
 
-		spin_lock_init(&lun->lock);
-
 		lun->id = i;
 		lun->lun_id = i % geo->luns_per_chnl;
 		lun->chnl_id = i / geo->luns_per_chnl;
-		lun->nr_free_blocks = geo->blks_per_lun;
-	}
-	return 0;
-}
-
-static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa,
-							u8 *blks, int nr_blks)
-{
-	struct nvm_dev *dev = gn->dev;
-	struct nvm_lun *lun;
-	struct nvm_block *blk;
-	int i;
-
-	nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
-	if (nr_blks < 0)
-		return nr_blks;
-
-	lun = &gn->luns[(dev->geo.luns_per_chnl * ppa.g.ch) + ppa.g.lun];
-
-	for (i = 0; i < nr_blks; i++) {
-		if (blks[i] == NVM_BLK_T_FREE)
-			continue;
-
-		blk = &lun->blocks[i];
-		list_move_tail(&blk->list, &lun->bb_list);
-		blk->state = NVM_BLK_ST_BAD;
-		lun->nr_free_blocks--;
-	}
-
-	return 0;
-}
-
-static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
-{
-	struct nvm_dev *dev = private;
-	struct nvm_geo *geo = &dev->geo;
-	struct gen_dev *gn = dev->mp;
-	u64 elba = slba + nlb;
-	struct nvm_lun *lun;
-	struct nvm_block *blk;
-	u64 i;
-	int lun_id;
-
-	if (unlikely(elba > dev->total_secs)) {
-		pr_err("gen: L2P data from device is out of bounds!\n");
-		return -EINVAL;
-	}
-
-	for (i = 0; i < nlb; i++) {
-		u64 pba = le64_to_cpu(entries[i]);
-
-		if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) {
-			pr_err("gen: L2P data entry is out of bounds!\n");
-			return -EINVAL;
-		}
-
-		/* Address zero is a special one. The first page on a disk is
-		 * protected. It often holds internal device boot
-		 * information.
-		 */
-		if (!pba)
-			continue;
-
-		/* resolve block from physical address */
-		lun_id = div_u64(pba, geo->sec_per_lun);
-		lun = &gn->luns[lun_id];
-
-		/* Calculate block offset into lun */
-		pba = pba - (geo->sec_per_lun * lun_id);
-		blk = &lun->blocks[div_u64(pba, geo->sec_per_blk)];
-
-		if (!blk->state) {
-			/* at this point, we don't know anything about the
-			 * block. It's up to the FTL on top to re-etablish the
-			 * block state. The block is assumed to be open.
-			 */
-			list_move_tail(&blk->list, &lun->used_list);
-			blk->state = NVM_BLK_ST_TGT;
-			lun->nr_free_blocks--;
-		}
 	}
-
-	return 0;
-}
-
-static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn)
-{
-	struct nvm_geo *geo = &dev->geo;
-	struct nvm_lun *lun;
-	struct nvm_block *block;
-	sector_t lun_iter, blk_iter, cur_block_id = 0;
-	int ret, nr_blks;
-	u8 *blks;
-
-	nr_blks = geo->blks_per_lun * geo->plane_mode;
-	blks = kmalloc(nr_blks, GFP_KERNEL);
-	if (!blks)
-		return -ENOMEM;
-
-	gen_for_each_lun(gn, lun, lun_iter) {
-		lun->blocks = vzalloc(sizeof(struct nvm_block) *
-							geo->blks_per_lun);
-		if (!lun->blocks) {
-			kfree(blks);
-			return -ENOMEM;
-		}
-
-		for (blk_iter = 0; blk_iter < geo->blks_per_lun; blk_iter++) {
-			block = &lun->blocks[blk_iter];
-
-			INIT_LIST_HEAD(&block->list);
-
-			block->lun = lun;
-			block->id = cur_block_id++;
-
-			list_add_tail(&block->list, &lun->free_list);
-		}
-
-		if (dev->ops->get_bb_tbl) {
-			struct ppa_addr ppa;
-
-			ppa.ppa = 0;
-			ppa.g.ch = lun->chnl_id;
-			ppa.g.lun = lun->lun_id;
-
-			ret = nvm_get_bb_tbl(dev, ppa, blks);
-			if (ret)
-				pr_err("gen: could not get BB table\n");
-
-			ret = gen_block_bb(gn, ppa, blks, nr_blks);
-			if (ret)
-				pr_err("gen: BB table map failed\n");
-		}
-	}
-
-	if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) {
-		ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
-							gen_block_map, dev);
-		if (ret) {
-			pr_err("gen: could not read L2P table.\n");
-			pr_warn("gen: default block initialization");
-		}
-	}
-
-	kfree(blks);
 	return 0;
 }
 
 static void gen_free(struct nvm_dev *dev)
 {
-	gen_blocks_free(dev);
 	gen_luns_free(dev);
 	kfree(dev->mp);
 	dev->mp = NULL;
@@ -528,12 +365,6 @@ static int gen_register(struct nvm_dev *dev)
 		goto err;
 	}
 
-	ret = gen_blocks_init(dev, gn);
-	if (ret) {
-		pr_err("gen: could not initialize blocks\n");
-		goto err;
-	}
-
 	return 1;
 err:
 	gen_free(dev);
@@ -558,34 +389,6 @@ static void gen_unregister(struct nvm_dev *dev)
 	module_put(THIS_MODULE);
 }
 
-static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type)
-{
-	struct nvm_geo *geo = &dev->geo;
-	struct gen_dev *gn = dev->mp;
-	struct nvm_lun *lun;
-	struct nvm_block *blk;
-
-	pr_debug("gen: ppa  (ch: %u lun: %u blk: %u pg: %u) -> %u\n",
-			ppa.g.ch, ppa.g.lun, ppa.g.blk, ppa.g.pg, type);
-
-	if (unlikely(ppa.g.ch > geo->nr_chnls ||
-					ppa.g.lun > geo->luns_per_chnl ||
-					ppa.g.blk > geo->blks_per_lun)) {
-		WARN_ON_ONCE(1);
-		pr_err("gen: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u",
-				ppa.g.ch, geo->nr_chnls,
-				ppa.g.lun, geo->luns_per_chnl,
-				ppa.g.blk, geo->blks_per_lun);
-		return;
-	}
-
-	lun = &gn->luns[(geo->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
-	blk = &lun->blocks[ppa.g.blk];
-
-	/* will be moved to bb list on put_blk from target */
-	blk->state = type;
-}
-
 static void gen_end_io(struct nvm_rq *rqd)
 {
 	struct nvm_tgt_instance *ins = rqd->ins;
@@ -606,11 +409,9 @@ static int gen_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return dev->ops->submit_io(dev, rqd);
 }
 
-static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, int flags)
+static int gen_erase_blk(struct nvm_dev *dev, struct ppa_addr *p, int flags)
 {
-	struct ppa_addr addr = block_to_ppa(dev, blk);
-
-	return nvm_erase_ppa(dev, &addr, 1, flags);
+	return nvm_erase_ppa(dev, p, 1, flags);
 }
 
 static struct nvmm_type gen = {
@@ -626,8 +427,6 @@ static struct nvmm_type gen = {
 	.submit_io		= gen_submit_io,
 	.erase_blk		= gen_erase_blk,
 
-	.mark_blk		= gen_mark_blk,
-
 	.get_area		= gen_get_area,
 	.put_area		= gen_put_area,
 
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 165b9d396493..a352285dffc0 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -126,19 +126,21 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
 static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvm_block *blk = rblk->parent;
-	int lun_blk = blk->id % (dev->geo.blks_per_lun * rrpc->nr_luns);
+	struct rrpc_lun *rlun = rblk->rlun;
+	struct nvm_lun *lun = rlun->parent;
 
-	return lun_blk * dev->geo.sec_per_blk;
+	return lun->id * dev->geo.sec_per_blk;
 }
 
 /* Calculate global addr for the given block */
 static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvm_block *blk = rblk->parent;
+	struct nvm_geo *geo = &dev->geo;
+	struct rrpc_lun *rlun = rblk->rlun;
+	struct nvm_lun *lun = rlun->parent;
 
-	return blk->id * dev->geo.sec_per_blk;
+	return lun->id * geo->sec_per_lun + rblk->id * geo->sec_per_blk;
 }
 
 static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev, u64 addr)
@@ -163,51 +165,46 @@ static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *new_rblk,
 	*cur_rblk = new_rblk;
 }
 
-static struct nvm_block *__rrpc_get_blk(struct rrpc *rrpc,
+static struct rrpc_block *__rrpc_get_blk(struct rrpc *rrpc,
 							struct rrpc_lun *rlun)
 {
-	struct nvm_lun *lun = rlun->parent;
-	struct nvm_block *blk = NULL;
+	struct rrpc_block *rblk = NULL;
 
-	if (list_empty(&lun->free_list))
+	if (list_empty(&rlun->free_list))
 		goto out;
 
-	blk = list_first_entry(&lun->free_list, struct nvm_block, list);
+	rblk = list_first_entry(&rlun->free_list, struct rrpc_block, list);
 
-	list_move_tail(&blk->list, &lun->used_list);
-	blk->state = NVM_BLK_ST_TGT;
-	lun->nr_free_blocks--;
+	list_move_tail(&rblk->list, &rlun->used_list);
+	rblk->state = NVM_BLK_ST_TGT;
+	rlun->nr_free_blocks--;
 
 out:
-	return blk;
+	return rblk;
 }
 
 static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 							unsigned long flags)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvm_lun *lun = rlun->parent;
-	struct nvm_block *blk;
 	struct rrpc_block *rblk;
 	int is_gc = flags & NVM_IOTYPE_GC;
 
 	spin_lock(&rlun->lock);
-	if (!is_gc && lun->nr_free_blocks < rlun->reserved_blocks) {
+	if (!is_gc && rlun->nr_free_blocks < rlun->reserved_blocks) {
 		pr_err("nvm: rrpc: cannot give block to non GC request\n");
 		spin_unlock(&rlun->lock);
 		return NULL;
 	}
 
-	blk = __rrpc_get_blk(rrpc, rlun);
-	if (!blk) {
+	rblk = __rrpc_get_blk(rrpc, rlun);
+	if (!rblk) {
 		pr_err("nvm: rrpc: cannot get new block\n");
 		spin_unlock(&rlun->lock);
 		return NULL;
 	}
 	spin_unlock(&rlun->lock);
 
-	rblk = rrpc_get_rblk(rlun, blk->id);
-	blk->priv = rblk;
 	bitmap_zero(rblk->invalid_pages, dev->geo.sec_per_blk);
 	rblk->next_page = 0;
 	rblk->nr_invalid_pages = 0;
@@ -218,23 +215,23 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 
 static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	struct nvm_block *blk = rblk->parent;
 	struct rrpc_lun *rlun = rblk->rlun;
 	struct nvm_lun *lun = rlun->parent;
 
 	spin_lock(&rlun->lock);
-	if (blk->state & NVM_BLK_ST_TGT) {
-		list_move_tail(&blk->list, &lun->free_list);
-		lun->nr_free_blocks++;
-		blk->state = NVM_BLK_ST_FREE;
-	} else if (blk->state & NVM_BLK_ST_BAD) {
-		list_move_tail(&blk->list, &lun->bb_list);
-		blk->state = NVM_BLK_ST_BAD;
+	if (rblk->state & NVM_BLK_ST_TGT) {
+		list_move_tail(&rblk->list, &rlun->free_list);
+		rlun->nr_free_blocks++;
+		rblk->state = NVM_BLK_ST_FREE;
+	} else if (rblk->state & NVM_BLK_ST_BAD) {
+		list_move_tail(&rblk->list, &rlun->bb_list);
+		rblk->state = NVM_BLK_ST_BAD;
 	} else {
 		WARN_ON_ONCE(1);
-		pr_err("rrpc: erroneous block type (%lu -> %u)\n",
-							blk->id, blk->state);
-		list_move_tail(&blk->list, &lun->bb_list);
+		pr_err("rrpc: erroneous type (ch:%d,lun:%d,blk%d-> %u)\n",
+						lun->chnl_id, lun->lun_id,
+						rblk->id, rblk->state);
+		list_move_tail(&rblk->list, &rlun->bb_list);
 	}
 	spin_unlock(&rlun->lock);
 }
@@ -334,7 +331,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 					    nr_sec_per_blk)) < nr_sec_per_blk) {
 
 		/* Lock laddr */
-		phys_addr = rblk->parent->id * nr_sec_per_blk + slot;
+		phys_addr = rrpc_blk_to_ppa(rrpc, rblk) + slot;
 
 try:
 		spin_lock(&rrpc->rev_lock);
@@ -422,14 +419,22 @@ static void rrpc_block_gc(struct work_struct *work)
 	struct rrpc_block *rblk = gcb->rblk;
 	struct rrpc_lun *rlun = rblk->rlun;
 	struct nvm_tgt_dev *dev = rrpc->dev;
+	struct ppa_addr ppa;
 
 	mempool_free(gcb, rrpc->gcb_pool);
-	pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
+	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' being reclaimed\n",
+			rlun->parent->chnl_id, rlun->parent->lun_id,
+			rblk->id);
 
 	if (rrpc_move_valid_pages(rrpc, rblk))
 		goto put_back;
 
-	if (nvm_erase_blk(dev->parent, rblk->parent, 0))
+	ppa.ppa = 0;
+	ppa.g.ch = rlun->parent->chnl_id;
+	ppa.g.lun = rlun->parent->lun_id;
+	ppa.g.blk = rblk->id;
+
+	if (nvm_erase_blk(dev->parent, &ppa, 0))
 		goto put_back;
 
 	rrpc_put_blk(rrpc, rblk);
@@ -445,7 +450,7 @@ put_back:
 /* the block with highest number of invalid pages, will be in the beginning
  * of the list
  */
-static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra,
+static struct rrpc_block *rblk_max_invalid(struct rrpc_block *ra,
 							struct rrpc_block *rb)
 {
 	if (ra->nr_invalid_pages == rb->nr_invalid_pages)
@@ -460,13 +465,13 @@ static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra,
 static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
 {
 	struct list_head *prio_list = &rlun->prio_list;
-	struct rrpc_block *rblock, *max;
+	struct rrpc_block *rblk, *max;
 
 	BUG_ON(list_empty(prio_list));
 
 	max = list_first_entry(prio_list, struct rrpc_block, prio);
-	list_for_each_entry(rblock, prio_list, prio)
-		max = rblock_max_invalid(max, rblock);
+	list_for_each_entry(rblk, prio_list, prio)
+		max = rblk_max_invalid(max, rblk);
 
 	return max;
 }
@@ -476,7 +481,6 @@ static void rrpc_lun_gc(struct work_struct *work)
 	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
 	struct rrpc *rrpc = rlun->rrpc;
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvm_lun *lun = rlun->parent;
 	struct rrpc_block_gc *gcb;
 	unsigned int nr_blocks_need;
 
@@ -486,26 +490,28 @@ static void rrpc_lun_gc(struct work_struct *work)
 		nr_blocks_need = rrpc->nr_luns;
 
 	spin_lock(&rlun->lock);
-	while (nr_blocks_need > lun->nr_free_blocks &&
+	while (nr_blocks_need > rlun->nr_free_blocks &&
 					!list_empty(&rlun->prio_list)) {
-		struct rrpc_block *rblock = block_prio_find_max(rlun);
-		struct nvm_block *block = rblock->parent;
+		struct rrpc_block *rblk = block_prio_find_max(rlun);
 
-		if (!rblock->nr_invalid_pages)
+		if (!rblk->nr_invalid_pages)
 			break;
 
 		gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
 		if (!gcb)
 			break;
 
-		list_del_init(&rblock->prio);
+		list_del_init(&rblk->prio);
 
-		BUG_ON(!block_is_full(rrpc, rblock));
+		WARN_ON(!block_is_full(rrpc, rblk));
 
-		pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
+		pr_debug("rrpc: selected block 'ch:%d,lun:%d,blk:%d' for GC\n",
+					rlun->parent->chnl_id,
+					rlun->parent->lun_id,
+					rblk->id);
 
 		gcb->rrpc = rrpc;
-		gcb->rblk = rblock;
+		gcb->rblk = rblk;
 		INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
 
 		queue_work(rrpc->kgc_wq, &gcb->ws_gc);
@@ -530,8 +536,10 @@ static void rrpc_gc_queue(struct work_struct *work)
 	spin_unlock(&rlun->lock);
 
 	mempool_free(gcb, rrpc->gcb_pool);
-	pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
-							rblk->parent->id);
+	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' full, allow GC (sched)\n",
+					rlun->parent->chnl_id,
+					rlun->parent->lun_id,
+					rblk->id);
 }
 
 static const struct block_device_operations rrpc_fops = {
@@ -555,8 +563,7 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
 	 * estimate.
 	 */
 	rrpc_for_each_lun(rrpc, rlun, i) {
-		if (rlun->parent->nr_free_blocks >
-					max_free->parent->nr_free_blocks)
+		if (rlun->nr_free_blocks > max_free->nr_free_blocks)
 			max_free = rlun;
 	}
 
@@ -613,14 +620,12 @@ static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
 {
 	struct rrpc_lun *rlun;
 	struct rrpc_block *rblk, **cur_rblk;
-	struct nvm_lun *lun;
 	u64 paddr;
 	int gc_force = 0;
 
 	rlun = rrpc_get_lun_rr(rrpc, is_gc);
-	lun = rlun->parent;
 
-	if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4)
+	if (!is_gc && rlun->nr_free_blocks < rrpc->nr_luns * 4)
 		return NULL;
 
 	/*
@@ -701,22 +706,44 @@ static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
 	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
 }
 
-static void __rrpc_mark_bad_block(struct nvm_tgt_dev *dev, struct ppa_addr *ppa)
+static struct rrpc_lun *rrpc_ppa_to_lun(struct rrpc *rrpc, struct ppa_addr p)
 {
-		nvm_mark_blk(dev->parent, *ppa, NVM_BLK_ST_BAD);
-		nvm_set_bb_tbl(dev->parent, ppa, 1, NVM_BLK_T_GRWN_BAD);
+	struct rrpc_lun *rlun = NULL;
+	int i;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		if (rrpc->luns[i].parent->chnl_id == p.g.ch &&
+				rrpc->luns[i].parent->lun_id == p.g.lun) {
+			rlun = &rrpc->luns[i];
+			break;
+		}
+	}
+
+	return rlun;
 }
 
-static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
+static void __rrpc_mark_bad_block(struct rrpc *rrpc, struct ppa_addr ppa)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
+	struct rrpc_lun *rlun;
+	struct rrpc_block *rblk;
+
+	rlun = rrpc_ppa_to_lun(rrpc, ppa);
+	rblk = &rlun->blocks[ppa.g.blk];
+	rblk->state = NVM_BLK_ST_BAD;
+
+	nvm_set_bb_tbl(dev->parent, &ppa, 1, NVM_BLK_T_GRWN_BAD);
+}
+
+static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
+{
 	void *comp_bits = &rqd->ppa_status;
 	struct ppa_addr ppa, prev_ppa;
 	int nr_ppas = rqd->nr_ppas;
 	int bit;
 
 	if (rqd->nr_ppas == 1)
-		__rrpc_mark_bad_block(dev, &rqd->ppa_addr);
+		__rrpc_mark_bad_block(rrpc, rqd->ppa_addr);
 
 	ppa_set_empty(&prev_ppa);
 	bit = -1;
@@ -725,7 +752,7 @@ static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
 		if (ppa_cmp_blk(ppa, prev_ppa))
 			continue;
 
-		__rrpc_mark_bad_block(dev, &ppa);
+		__rrpc_mark_bad_block(rrpc, ppa);
 	}
 }
 
@@ -735,13 +762,11 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_addr *p;
 	struct rrpc_block *rblk;
-	struct nvm_lun *lun;
 	int cmnt_size, i;
 
 	for (i = 0; i < npages; i++) {
 		p = &rrpc->trans_map[laddr + i];
 		rblk = p->rblk;
-		lun = rblk->parent->lun;
 
 		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
 		if (unlikely(cmnt_size == dev->geo.sec_per_blk))
@@ -1061,16 +1086,21 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_addr *addr = rrpc->trans_map + slba;
 	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
+	struct rrpc_lun *rlun;
+	struct rrpc_block *rblk;
 	u64 i;
 
 	for (i = 0; i < nlb; i++) {
+		struct ppa_addr gaddr;
 		u64 pba = le64_to_cpu(entries[i]);
 		unsigned int mod;
+
 		/* LNVM treats address-spaces as silos, LBA and PBA are
 		 * equally large and zero-indexed.
 		 */
 		if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) {
 			pr_err("nvm: L2P data entry is out of bounds!\n");
+			pr_err("nvm: Maybe loaded an old target L2P\n");
 			return -EINVAL;
 		}
 
@@ -1085,6 +1115,25 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 		addr[i].addr = pba;
 		raddr[mod].addr = slba + i;
+
+		gaddr = rrpc_ppa_to_gaddr(dev, pba);
+		rlun = rrpc_ppa_to_lun(rrpc, gaddr);
+		if (!rlun) {
+			pr_err("rrpc: l2p corruption on lba %llu\n",
+							slba + i);
+			return -EINVAL;
+		}
+
+		rblk = &rlun->blocks[gaddr.g.blk];
+		if (!rblk->state) {
+			/* at this point, we don't know anything about the
+			 * block. It's up to the FTL on top to re-etablish the
+			 * block state. The block is assumed to be open.
+			 */
+			list_move_tail(&rblk->list, &rlun->used_list);
+			rblk->state = NVM_BLK_ST_TGT;
+			rlun->nr_free_blocks--;
+		}
 	}
 
 	return 0;
@@ -1199,6 +1248,51 @@ static void rrpc_luns_free(struct rrpc *rrpc)
 	kfree(rrpc->luns);
 }
 
+static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
+{
+	struct nvm_geo *geo = &dev->geo;
+	struct rrpc_block *rblk;
+	struct ppa_addr ppa;
+	u8 *blks;
+	int nr_blks;
+	int i;
+	int ret;
+
+	nr_blks = geo->blks_per_lun * geo->plane_mode;
+	blks = kmalloc(nr_blks, GFP_KERNEL);
+	if (!blks)
+		return -ENOMEM;
+
+	ppa.ppa = 0;
+	ppa.g.ch = rlun->parent->chnl_id;
+	ppa.g.lun = rlun->parent->lun_id;
+
+	ret = nvm_get_bb_tbl(dev->parent, ppa, blks);
+	if (ret) {
+		pr_err("rrpc: could not get BB table\n");
+		goto out;
+	}
+
+	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
+	if (nr_blks < 0)
+		return nr_blks;
+
+	rlun->nr_free_blocks = geo->blks_per_lun;
+	for (i = 0; i < nr_blks; i++) {
+		if (blks[i] == NVM_BLK_T_FREE)
+			continue;
+
+		rblk = &rlun->blocks[i];
+		list_move_tail(&rblk->list, &rlun->bb_list);
+		rblk->state = NVM_BLK_ST_BAD;
+		rlun->nr_free_blocks--;
+	}
+
+out:
+	kfree(blks);
+	return ret;
+}
+
 static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
@@ -1232,16 +1326,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
 			goto err;
 		}
 
+		INIT_LIST_HEAD(&rlun->free_list);
+		INIT_LIST_HEAD(&rlun->used_list);
+		INIT_LIST_HEAD(&rlun->bb_list);
+
 		for (j = 0; j < geo->blks_per_lun; j++) {
 			struct rrpc_block *rblk = &rlun->blocks[j];
-			struct nvm_block *blk = &lun->blocks[j];
 
-			rblk->parent = blk;
+			rblk->id = j;
 			rblk->rlun = rlun;
+			rblk->state = NVM_BLK_T_FREE;
 			INIT_LIST_HEAD(&rblk->prio);
+			INIT_LIST_HEAD(&rblk->list);
 			spin_lock_init(&rblk->lock);
+
+			list_add_tail(&rblk->list, &rlun->free_list);
 		}
 
+		if (rrpc_bb_discovery(dev, rlun))
+			goto err;
+
 		rlun->reserved_blocks = 2; /* for GC only */
 
 		rlun->rrpc = rrpc;
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 242d4c109eb6..c55cec9a1a26 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -52,8 +52,7 @@ struct rrpc_rq {
 };
 
 struct rrpc_block {
-	unsigned long id;
-	struct nvm_block *parent;
+	int id;				/* id inside of LUN */
 	struct rrpc_lun *rlun;
 
 	struct list_head prio;		/* LUN CG list */
@@ -83,6 +82,16 @@ struct rrpc_lun {
 	struct list_head prio_list;	/* Blocks that may be GC'ed */
 	struct list_head wblk_list;	/* Queued blocks to be written to */
 
+	/* lun block lists */
+	struct list_head used_list;	/* In-use blocks */
+	struct list_head free_list;	/* Not used blocks i.e. released
+					 * and ready for use
+					 */
+	struct list_head bb_list;	/* Bad blocks. Mutually exclusive with
+					 * free_list and used_list
+					 */
+	unsigned int nr_free_blocks;	/* Number of unused blocks */
+
 	struct work_struct ws_gc;
 
 	int reserved_blocks;
@@ -155,14 +164,14 @@ struct rrpc_rev_addr {
 	u64 addr;
 };
 
-static inline struct rrpc_block *rrpc_get_rblk(struct rrpc_lun *rlun,
-								int blk_id)
+static inline u64 rrpc_blk_to_ppa(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-	struct rrpc *rrpc = rlun->rrpc;
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	int lun_blk = blk_id % dev->geo.blks_per_lun;
+	struct nvm_geo *geo = &dev->geo;
+	struct rrpc_lun *rlun = rblk->rlun;
+	struct nvm_lun *lun = rlun->parent;
 
-	return &rlun->blocks[lun_blk];
+	return (lun->id * geo->sec_per_lun) + (rblk->id * geo->sec_per_blk);
 }
 
 static inline sector_t rrpc_get_laddr(struct bio *bio)
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index ed04fa642371..cc210cc85c6d 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -266,8 +266,6 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 	return rqdata + 1;
 }
 
-struct nvm_block;
-
 struct nvm_lun {
 	int id;
 
@@ -275,19 +273,6 @@ struct nvm_lun {
 	int chnl_id;
 
 	struct list_head list;
-	spinlock_t lock;
-
-	/* lun block lists */
-	struct list_head used_list;	/* In-use blocks */
-	struct list_head free_list;	/* Not used blocks i.e. released
-					 * and ready for use
-					 */
-	struct list_head bb_list;	/* Bad blocks. Mutually exclusive with
-					 * free_list and used_list
-					 */
-	unsigned int nr_free_blocks;	/* Number of unused blocks */
-
-	struct nvm_block *blocks;
 };
 
 enum {
@@ -296,15 +281,6 @@ enum {
 	NVM_BLK_ST_BAD =	0x8,	/* Bad block */
 };
 
-struct nvm_block {
-	struct list_head list;
-	struct nvm_lun *lun;
-	unsigned long id;
-
-	void *priv;
-	int state;
-};
-
 /* system block cpu representation */
 struct nvm_sb_info {
 	unsigned long		seqnr;
@@ -473,21 +449,6 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
 	ppa_addr->ppa = ADDR_EMPTY;
 }
 
-static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
-							struct nvm_block *blk)
-{
-	struct nvm_geo *geo = &dev->geo;
-	struct ppa_addr ppa;
-	struct nvm_lun *lun = blk->lun;
-
-	ppa.ppa = 0;
-	ppa.g.blk = blk->id % geo->blks_per_lun;
-	ppa.g.lun = lun->lun_id;
-	ppa.g.ch = lun->chnl_id;
-
-	return ppa;
-}
-
 static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
 {
 	if (ppa_empty(ppa1) || ppa_empty(ppa2))
@@ -539,8 +500,7 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *);
 typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
 typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int);
-typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
+typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct ppa_addr *, int);
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
 
@@ -557,9 +517,6 @@ struct nvmm_type {
 	nvmm_submit_io_fn *submit_io;
 	nvmm_erase_blk_fn *erase_blk;
 
-	/* Bad block mgmt */
-	nvmm_mark_blk_fn *mark_blk;
-
 	nvmm_get_area_fn *get_area;
 	nvmm_put_area_fn *put_area;
 
@@ -573,7 +530,6 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-extern void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type);
 extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas,
 							int nr_ppas, int type);
 
@@ -584,7 +540,7 @@ extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
-extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *, int);
+extern int nvm_erase_blk(struct nvm_dev *, struct ppa_addr *, int);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
 								void *, int);
-- 
cgit v1.2.3


From 8e53624d44c1de31b1b0d4f500703669418a4c67 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:10 +0100
Subject: lightnvm: eliminate nvm_lun abstraction in mm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to naturally support multi-target instances on an Open-Channel
SSD, targets should own the LUNs they get blocks from and manage
provisioning internally. This is done in several steps.

Since targets own the LUNs the are instantiated on top of and manage the
free block list internally, there is no need for a LUN abstraction in
the media manager. LUNs are intrinsically managed as in the physical
layout (ch:0,lun:0, ..., ch:0,lun:n, ch:1,lun:0, ch:1,lun:n, ...,
ch:m,lun:0, ch:m,lun:n) and given to the targets based on the target
creation ioctl. This simplifies LUN management and clears the path for a
partition manager to sit directly underneath LightNVM targets.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c      |  14 +-
 drivers/lightnvm/gennvm.c    | 336 ++++++++++++++++++++++++++++++++++---------
 drivers/lightnvm/gennvm.h    |  20 ++-
 drivers/lightnvm/rrpc.c      | 137 +++++++++---------
 drivers/lightnvm/rrpc.h      |  32 ++++-
 drivers/nvme/host/lightnvm.c |   3 +
 include/linux/lightnvm.h     |  41 +++---
 7 files changed, 419 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 691b16ffda88..23d582f82219 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -203,15 +203,19 @@ int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
 }
 EXPORT_SYMBOL(nvm_set_bb_tbl);
 
-int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
-	return dev->mt->submit_io(dev, rqd);
+	struct nvm_dev *dev = tgt_dev->parent;
+
+	return dev->mt->submit_io(tgt_dev, rqd);
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
-int nvm_erase_blk(struct nvm_dev *dev, struct ppa_addr *p, int flags)
+int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags)
 {
-	return dev->mt->erase_blk(dev, p, flags);
+	struct nvm_dev *dev = tgt_dev->parent;
+
+	return dev->mt->erase_blk(tgt_dev, p, flags);
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
@@ -350,7 +354,7 @@ static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode,
 
 	nvm_generic_to_addr_mode(dev, rqd);
 
-	rqd->dev = dev;
+	rqd->dev = NULL;
 	rqd->opcode = opcode;
 	rqd->flags = flags;
 	rqd->bio = bio;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 19c3924ffa10..5d7c8c47bef8 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -38,8 +38,6 @@ static const struct block_device_operations gen_fops = {
 static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t,
 			    int lun_begin, int lun_end)
 {
-	struct gen_dev *gn = dev->mp;
-	struct nvm_lun *lun;
 	int i;
 
 	for (i = lun_begin; i <= lun_end; i++) {
@@ -47,35 +45,50 @@ static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t,
 			pr_err("nvm: lun %d already allocated\n", i);
 			goto err;
 		}
-
-		lun = &gn->luns[i];
-		list_add_tail(&lun->list, &t->lun_list);
 	}
 
 	return 0;
 
 err:
-	while (--i > lun_begin) {
-		lun = &gn->luns[i];
+	while (--i > lun_begin)
 		clear_bit(i, dev->lun_map);
-		list_del(&lun->list);
-	}
 
 	return -EBUSY;
 }
 
-static void gen_release_luns(struct nvm_dev *dev, struct nvm_target *t)
+static void gen_release_luns_err(struct nvm_dev *dev, int lun_begin,
+				 int lun_end)
 {
-	struct nvm_lun *lun, *tmp;
+	int i;
 
-	list_for_each_entry_safe(lun, tmp, &t->lun_list, list) {
-		WARN_ON(!test_and_clear_bit(lun->id, dev->lun_map));
-		list_del(&lun->list);
-	}
+	for (i = lun_begin; i <= lun_end; i++)
+		WARN_ON(!test_and_clear_bit(i, dev->lun_map));
 }
 
 static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
 {
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct gen_dev_map *dev_map = tgt_dev->map;
+	int i, j;
+
+	for (i = 0; i < dev_map->nr_chnls; i++) {
+		struct gen_ch_map *ch_map = &dev_map->chnls[i];
+		int *lun_offs = ch_map->lun_offs;
+		int ch = i + ch_map->ch_off;
+
+		for (j = 0; j < ch_map->nr_luns; j++) {
+			int lun = j + lun_offs[j];
+			int lunid = (ch * dev->geo.luns_per_chnl) + lun;
+
+			WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+		}
+
+		kfree(ch_map->lun_offs);
+	}
+
+	kfree(dev_map->chnls);
+	kfree(dev_map);
+	kfree(tgt_dev->luns);
 	kfree(tgt_dev);
 }
 
@@ -83,24 +96,103 @@ static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev,
 					      int lun_begin, int lun_end)
 {
 	struct nvm_tgt_dev *tgt_dev = NULL;
+	struct gen_dev_map *dev_rmap = dev->rmap;
+	struct gen_dev_map *dev_map;
+	struct ppa_addr *luns;
 	int nr_luns = lun_end - lun_begin + 1;
+	int luns_left = nr_luns;
+	int nr_chnls = nr_luns / dev->geo.luns_per_chnl;
+	int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl;
+	int bch = lun_begin / dev->geo.luns_per_chnl;
+	int blun = lun_begin % dev->geo.luns_per_chnl;
+	int lunid = 0;
+	int lun_balanced = 1;
+	int prev_nr_luns;
+	int i, j;
+
+	nr_chnls = nr_luns / dev->geo.luns_per_chnl;
+	nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
+
+	dev_map = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
+	if (!dev_map)
+		goto err_dev;
+
+	dev_map->chnls = kcalloc(nr_chnls, sizeof(struct gen_ch_map),
+								GFP_KERNEL);
+	if (!dev_map->chnls)
+		goto err_chnls;
+
+	luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
+	if (!luns)
+		goto err_luns;
+
+	prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ?
+					dev->geo.luns_per_chnl : luns_left;
+	for (i = 0; i < nr_chnls; i++) {
+		struct gen_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
+		int *lun_roffs = ch_rmap->lun_offs;
+		struct gen_ch_map *ch_map = &dev_map->chnls[i];
+		int *lun_offs;
+		int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ?
+					dev->geo.luns_per_chnl : luns_left;
+
+		if (lun_balanced && prev_nr_luns != luns_in_chnl)
+			lun_balanced = 0;
+
+		ch_map->ch_off = ch_rmap->ch_off = bch;
+		ch_map->nr_luns = luns_in_chnl;
+
+		lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
+		if (!lun_offs)
+			goto err_ch;
+
+		for (j = 0; j < luns_in_chnl; j++) {
+			luns[lunid].ppa = 0;
+			luns[lunid].g.ch = i;
+			luns[lunid++].g.lun = j;
+
+			lun_offs[j] = blun;
+			lun_roffs[j + blun] = blun;
+		}
+
+		ch_map->lun_offs = lun_offs;
+
+		/* when starting a new channel, lun offset is reset */
+		blun = 0;
+		luns_left -= luns_in_chnl;
+	}
+
+	dev_map->nr_chnls = nr_chnls;
 
 	tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
 	if (!tgt_dev)
-		goto out;
+		goto err_ch;
 
 	memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
-	tgt_dev->geo.nr_chnls = (nr_luns / (dev->geo.luns_per_chnl + 1)) + 1;
+	/* Target device only owns a portion of the physical device */
+	tgt_dev->geo.nr_chnls = nr_chnls;
 	tgt_dev->geo.nr_luns = nr_luns;
+	tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
 	tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
 	tgt_dev->q = dev->q;
 	tgt_dev->ops = dev->ops;
 	tgt_dev->mt = dev->mt;
+	tgt_dev->map = dev_map;
+	tgt_dev->luns = luns;
 	memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
 
 	tgt_dev->parent = dev;
 
-out:
+	return tgt_dev;
+err_ch:
+	while (--i > 0)
+		kfree(dev_map->chnls[i].lun_offs);
+	kfree(luns);
+err_luns:
+	kfree(dev_map->chnls);
+err_chnls:
+	kfree(dev_map);
+err_dev:
 	return tgt_dev;
 }
 
@@ -134,14 +226,14 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	if (!t)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&t->lun_list);
-
 	if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end))
 		goto err_t;
 
 	tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end);
-	if (!tgt_dev)
+	if (!tgt_dev) {
+		pr_err("nvm: could not create target device\n");
 		goto err_reserve;
+	}
 
 	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
 	if (!tqueue)
@@ -159,7 +251,7 @@ static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	tdisk->fops = &gen_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(tgt_dev, tdisk, &t->lun_list);
+	targetdata = tt->init(tgt_dev, tdisk);
 	if (IS_ERR(targetdata))
 		goto err_init;
 
@@ -187,7 +279,7 @@ err_queue:
 err_dev:
 	kfree(tgt_dev);
 err_reserve:
-	gen_release_luns(dev, t);
+	gen_release_luns_err(dev, s->lun_begin, s->lun_end);
 err_t:
 	kfree(t);
 	return -ENOMEM;
@@ -205,7 +297,6 @@ static void __gen_remove_target(struct nvm_target *t)
 	if (tt->exit)
 		tt->exit(tdisk->private_data);
 
-	gen_release_luns(t->dev->parent, t);
 	gen_remove_tgt_dev(t->dev);
 	put_disk(tdisk);
 
@@ -306,51 +397,54 @@ static void gen_put_area(struct nvm_dev *dev, sector_t begin)
 	spin_unlock(&dev->lock);
 }
 
-static void gen_luns_free(struct nvm_dev *dev)
-{
-	struct gen_dev *gn = dev->mp;
-
-	kfree(gn->luns);
-}
-
-static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn)
-{
-	struct nvm_geo *geo = &dev->geo;
-	struct nvm_lun *lun;
-	int i;
-
-	gn->luns = kcalloc(geo->nr_luns, sizeof(struct nvm_lun), GFP_KERNEL);
-	if (!gn->luns)
-		return -ENOMEM;
-
-	gen_for_each_lun(gn, lun, i) {
-		INIT_LIST_HEAD(&lun->list);
-
-		lun->id = i;
-		lun->lun_id = i % geo->luns_per_chnl;
-		lun->chnl_id = i / geo->luns_per_chnl;
-	}
-	return 0;
-}
-
 static void gen_free(struct nvm_dev *dev)
 {
-	gen_luns_free(dev);
 	kfree(dev->mp);
+	kfree(dev->rmap);
 	dev->mp = NULL;
 }
 
 static int gen_register(struct nvm_dev *dev)
 {
 	struct gen_dev *gn;
-	int ret;
+	struct gen_dev_map *dev_rmap;
+	int i, j;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
 	gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL);
 	if (!gn)
-		return -ENOMEM;
+		goto err_gn;
+
+	dev_rmap = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
+	if (!dev_rmap)
+		goto err_rmap;
+
+	dev_rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct gen_ch_map),
+								GFP_KERNEL);
+	if (!dev_rmap->chnls)
+		goto err_chnls;
+
+	for (i = 0; i < dev->geo.nr_chnls; i++) {
+		struct gen_ch_map *ch_rmap;
+		int *lun_roffs;
+		int luns_in_chnl = dev->geo.luns_per_chnl;
+
+		ch_rmap = &dev_rmap->chnls[i];
+
+		ch_rmap->ch_off = -1;
+		ch_rmap->nr_luns = luns_in_chnl;
+
+		lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
+		if (!lun_roffs)
+			goto err_ch;
+
+		for (j = 0; j < luns_in_chnl; j++)
+			lun_roffs[j] = -1;
+
+		ch_rmap->lun_offs = lun_roffs;
+	}
 
 	gn->dev = dev;
 	gn->nr_luns = dev->geo.nr_luns;
@@ -358,18 +452,19 @@ static int gen_register(struct nvm_dev *dev)
 	mutex_init(&gn->lock);
 	INIT_LIST_HEAD(&gn->targets);
 	dev->mp = gn;
-
-	ret = gen_luns_init(dev, gn);
-	if (ret) {
-		pr_err("gen: could not initialize luns\n");
-		goto err;
-	}
+	dev->rmap = dev_rmap;
 
 	return 1;
-err:
+err_ch:
+	while (--i >= 0)
+		kfree(dev_rmap->chnls[i].lun_offs);
+err_chnls:
+	kfree(dev_rmap);
+err_rmap:
 	gen_free(dev);
+err_gn:
 	module_put(THIS_MODULE);
-	return ret;
+	return -ENOMEM;
 }
 
 static void gen_unregister(struct nvm_dev *dev)
@@ -389,29 +484,137 @@ static void gen_unregister(struct nvm_dev *dev)
 	module_put(THIS_MODULE);
 }
 
+enum {
+	TRANS_TGT_TO_DEV =	0x0,
+	TRANS_DEV_TO_TGT =	0x1,
+};
+
+
+static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
+{
+	struct gen_dev_map *dev_map = tgt_dev->map;
+	struct gen_ch_map *ch_map = &dev_map->chnls[p->g.ch];
+	int lun_off = ch_map->lun_offs[p->g.lun];
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct gen_dev_map *dev_rmap = dev->rmap;
+	struct gen_ch_map *ch_rmap;
+	int lun_roff;
+
+	p->g.ch += ch_map->ch_off;
+	p->g.lun += lun_off;
+
+	ch_rmap = &dev_rmap->chnls[p->g.ch];
+	lun_roff = ch_rmap->lun_offs[p->g.lun];
+
+	if (unlikely(ch_rmap->ch_off < 0 || lun_roff < 0)) {
+		pr_err("nvm: corrupted device partition table\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int gen_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct gen_dev_map *dev_rmap = dev->rmap;
+	struct gen_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
+	int lun_roff = ch_rmap->lun_offs[p->g.lun];
+
+	p->g.ch -= ch_rmap->ch_off;
+	p->g.lun -= lun_roff;
+
+	return 0;
+}
+
+static int gen_trans_rq(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
+			int flag)
+{
+	gen_trans_fn *f;
+	int i;
+	int ret = 0;
+
+	f = (flag == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
+
+	if (rqd->nr_ppas == 1)
+		return f(tgt_dev, &rqd->ppa_addr);
+
+	for (i = 0; i < rqd->nr_ppas; i++) {
+		ret = f(tgt_dev, &rqd->ppa_list[i]);
+		if (ret)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
 static void gen_end_io(struct nvm_rq *rqd)
 {
+	struct nvm_tgt_dev *tgt_dev = rqd->dev;
 	struct nvm_tgt_instance *ins = rqd->ins;
 
+	/* Convert address space */
+	if (tgt_dev)
+		gen_trans_rq(tgt_dev, rqd, TRANS_DEV_TO_TGT);
+
 	ins->tt->end_io(rqd);
 }
 
-static int gen_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static int gen_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
+	struct nvm_dev *dev = tgt_dev->parent;
+
 	if (!dev->ops->submit_io)
 		return -ENODEV;
 
 	/* Convert address space */
+	gen_trans_rq(tgt_dev, rqd, TRANS_TGT_TO_DEV);
 	nvm_generic_to_addr_mode(dev, rqd);
 
-	rqd->dev = dev;
+	rqd->dev = tgt_dev;
 	rqd->end_io = gen_end_io;
 	return dev->ops->submit_io(dev, rqd);
 }
 
-static int gen_erase_blk(struct nvm_dev *dev, struct ppa_addr *p, int flags)
+static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p,
+			 int flags)
 {
-	return nvm_erase_ppa(dev, p, 1, flags);
+	/* Convert address space */
+	gen_map_to_dev(tgt_dev, p);
+
+	return nvm_erase_ppa(tgt_dev->parent, p, 1, flags);
+}
+
+static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
+			       int len)
+{
+	struct nvm_geo *geo = &dev->geo;
+	struct gen_dev_map *dev_rmap = dev->rmap;
+	u64 i;
+
+	for (i = 0; i < len; i++) {
+		struct gen_ch_map *ch_rmap;
+		int *lun_roffs;
+		struct ppa_addr gaddr;
+		u64 pba = le64_to_cpu(entries[i]);
+		int off;
+		u64 diff;
+
+		if (!pba)
+			continue;
+
+		gaddr = linear_to_generic_addr(geo, pba);
+		ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
+		lun_roffs = ch_rmap->lun_offs;
+
+		off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
+
+		diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
+				(lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
+
+		entries[i] -= cpu_to_le64(diff);
+	}
 }
 
 static struct nvmm_type gen = {
@@ -430,6 +633,7 @@ static struct nvmm_type gen = {
 	.get_area		= gen_get_area,
 	.put_area		= gen_put_area,
 
+	.part_to_tgt		= gen_part_to_tgt,
 };
 
 static int __init gen_module_init(void)
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index d167f391fbae..6a4b3f368848 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -24,19 +24,37 @@ struct gen_dev {
 	struct nvm_dev *dev;
 
 	int nr_luns;
-	struct nvm_lun *luns;
 	struct list_head area_list;
 
 	struct mutex lock;
 	struct list_head targets;
 };
 
+/* Map between virtual and physical channel and lun */
+struct gen_ch_map {
+	int ch_off;
+	int nr_luns;
+	int *lun_offs;
+};
+
+struct gen_dev_map {
+	struct gen_ch_map *chnls;
+	int nr_chnls;
+};
+
 struct gen_area {
 	struct list_head list;
 	sector_t begin;
 	sector_t end;	/* end is excluded */
 };
 
+static inline void *ch_map_to_lun_offs(struct gen_ch_map *ch_map)
+{
+	return ch_map + 1;
+}
+
+typedef int (gen_trans_fn)(struct nvm_tgt_dev *, struct ppa_addr *);
+
 #define gen_for_each_lun(bm, lun, i) \
 		for ((i) = 0, lun = &(bm)->luns[0]; \
 			(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index a352285dffc0..75ed12ae0f47 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -45,7 +45,7 @@ static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
 
 	spin_unlock(&rblk->lock);
 
-	rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
+	rrpc->rev_trans_map[a->addr].addr = ADDR_EMPTY;
 }
 
 static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
@@ -127,28 +127,25 @@ static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_lun *lun = rlun->parent;
 
-	return lun->id * dev->geo.sec_per_blk;
+	return rlun->id * dev->geo.sec_per_blk;
 }
 
-/* Calculate global addr for the given block */
-static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
+static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev,
+					 struct rrpc_addr *gp)
 {
-	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvm_geo *geo = &dev->geo;
+	struct rrpc_block *rblk = gp->rblk;
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_lun *lun = rlun->parent;
-
-	return lun->id * geo->sec_per_lun + rblk->id * geo->sec_per_blk;
-}
-
-static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev, u64 addr)
-{
+	u64 addr = gp->addr;
 	struct ppa_addr paddr;
 
 	paddr.ppa = addr;
-	return linear_to_generic_addr(&dev->geo, paddr);
+	paddr = rrpc_linear_to_generic_addr(&dev->geo, paddr);
+	paddr.g.ch = rlun->bppa.g.ch;
+	paddr.g.lun = rlun->bppa.g.lun;
+	paddr.g.blk = rblk->id;
+
+	return paddr;
 }
 
 /* requires lun->lock taken */
@@ -216,7 +213,6 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_lun *lun = rlun->parent;
 
 	spin_lock(&rlun->lock);
 	if (rblk->state & NVM_BLK_ST_TGT) {
@@ -229,8 +225,8 @@ static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 	} else {
 		WARN_ON_ONCE(1);
 		pr_err("rrpc: erroneous type (ch:%d,lun:%d,blk%d-> %u)\n",
-						lun->chnl_id, lun->lun_id,
-						rblk->id, rblk->state);
+					rlun->bppa.g.ch, rlun->bppa.g.lun,
+					rblk->id, rblk->state);
 		list_move_tail(&rblk->list, &rlun->bb_list);
 	}
 	spin_unlock(&rlun->lock);
@@ -336,7 +332,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 try:
 		spin_lock(&rrpc->rev_lock);
 		/* Get logical address from physical to logical table */
-		rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
+		rev = &rrpc->rev_trans_map[phys_addr];
 		/* already updated by previous regular write */
 		if (rev->addr == ADDR_EMPTY) {
 			spin_unlock(&rrpc->rev_lock);
@@ -423,18 +419,18 @@ static void rrpc_block_gc(struct work_struct *work)
 
 	mempool_free(gcb, rrpc->gcb_pool);
 	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' being reclaimed\n",
-			rlun->parent->chnl_id, rlun->parent->lun_id,
+			rlun->bppa.g.ch, rlun->bppa.g.lun,
 			rblk->id);
 
 	if (rrpc_move_valid_pages(rrpc, rblk))
 		goto put_back;
 
 	ppa.ppa = 0;
-	ppa.g.ch = rlun->parent->chnl_id;
-	ppa.g.lun = rlun->parent->lun_id;
+	ppa.g.ch = rlun->bppa.g.ch;
+	ppa.g.lun = rlun->bppa.g.lun;
 	ppa.g.blk = rblk->id;
 
-	if (nvm_erase_blk(dev->parent, &ppa, 0))
+	if (nvm_erase_blk(dev, &ppa, 0))
 		goto put_back;
 
 	rrpc_put_blk(rrpc, rblk);
@@ -506,8 +502,7 @@ static void rrpc_lun_gc(struct work_struct *work)
 		WARN_ON(!block_is_full(rrpc, rblk));
 
 		pr_debug("rrpc: selected block 'ch:%d,lun:%d,blk:%d' for GC\n",
-					rlun->parent->chnl_id,
-					rlun->parent->lun_id,
+					rlun->bppa.g.ch, rlun->bppa.g.lun,
 					rblk->id);
 
 		gcb->rrpc = rrpc;
@@ -537,8 +532,7 @@ static void rrpc_gc_queue(struct work_struct *work)
 
 	mempool_free(gcb, rrpc->gcb_pool);
 	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' full, allow GC (sched)\n",
-					rlun->parent->chnl_id,
-					rlun->parent->lun_id,
+					rlun->bppa.g.ch, rlun->bppa.g.lun,
 					rblk->id);
 }
 
@@ -586,7 +580,7 @@ static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
 	gp->addr = paddr;
 	gp->rblk = rblk;
 
-	rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
+	rev = &rrpc->rev_trans_map[gp->addr];
 	rev->addr = laddr;
 	spin_unlock(&rrpc->rev_lock);
 
@@ -601,7 +595,7 @@ static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 	if (block_is_full(rrpc, rblk))
 		goto out;
 
-	addr = block_to_addr(rrpc, rblk) + rblk->next_page;
+	addr = rblk->next_page;
 
 	rblk->next_page++;
 out:
@@ -615,18 +609,22 @@ out:
  * Returns rrpc_addr with the physical address and block. Returns NULL if no
  * blocks in the next rlun are available.
  */
-static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
+static struct ppa_addr rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
 								int is_gc)
 {
+	struct nvm_tgt_dev *tgt_dev = rrpc->dev;
 	struct rrpc_lun *rlun;
 	struct rrpc_block *rblk, **cur_rblk;
+	struct rrpc_addr *p;
+	struct ppa_addr ppa;
 	u64 paddr;
 	int gc_force = 0;
 
+	ppa.ppa = ADDR_EMPTY;
 	rlun = rrpc_get_lun_rr(rrpc, is_gc);
 
 	if (!is_gc && rlun->nr_free_blocks < rrpc->nr_luns * 4)
-		return NULL;
+		return ppa;
 
 	/*
 	 * page allocation steps:
@@ -683,10 +681,15 @@ new_blk:
 	}
 
 	pr_err("rrpc: failed to allocate new block\n");
-	return NULL;
+	return ppa;
 done:
 	spin_unlock(&rlun->lock);
-	return rrpc_update_map(rrpc, laddr, rblk, paddr);
+	p = rrpc_update_map(rrpc, laddr, rblk, paddr);
+	if (!p)
+		return ppa;
+
+	/* return global address */
+	return rrpc_ppa_to_gaddr(tgt_dev, p);
 }
 
 static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
@@ -712,8 +715,8 @@ static struct rrpc_lun *rrpc_ppa_to_lun(struct rrpc *rrpc, struct ppa_addr p)
 	int i;
 
 	for (i = 0; i < rrpc->nr_luns; i++) {
-		if (rrpc->luns[i].parent->chnl_id == p.g.ch &&
-				rrpc->luns[i].parent->lun_id == p.g.lun) {
+		if (rrpc->luns[i].bppa.g.ch == p.g.ch &&
+				rrpc->luns[i].bppa.g.lun == p.g.lun) {
 			rlun = &rrpc->luns[i];
 			break;
 		}
@@ -823,7 +826,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 		gp = &rrpc->trans_map[laddr + i];
 
 		if (gp->rblk) {
-			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, gp->addr);
+			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, gp);
 		} else {
 			BUG_ON(is_gc);
 			rrpc_unlock_laddr(rrpc, r);
@@ -852,7 +855,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
 	gp = &rrpc->trans_map[laddr];
 
 	if (gp->rblk) {
-		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
+		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp);
 	} else {
 		BUG_ON(is_gc);
 		rrpc_unlock_rq(rrpc, rqd);
@@ -869,7 +872,7 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
-	struct rrpc_addr *p;
+	struct ppa_addr p;
 	sector_t laddr = rrpc_get_laddr(bio);
 	int is_gc = flags & NVM_IOTYPE_GC;
 	int i;
@@ -882,7 +885,7 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 	for (i = 0; i < npages; i++) {
 		/* We assume that mapping occurs at 4KB granularity */
 		p = rrpc_map_page(rrpc, laddr + i, is_gc);
-		if (!p) {
+		if (p.ppa == ADDR_EMPTY) {
 			BUG_ON(is_gc);
 			rrpc_unlock_laddr(rrpc, r);
 			nvm_dev_dma_free(dev->parent, rqd->ppa_list,
@@ -891,7 +894,7 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 			return NVM_IO_REQUEUE;
 		}
 
-		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, p->addr);
+		rqd->ppa_list[i] = p;
 	}
 
 	rqd->opcode = NVM_OP_HBWRITE;
@@ -902,7 +905,7 @@ static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
 				struct nvm_rq *rqd, unsigned long flags)
 {
-	struct rrpc_addr *p;
+	struct ppa_addr p;
 	int is_gc = flags & NVM_IOTYPE_GC;
 	sector_t laddr = rrpc_get_laddr(bio);
 
@@ -910,14 +913,14 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
 		return NVM_IO_REQUEUE;
 
 	p = rrpc_map_page(rrpc, laddr, is_gc);
-	if (!p) {
+	if (p.ppa == ADDR_EMPTY) {
 		BUG_ON(is_gc);
 		rrpc_unlock_rq(rrpc, rqd);
 		rrpc_gc_kick(rrpc);
 		return NVM_IO_REQUEUE;
 	}
 
-	rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, p->addr);
+	rqd->ppa_addr = p;
 	rqd->opcode = NVM_OP_HBWRITE;
 
 	return NVM_IO_OK;
@@ -973,7 +976,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 	rqd->nr_ppas = nr_pages;
 	rrq->flags = flags;
 
-	err = nvm_submit_io(dev->parent, rqd);
+	err = nvm_submit_io(dev, rqd);
 	if (err) {
 		pr_err("rrpc: I/O submission failed: %d\n", err);
 		bio_put(bio);
@@ -1113,10 +1116,7 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 		div_u64_rem(pba, rrpc->nr_sects, &mod);
 
-		addr[i].addr = pba;
-		raddr[mod].addr = slba + i;
-
-		gaddr = rrpc_ppa_to_gaddr(dev, pba);
+		gaddr = rrpc_recov_addr(dev->parent, pba);
 		rlun = rrpc_ppa_to_lun(rrpc, gaddr);
 		if (!rlun) {
 			pr_err("rrpc: l2p corruption on lba %llu\n",
@@ -1134,6 +1134,10 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 			rblk->state = NVM_BLK_ST_TGT;
 			rlun->nr_free_blocks--;
 		}
+
+		addr[i].addr = pba;
+		addr[i].rblk = rblk;
+		raddr[mod].addr = slba + i;
 	}
 
 	return 0;
@@ -1230,7 +1234,6 @@ static void rrpc_core_free(struct rrpc *rrpc)
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
-	struct nvm_lun *lun;
 	struct rrpc_lun *rlun;
 	int i;
 
@@ -1239,9 +1242,6 @@ static void rrpc_luns_free(struct rrpc *rrpc)
 
 	for (i = 0; i < rrpc->nr_luns; i++) {
 		rlun = &rrpc->luns[i];
-		lun = rlun->parent;
-		if (!lun)
-			break;
 		vfree(rlun->blocks);
 	}
 
@@ -1264,8 +1264,8 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
 		return -ENOMEM;
 
 	ppa.ppa = 0;
-	ppa.g.ch = rlun->parent->chnl_id;
-	ppa.g.lun = rlun->parent->lun_id;
+	ppa.g.ch = rlun->bppa.g.ch;
+	ppa.g.lun = rlun->bppa.g.lun;
 
 	ret = nvm_get_bb_tbl(dev->parent, ppa, blks);
 	if (ret) {
@@ -1293,11 +1293,17 @@ out:
 	return ret;
 }
 
-static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
+static void rrpc_set_lun_ppa(struct rrpc_lun *rlun, struct ppa_addr ppa)
+{
+	rlun->bppa.ppa = 0;
+	rlun->bppa.g.ch = ppa.g.ch;
+	rlun->bppa.g.lun = ppa.g.lun;
+}
+
+static int rrpc_luns_init(struct rrpc *rrpc, struct ppa_addr *luns)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct nvm_lun *lun;
 	struct rrpc_lun *rlun;
 	int i, j, ret = -EINVAL;
 
@@ -1313,12 +1319,11 @@ static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
 	if (!rrpc->luns)
 		return -ENOMEM;
 
-	i = 0;
-
 	/* 1:1 mapping */
-	list_for_each_entry(lun, lun_list, list) {
-		rlun = &rrpc->luns[i++];
-		rlun->parent = lun;
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		rlun->id = i;
+		rrpc_set_lun_ppa(rlun, luns[i]);
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
 							geo->blks_per_lun);
 		if (!rlun->blocks) {
@@ -1356,8 +1361,6 @@ static int rrpc_luns_init(struct rrpc *rrpc, struct list_head *lun_list)
 		spin_lock_init(&rlun->lock);
 	}
 
-	WARN_ON(i != rrpc->nr_luns);
-
 	return 0;
 err:
 	return ret;
@@ -1511,14 +1514,12 @@ err:
 
 static struct nvm_tgt_type tt_rrpc;
 
-static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
-						struct list_head *lun_list)
+static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
 	struct nvm_geo *geo = &dev->geo;
 	struct rrpc *rrpc;
-	int lun_begin = (list_first_entry(lun_list, struct nvm_lun, list))->id;
 	sector_t soffset;
 	int ret;
 
@@ -1553,14 +1554,12 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	}
 	rrpc->soffset = soffset;
 
-	ret = rrpc_luns_init(rrpc, lun_list);
+	ret = rrpc_luns_init(rrpc, dev->luns);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");
 		goto err;
 	}
 
-	rrpc->poffset = geo->sec_per_lun * lun_begin;
-
 	ret = rrpc_core_init(rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize core\n");
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index c55cec9a1a26..bc8adba4d63f 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -74,7 +74,9 @@ struct rrpc_block {
 
 struct rrpc_lun {
 	struct rrpc *rrpc;
-	struct nvm_lun *parent;
+
+	int id;
+	struct ppa_addr bppa;
 
 	struct rrpc_block *cur, *gc_cur;
 	struct rrpc_block *blocks;	/* Reference to block allocation */
@@ -107,7 +109,6 @@ struct rrpc {
 	struct gendisk *disk;
 
 	sector_t soffset; /* logical sector offset */
-	u64 poffset; /* physical page offset */
 
 	int nr_luns;
 	struct rrpc_lun *luns;
@@ -164,14 +165,37 @@ struct rrpc_rev_addr {
 	u64 addr;
 };
 
+static inline struct ppa_addr rrpc_linear_to_generic_addr(struct nvm_geo *geo,
+							  struct ppa_addr r)
+{
+	struct ppa_addr l;
+	int secs, pgs;
+	sector_t ppa = r.ppa;
+
+	l.ppa = 0;
+
+	div_u64_rem(ppa, geo->sec_per_pg, &secs);
+	l.g.sec = secs;
+
+	sector_div(ppa, geo->sec_per_pg);
+	div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
+	l.g.pg = pgs;
+
+	return l;
+}
+
+static inline struct ppa_addr rrpc_recov_addr(struct nvm_dev *dev, u64 pba)
+{
+	return linear_to_generic_addr(&dev->geo, pba);
+}
+
 static inline u64 rrpc_blk_to_ppa(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_lun *lun = rlun->parent;
 
-	return (lun->id * geo->sec_per_lun) + (rblk->id * geo->sec_per_blk);
+	return (rlun->id * geo->sec_per_lun) + (rblk->id * geo->sec_per_blk);
 }
 
 static inline sector_t rrpc_get_laddr(struct bio *bio)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 1cdc8124c8c0..588d4a34c083 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -371,6 +371,9 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 			return -EINVAL;
 		}
 
+		/* Transform physical address to target address space */
+		nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb);
+
 		if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
 			ret = -EINTR;
 			goto out;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index cc210cc85c6d..2222853ef969 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -47,6 +47,7 @@ struct ppa_addr {
 struct nvm_rq;
 struct nvm_id;
 struct nvm_dev;
+struct nvm_tgt_dev;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
 typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
@@ -210,7 +211,6 @@ struct nvm_id {
 
 struct nvm_target {
 	struct list_head list;
-	struct list_head lun_list;
 	struct nvm_tgt_dev *dev;
 	struct nvm_tgt_type *type;
 	struct gendisk *disk;
@@ -231,7 +231,7 @@ typedef void (nvm_end_io_fn)(struct nvm_rq *);
 
 struct nvm_rq {
 	struct nvm_tgt_instance *ins;
-	struct nvm_dev *dev;
+	struct nvm_tgt_dev *dev;
 
 	struct bio *bio;
 
@@ -266,15 +266,6 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 	return rqdata + 1;
 }
 
-struct nvm_lun {
-	int id;
-
-	int lun_id;
-	int chnl_id;
-
-	struct list_head list;
-};
-
 enum {
 	NVM_BLK_ST_FREE =	0x1,	/* Free block */
 	NVM_BLK_ST_TGT =	0x2,	/* Block in use by target */
@@ -321,6 +312,9 @@ struct nvm_tgt_dev {
 	/* Device information */
 	struct nvm_geo geo;
 
+	/* Base ppas for target LUNs */
+	struct ppa_addr *luns;
+
 	sector_t total_secs;
 
 	struct nvm_id identity;
@@ -330,6 +324,7 @@ struct nvm_tgt_dev {
 	struct nvm_dev_ops *ops;
 
 	void *parent;
+	void *map;
 };
 
 struct nvm_dev {
@@ -363,16 +358,18 @@ struct nvm_dev {
 	char name[DISK_NAME_LEN];
 	void *private_data;
 
+	void *rmap;
+
 	struct mutex mlock;
 	spinlock_t lock;
 };
 
 static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
-							struct ppa_addr r)
+						     u64 pba)
 {
 	struct ppa_addr l;
 	int secs, pgs, blks, luns;
-	sector_t ppa = r.ppa;
+	sector_t ppa = pba;
 
 	l.ppa = 0;
 
@@ -465,8 +462,7 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
-				struct list_head *lun_list);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
 typedef void (nvm_tgt_exit_fn)(void *);
 
 struct nvm_tgt_type {
@@ -499,10 +495,11 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *);
 
 typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
 typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
-typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct ppa_addr *, int);
+typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
+typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
 
 struct nvmm_type {
 	const char *name;
@@ -520,6 +517,8 @@ struct nvmm_type {
 	nvmm_get_area_fn *get_area;
 	nvmm_put_area_fn *put_area;
 
+	nvmm_part_to_tgt_fn *part_to_tgt;
+
 	struct list_head list;
 };
 
@@ -533,14 +532,18 @@ extern void nvm_unregister(struct nvm_dev *);
 extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas,
 							int nr_ppas, int type);
 
-extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
 extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
-extern int nvm_erase_blk(struct nvm_dev *, struct ppa_addr *, int);
+extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
+extern int nvm_get_l2p_tbl(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *,
+			   void *);
+extern int nvm_get_area(struct nvm_dev *, sector_t *, sector_t);
+extern void nvm_put_area(struct nvm_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
 								void *, int);
-- 
cgit v1.2.3


From 959e911b31981b52ed3f3d6e351b107bcb9163ef Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:11 +0100
Subject: lightnvm: introduce helpers for generic ops in rrpc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid calling media manager and device-specific operations directly from
rrpc. Create helper functions on lightnvm's core instead.

Signed-off-by: Javier González <javier@cnexlabs.com>

Made it work with null_blk as well.
Signed-off-by: Matias Bjørling <m@bjorling.me>

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c   | 22 ++++++++++++++++++++++
 drivers/lightnvm/gennvm.c |  2 --
 drivers/lightnvm/rrpc.c   | 25 ++++++++++++-------------
 include/linux/lightnvm.h  |  5 +----
 4 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 23d582f82219..69f261e650bc 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -219,6 +219,28 @@ int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags)
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
+int nvm_get_l2p_tbl(struct nvm_dev *dev, u64 slba, u32 nlb,
+		    nvm_l2p_update_fn *update_l2p, void *priv)
+{
+	if (!dev->ops->get_l2p_tbl)
+		return 0;
+
+	return dev->ops->get_l2p_tbl(dev, slba, nlb, update_l2p, priv);
+}
+EXPORT_SYMBOL(nvm_get_l2p_tbl);
+
+int nvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
+{
+	return dev->mt->get_area(dev, lba, len);
+}
+EXPORT_SYMBOL(nvm_get_area);
+
+void nvm_put_area(struct nvm_dev *dev, sector_t lba)
+{
+	dev->mt->put_area(dev, lba);
+}
+EXPORT_SYMBOL(nvm_put_area);
+
 void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
 	int i;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 5d7c8c47bef8..befa8281ab3f 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -175,8 +175,6 @@ static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev,
 	tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
 	tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
 	tgt_dev->q = dev->q;
-	tgt_dev->ops = dev->ops;
-	tgt_dev->mt = dev->mt;
 	tgt_dev->map = dev_map;
 	tgt_dev->luns = luns;
 	memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 75ed12ae0f47..e4678b749ba5 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -1166,11 +1166,8 @@ static int rrpc_map_init(struct rrpc *rrpc)
 		r->addr = ADDR_EMPTY;
 	}
 
-	if (!dev->ops->get_l2p_tbl)
-		return 0;
-
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev->parent, rrpc->soffset, rrpc->nr_sects,
+	ret = nvm_get_l2p_tbl(dev->parent, rrpc->soffset, rrpc->nr_sects,
 					rrpc_l2p_update, rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1258,6 +1255,9 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
 	int i;
 	int ret;
 
+	if (!dev->parent->ops->get_bb_tbl)
+		return 0;
+
 	nr_blks = geo->blks_per_lun * geo->plane_mode;
 	blks = kmalloc(nr_blks, GFP_KERNEL);
 	if (!blks)
@@ -1277,7 +1277,6 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
 	if (nr_blks < 0)
 		return nr_blks;
 
-	rlun->nr_free_blocks = geo->blks_per_lun;
 	for (i = 0; i < nr_blks; i++) {
 		if (blks[i] == NVM_BLK_T_FREE)
 			continue;
@@ -1348,17 +1347,19 @@ static int rrpc_luns_init(struct rrpc *rrpc, struct ppa_addr *luns)
 			list_add_tail(&rblk->list, &rlun->free_list);
 		}
 
-		if (rrpc_bb_discovery(dev, rlun))
-			goto err;
-
+		rlun->rrpc = rrpc;
+		rlun->nr_free_blocks = geo->blks_per_lun;
 		rlun->reserved_blocks = 2; /* for GC only */
 
-		rlun->rrpc = rrpc;
 		INIT_LIST_HEAD(&rlun->prio_list);
 		INIT_LIST_HEAD(&rlun->wblk_list);
 
 		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
 		spin_lock_init(&rlun->lock);
+
+		if (rrpc_bb_discovery(dev, rlun))
+			goto err;
+
 	}
 
 	return 0;
@@ -1370,13 +1371,12 @@ err:
 static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvmm_type *mt = dev->mt;
 	sector_t size = rrpc->nr_sects * dev->geo.sec_size;
 	int ret;
 
 	size >>= 9;
 
-	ret = mt->get_area(dev->parent, begin, size);
+	ret = nvm_get_area(dev->parent, begin, size);
 	if (!ret)
 		*begin >>= (ilog2(dev->geo.sec_size) - 9);
 
@@ -1386,10 +1386,9 @@ static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
 static void rrpc_area_free(struct rrpc *rrpc)
 {
 	struct nvm_tgt_dev *dev = rrpc->dev;
-	struct nvmm_type *mt = dev->mt;
 	sector_t begin = rrpc->soffset << (ilog2(dev->geo.sec_size) - 9);
 
-	mt->put_area(dev->parent, begin);
+	nvm_put_area(dev->parent, begin);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2222853ef969..99cd1e70e451 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -320,10 +320,7 @@ struct nvm_tgt_dev {
 	struct nvm_id identity;
 	struct request_queue *q;
 
-	struct nvmm_type *mt;
-	struct nvm_dev_ops *ops;
-
-	void *parent;
+	struct nvm_dev *parent;
 	void *map;
 };
 
-- 
cgit v1.2.3


From a279006afa3377493c4240395c70430f2a9b0d2b Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:12 +0100
Subject: lightnvm: introduce max_phys_sects helper function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Target devices do not have access to the device driver operations.
Introduce a helper function that exposes the max. number of physical
sectors supported by the underlying device.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c  | 8 ++++++++
 include/linux/lightnvm.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 69f261e650bc..3d8eaac99ba8 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -203,6 +203,14 @@ int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
 }
 EXPORT_SYMBOL(nvm_set_bb_tbl);
 
+int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+
+	return dev->ops->max_phys_sect;
+}
+EXPORT_SYMBOL(nvm_max_phys_sects);
+
 int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 99cd1e70e451..96375317b479 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -529,6 +529,7 @@ extern void nvm_unregister(struct nvm_dev *);
 extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas,
 							int nr_ppas, int type);
 
+extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
 extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
-- 
cgit v1.2.3


From da2d7cb828ce2714c603827ac5a6e1c98a02e861 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Mon, 28 Nov 2016 22:39:13 +0100
Subject: lightnvm: use target nvm on target-specific ops.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On target-specific operations pass on nvm_tgt_dev instead of the generic
nvm device.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c  | 15 ++++++++++-----
 drivers/lightnvm/rrpc.c  | 14 +++++++-------
 drivers/lightnvm/rrpc.h  |  2 +-
 include/linux/lightnvm.h |  6 +++---
 4 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 3d8eaac99ba8..07bf989d2f77 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -86,8 +86,7 @@ void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
 }
 EXPORT_SYMBOL(nvm_dev_dma_alloc);
 
-void nvm_dev_dma_free(struct nvm_dev *dev, void *addr,
-							dma_addr_t dma_handler)
+void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
 {
 	dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
 }
@@ -227,9 +226,11 @@ int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags)
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
-int nvm_get_l2p_tbl(struct nvm_dev *dev, u64 slba, u32 nlb,
+int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb,
 		    nvm_l2p_update_fn *update_l2p, void *priv)
 {
+	struct nvm_dev *dev = tgt_dev->parent;
+
 	if (!dev->ops->get_l2p_tbl)
 		return 0;
 
@@ -237,14 +238,18 @@ int nvm_get_l2p_tbl(struct nvm_dev *dev, u64 slba, u32 nlb,
 }
 EXPORT_SYMBOL(nvm_get_l2p_tbl);
 
-int nvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
+int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len)
 {
+	struct nvm_dev *dev = tgt_dev->parent;
+
 	return dev->mt->get_area(dev, lba, len);
 }
 EXPORT_SYMBOL(nvm_get_area);
 
-void nvm_put_area(struct nvm_dev *dev, sector_t lba)
+void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t lba)
 {
+	struct nvm_dev *dev = tgt_dev->parent;
+
 	dev->mt->put_area(dev, lba);
 }
 EXPORT_SYMBOL(nvm_put_area);
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index e4678b749ba5..8a27bcc62f23 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -983,8 +983,8 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 		if (!(flags & NVM_IOTYPE_GC)) {
 			rrpc_unlock_rq(rrpc, rqd);
 			if (rqd->nr_ppas > 1)
-				nvm_dev_dma_free(dev->parent,
-					rqd->ppa_list, rqd->dma_ppa_list);
+				nvm_dev_dma_free(dev->parent, rqd->ppa_list,
+							rqd->dma_ppa_list);
 		}
 		return NVM_IO_ERR;
 	}
@@ -1116,7 +1116,7 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 
 		div_u64_rem(pba, rrpc->nr_sects, &mod);
 
-		gaddr = rrpc_recov_addr(dev->parent, pba);
+		gaddr = rrpc_recov_addr(dev, pba);
 		rlun = rrpc_ppa_to_lun(rrpc, gaddr);
 		if (!rlun) {
 			pr_err("rrpc: l2p corruption on lba %llu\n",
@@ -1167,8 +1167,8 @@ static int rrpc_map_init(struct rrpc *rrpc)
 	}
 
 	/* Bring up the mapping table from device */
-	ret = nvm_get_l2p_tbl(dev->parent, rrpc->soffset, rrpc->nr_sects,
-					rrpc_l2p_update, rrpc);
+	ret = nvm_get_l2p_tbl(dev, rrpc->soffset, rrpc->nr_sects,
+							rrpc_l2p_update, rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
 		return -EINVAL;
@@ -1376,7 +1376,7 @@ static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
 
 	size >>= 9;
 
-	ret = nvm_get_area(dev->parent, begin, size);
+	ret = nvm_get_area(dev, begin, size);
 	if (!ret)
 		*begin >>= (ilog2(dev->geo.sec_size) - 9);
 
@@ -1388,7 +1388,7 @@ static void rrpc_area_free(struct rrpc *rrpc)
 	struct nvm_tgt_dev *dev = rrpc->dev;
 	sector_t begin = rrpc->soffset << (ilog2(dev->geo.sec_size) - 9);
 
-	nvm_put_area(dev->parent, begin);
+	nvm_put_area(dev, begin);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index bc8adba4d63f..94e4d73116b2 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -184,7 +184,7 @@ static inline struct ppa_addr rrpc_linear_to_generic_addr(struct nvm_geo *geo,
 	return l;
 }
 
-static inline struct ppa_addr rrpc_recov_addr(struct nvm_dev *dev, u64 pba)
+static inline struct ppa_addr rrpc_recov_addr(struct nvm_tgt_dev *dev, u64 pba)
 {
 	return linear_to_generic_addr(&dev->geo, pba);
 }
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 96375317b479..e76f9c4aa49b 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -538,10 +538,10 @@ extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
 extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
-extern int nvm_get_l2p_tbl(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *,
+extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
 			   void *);
-extern int nvm_get_area(struct nvm_dev *, sector_t *, sector_t);
-extern void nvm_put_area(struct nvm_dev *, sector_t);
+extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
+extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
 								void *, int);
-- 
cgit v1.2.3


From 333ba053d145d6f9152f6b0311a345b876f0fed1 Mon Sep 17 00:00:00 2001
From: Javier González <javier@cnexlabs.com>
Date: Mon, 28 Nov 2016 22:39:14 +0100
Subject: lightnvm: transform target get/set bad block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since targets are given a virtual target device, it is necessary to
translate all communication between targets and the backend device.
Implement the translation layer for get/set bad block table.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c   | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/lightnvm/gennvm.c | 19 +++++++++++-----
 drivers/lightnvm/rrpc.c   |  4 ++--
 include/linux/lightnvm.h  | 15 +++++++++---
 4 files changed, 85 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 07bf989d2f77..7622e3dc5d82 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -175,6 +175,26 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 	return NULL;
 }
 
+static void nvm_tgt_generic_to_addr_mode(struct nvm_tgt_dev *tgt_dev,
+					 struct nvm_rq *rqd)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+	int i;
+
+	if (rqd->nr_ppas > 1) {
+		for (i = 0; i < rqd->nr_ppas; i++) {
+			rqd->ppa_list[i] = dev->mt->trans_ppa(tgt_dev,
+					rqd->ppa_list[i], TRANS_TGT_TO_DEV);
+			rqd->ppa_list[i] = generic_to_dev_addr(dev,
+							rqd->ppa_list[i]);
+		}
+	} else {
+		rqd->ppa_addr = dev->mt->trans_ppa(tgt_dev, rqd->ppa_addr,
+						TRANS_TGT_TO_DEV);
+		rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
+	}
+}
+
 int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
 								int type)
 {
@@ -202,6 +222,34 @@ int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
 }
 EXPORT_SYMBOL(nvm_set_bb_tbl);
 
+int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
+		       int nr_ppas, int type)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct nvm_rq rqd;
+	int ret;
+
+	if (nr_ppas > dev->ops->max_phys_sect) {
+		pr_err("nvm: unable to update all blocks atomically\n");
+		return -EINVAL;
+	}
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
+	nvm_tgt_generic_to_addr_mode(tgt_dev, &rqd);
+
+	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
+	nvm_free_rqd_ppalist(dev, &rqd);
+	if (ret) {
+		pr_err("nvm: sysblk failed bb mark\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
+
 int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
@@ -519,6 +567,16 @@ int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks)
 }
 EXPORT_SYMBOL(nvm_get_bb_tbl);
 
+int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
+		       u8 *blks)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+
+	ppa = dev->mt->trans_ppa(tgt_dev, ppa, TRANS_TGT_TO_DEV);
+	return nvm_get_bb_tbl(dev, ppa, blks);
+}
+EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
+
 static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
 {
 	struct nvm_geo *geo = &dev->geo;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index befa8281ab3f..ca7880082d80 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -482,12 +482,6 @@ static void gen_unregister(struct nvm_dev *dev)
 	module_put(THIS_MODULE);
 }
 
-enum {
-	TRANS_TGT_TO_DEV =	0x0,
-	TRANS_DEV_TO_TGT =	0x1,
-};
-
-
 static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
 {
 	struct gen_dev_map *dev_map = tgt_dev->map;
@@ -584,6 +578,18 @@ static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p,
 	return nvm_erase_ppa(tgt_dev->parent, p, 1, flags);
 }
 
+static struct ppa_addr gen_trans_ppa(struct nvm_tgt_dev *tgt_dev,
+				     struct ppa_addr p, int direction)
+{
+	gen_trans_fn *f;
+	struct ppa_addr ppa = p;
+
+	f = (direction == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
+	f(tgt_dev, &ppa);
+
+	return ppa;
+}
+
 static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
 			       int len)
 {
@@ -631,6 +637,7 @@ static struct nvmm_type gen = {
 	.get_area		= gen_get_area,
 	.put_area		= gen_put_area,
 
+	.trans_ppa		= gen_trans_ppa,
 	.part_to_tgt		= gen_part_to_tgt,
 };
 
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 8a27bcc62f23..9fb7de395915 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -735,7 +735,7 @@ static void __rrpc_mark_bad_block(struct rrpc *rrpc, struct ppa_addr ppa)
 	rblk = &rlun->blocks[ppa.g.blk];
 	rblk->state = NVM_BLK_ST_BAD;
 
-	nvm_set_bb_tbl(dev->parent, &ppa, 1, NVM_BLK_T_GRWN_BAD);
+	nvm_set_tgt_bb_tbl(dev, &ppa, 1, NVM_BLK_T_GRWN_BAD);
 }
 
 static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
@@ -1267,7 +1267,7 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
 	ppa.g.ch = rlun->bppa.g.ch;
 	ppa.g.lun = rlun->bppa.g.lun;
 
-	ret = nvm_get_bb_tbl(dev->parent, ppa, blks);
+	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
 	if (ret) {
 		pr_err("rrpc: could not get BB table\n");
 		goto out;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e76f9c4aa49b..7c273bbc5351 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -496,8 +496,15 @@ typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
+					    struct ppa_addr, int);
 typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
 
+enum {
+	TRANS_TGT_TO_DEV =	0x0,
+	TRANS_DEV_TO_TGT =	0x1,
+};
+
 struct nvmm_type {
 	const char *name;
 	unsigned int version[3];
@@ -514,6 +521,7 @@ struct nvmm_type {
 	nvmm_get_area_fn *get_area;
 	nvmm_put_area_fn *put_area;
 
+	nvmm_trans_ppa_fn *trans_ppa;
 	nvmm_part_to_tgt_fn *part_to_tgt;
 
 	struct list_head list;
@@ -526,9 +534,9 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas,
-							int nr_ppas, int type);
-
+extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
+			      int, int);
 extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
@@ -549,6 +557,7 @@ extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
 							int, void *, int);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
+extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
 /* sysblk.c */
 #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
-- 
cgit v1.2.3


From ced6473e7486702f530a49f886b73195e4977734 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 28 Nov 2016 16:41:41 +0100
Subject: driver core: class: add class_groups support

struct class needs to have a set of default groups that are added, as
adding individual attributes does not work well in the long run.  So add
support for that.

Future patches will convert the existing usages of class_attrs to use
class_groups and then class_attrs will go away.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c   | 15 +++++++++++++++
 include/linux/device.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 71059e32bebc..a2b2896693d6 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -163,6 +163,18 @@ static void klist_class_dev_put(struct klist_node *n)
 	put_device(dev);
 }
 
+static int class_add_groups(struct class *cls,
+			    const struct attribute_group **groups)
+{
+	return sysfs_create_groups(&cls->p->subsys.kobj, groups);
+}
+
+static void class_remove_groups(struct class *cls,
+				const struct attribute_group **groups)
+{
+	return sysfs_remove_groups(&cls->p->subsys.kobj, groups);
+}
+
 int __class_register(struct class *cls, struct lock_class_key *key)
 {
 	struct subsys_private *cp;
@@ -203,6 +215,8 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 		kfree(cp);
 		return error;
 	}
+	error = class_add_groups(class_get(cls), cls->class_groups);
+	class_put(cls);
 	error = add_class_attrs(class_get(cls));
 	class_put(cls);
 	return error;
@@ -213,6 +227,7 @@ void class_unregister(struct class *cls)
 {
 	pr_debug("device class '%s': unregistering\n", cls->name);
 	remove_class_attrs(cls);
+	class_remove_groups(cls, cls->class_groups);
 	kset_unregister(&cls->p->subsys);
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 4264caacebb9..4cd8e52033b0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -362,6 +362,7 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @name:	Name of the class.
  * @owner:	The module owner.
  * @class_attrs: Default attributes of this class.
+ * @class_groups: Default attributes of this class.
  * @dev_groups:	Default attributes of the devices that belong to the class.
  * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
  * @dev_uevent:	Called when a device is added, removed from this class, or a
@@ -390,6 +391,7 @@ struct class {
 	struct module		*owner;
 
 	struct class_attribute		*class_attrs;
+	const struct attribute_group	**class_groups;
 	const struct attribute_group	**dev_groups;
 	struct kobject			*dev_kobj;
 
-- 
cgit v1.2.3


From 1d7f1589d341344c0c598b00de44891a7968c6a0 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Tue, 22 Nov 2016 18:22:09 +0000
Subject: fpga: Clarify how write_init works streaming modes

This interface was designed for streaming, but write_init's buf
argument has an unclear purpose. Define it to be the first bytes
of the bitstream. Each driver gets to set how many bytes (at most)
it wants to see. Short bitstreams will be passed through as-is, while
long ones will be truncated.

The intent is to allow drivers to peek at the header before the transfer
actually starts.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Alan Tull <atull@opensource.altera.com>
---
 Documentation/fpga/fpga-mgr.txt | 5 ++++-
 drivers/fpga/fpga-mgr.c         | 6 ++++--
 drivers/fpga/socfpga-a10.c      | 1 +
 include/linux/fpga/fpga-mgr.h   | 2 ++
 4 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/fpga/fpga-mgr.txt b/Documentation/fpga/fpga-mgr.txt
index 087924f2b20c..86ee5078fd03 100644
--- a/Documentation/fpga/fpga-mgr.txt
+++ b/Documentation/fpga/fpga-mgr.txt
@@ -169,7 +169,10 @@ The programming sequence is:
  2. .write (may be called once or multiple times)
  3. .write_complete
 
-The .write_init function will prepare the FPGA to receive the image data.
+The .write_init function will prepare the FPGA to receive the image data.  The
+buffer passed into .write_init will be atmost .initial_header_size bytes long,
+if the whole bitstream is not immediately available then the core code will
+buffer up at least this much before starting.
 
 The .write function writes a buffer to the FPGA. The buffer may be contain the
 whole FPGA image or may be a smaller chunk of an FPGA image.  In the latter
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index 79ce2eea44db..f0a69d3e60a5 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -53,10 +53,12 @@ int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
 	/*
 	 * Call the low level driver's write_init function.  This will do the
 	 * device-specific things to get the FPGA into the state where it is
-	 * ready to receive an FPGA image.
+	 * ready to receive an FPGA image. The low level driver only gets to
+	 * see the first initial_header_size bytes in the buffer.
 	 */
 	mgr->state = FPGA_MGR_STATE_WRITE_INIT;
-	ret = mgr->mops->write_init(mgr, info, buf, count);
+	ret = mgr->mops->write_init(mgr, info, buf,
+				    min(mgr->mops->initial_header_size, count));
 	if (ret) {
 		dev_err(dev, "Error preparing FPGA for writing\n");
 		mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR;
diff --git a/drivers/fpga/socfpga-a10.c b/drivers/fpga/socfpga-a10.c
index ccd9fb23bd52..f8770af0f6b5 100644
--- a/drivers/fpga/socfpga-a10.c
+++ b/drivers/fpga/socfpga-a10.c
@@ -470,6 +470,7 @@ static enum fpga_mgr_states socfpga_a10_fpga_state(struct fpga_manager *mgr)
 }
 
 static const struct fpga_manager_ops socfpga_a10_fpga_mgr_ops = {
+	.initial_header_size = (RBF_DECOMPRESS_OFFSET + 1) * 4,
 	.state = socfpga_a10_fpga_state,
 	.write_init = socfpga_a10_fpga_write_init,
 	.write = socfpga_a10_fpga_write,
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 96a1a3311649..16551d5eac36 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -84,6 +84,7 @@ struct fpga_image_info {
 
 /**
  * struct fpga_manager_ops - ops for low level fpga manager drivers
+ * @initial_header_size: Maximum number of bytes that should be passed into write_init
  * @state: returns an enum value of the FPGA's state
  * @write_init: prepare the FPGA to receive confuration data
  * @write: write count bytes of configuration data to the FPGA
@@ -95,6 +96,7 @@ struct fpga_image_info {
  * called, so leaving them out is fine.
  */
 struct fpga_manager_ops {
+	size_t initial_header_size;
 	enum fpga_mgr_states (*state)(struct fpga_manager *mgr);
 	int (*write_init)(struct fpga_manager *mgr,
 			  struct fpga_image_info *info,
-- 
cgit v1.2.3


From d853d145ea3e63387a2ac759aa41d5e43876e561 Mon Sep 17 00:00:00 2001
From: jbrunet <jbrunet@baylibre.com>
Date: Mon, 28 Nov 2016 10:46:46 +0100
Subject: net: phy: add an option to disable EEE advertisement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds an option to disable EEE advertisement in the generic PHY
by providing a mask of prohibited modes corresponding to the value found in
the MDIO_AN_EEE_ADV register.

On some platforms, PHY Low power idle seems to be causing issues, even
breaking the link some cases. The patch provides a convenient way for these
platforms to disable EEE advertisement and work around the issue.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Yegor Yefremov <yegorslists@googlemail.com>
Tested-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        |  3 ++
 drivers/net/phy/phy_device.c | 80 +++++++++++++++++++++++++++++++++++++++-----
 include/linux/phy.h          |  3 ++
 3 files changed, 77 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 73adbaa9ac86..a3981cc6448a 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1396,6 +1396,9 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
 {
 	int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
 
+	/* Mask prohibited EEE modes */
+	val &= ~phydev->eee_broken_modes;
+
 	phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, val);
 
 	return 0;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ba86c191a13e..83e52f1b80f2 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1120,6 +1120,43 @@ static int genphy_config_advert(struct phy_device *phydev)
 	return changed;
 }
 
+/**
+ * genphy_config_eee_advert - disable unwanted eee mode advertisement
+ * @phydev: target phy_device struct
+ *
+ * Description: Writes MDIO_AN_EEE_ADV after disabling unsupported energy
+ *   efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't
+ *   changed, and 1 if it has changed.
+ */
+static int genphy_config_eee_advert(struct phy_device *phydev)
+{
+	u32 broken = phydev->eee_broken_modes;
+	u32 old_adv, adv;
+
+	/* Nothing to disable */
+	if (!broken)
+		return 0;
+
+	/* If the following call fails, we assume that EEE is not
+	 * supported by the phy. If we read 0, EEE is not advertised
+	 * In both case, we don't need to continue
+	 */
+	adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN);
+	if (adv <= 0)
+		return 0;
+
+	old_adv = adv;
+	adv &= ~broken;
+
+	/* Advertising remains unchanged with the broken mask */
+	if (old_adv == adv)
+		return 0;
+
+	phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, adv);
+
+	return 1;
+}
+
 /**
  * genphy_setup_forced - configures/forces speed/duplex from @phydev
  * @phydev: target phy_device struct
@@ -1178,15 +1215,20 @@ EXPORT_SYMBOL(genphy_restart_aneg);
  */
 int genphy_config_aneg(struct phy_device *phydev)
 {
-	int result;
+	int err, changed;
+
+	changed = genphy_config_eee_advert(phydev);
 
 	if (AUTONEG_ENABLE != phydev->autoneg)
 		return genphy_setup_forced(phydev);
 
-	result = genphy_config_advert(phydev);
-	if (result < 0) /* error */
-		return result;
-	if (result == 0) {
+	err = genphy_config_advert(phydev);
+	if (err < 0) /* error */
+		return err;
+
+	changed |= err;
+
+	if (changed == 0) {
 		/* Advertisement hasn't changed, but maybe aneg was never on to
 		 * begin with?  Or maybe phy was isolated?
 		 */
@@ -1196,16 +1238,16 @@ int genphy_config_aneg(struct phy_device *phydev)
 			return ctl;
 
 		if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
-			result = 1; /* do restart aneg */
+			changed = 1; /* do restart aneg */
 	}
 
 	/* Only restart aneg if we are advertising something different
 	 * than we were before.
 	 */
-	if (result > 0)
-		result = genphy_restart_aneg(phydev);
+	if (changed > 0)
+		return genphy_restart_aneg(phydev);
 
-	return result;
+	return 0;
 }
 EXPORT_SYMBOL(genphy_config_aneg);
 
@@ -1563,6 +1605,21 @@ static void of_set_phy_supported(struct phy_device *phydev)
 		__set_phy_supported(phydev, max_speed);
 }
 
+static void of_set_phy_eee_broken(struct phy_device *phydev)
+{
+	struct device_node *node = phydev->mdio.dev.of_node;
+	u32 broken;
+
+	if (!IS_ENABLED(CONFIG_OF_MDIO))
+		return;
+
+	if (!node)
+		return;
+
+	if (!of_property_read_u32(node, "eee-broken-modes", &broken))
+		phydev->eee_broken_modes = broken;
+}
+
 /**
  * phy_probe - probe and init a PHY device
  * @dev: device to probe and init
@@ -1600,6 +1657,11 @@ static int phy_probe(struct device *dev)
 	of_set_phy_supported(phydev);
 	phydev->advertising = phydev->supported;
 
+	/* Get the EEE modes we want to prohibit. We will ask
+	 * the PHY stop advertising these mode later on
+	 */
+	of_set_phy_eee_broken(phydev);
+
 	/* Set the state to READY by default */
 	phydev->state = PHY_READY;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index edde28ce163a..b53177fd38af 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -417,6 +417,9 @@ struct phy_device {
 	u32 advertising;
 	u32 lp_advertising;
 
+	/* Energy efficient ethernet modes which should be prohibited */
+	u32 eee_broken_modes;
+
 	int autoneg;
 
 	int link_timeout;
-- 
cgit v1.2.3


From 3816199506c7826983096fc65ed46f2733a47bb8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 31 Oct 2016 11:59:24 -0600
Subject: block: add bio_iov_iter_get_pages()

This is a helper that pins down a range from an iov_iter and adds it to
a bio without requiring a separate memory allocation for the page array.
It will be used for upcoming direct I/O implementations for block devices
and iomap based file systems.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[hch: ported to the iov_iter interface, renamed and added comments.
      All blame should be directed to me and all fame should go to Kent
      after this!]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>

(cherry picked from commit 9cd56d916aa481ce8f56d9c5302a6ed90c2e0b5f)
---
 block/bio.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h |  1 +
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index db85c5753a76..2cf6ebabc68c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -847,6 +847,55 @@ done:
 }
 EXPORT_SYMBOL(bio_add_page);
 
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+	struct page **pages = (struct page **)bv;
+	size_t offset, diff;
+	ssize_t size;
+
+	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (unlikely(size <= 0))
+		return size ? size : -EFAULT;
+	nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/*
+	 * Deep magic below:  We need to walk the pinned pages backwards
+	 * because we are abusing the space allocated for the bio_vecs
+	 * for the page array.  Because the bio_vecs are larger than the
+	 * page pointers by definition this will always work.  But it also
+	 * means we can't use bio_add_page, so any changes to it's semantics
+	 * need to be reflected here as well.
+	 */
+	bio->bi_iter.bi_size += size;
+	bio->bi_vcnt += nr_pages;
+
+	diff = (nr_pages * PAGE_SIZE - offset) - size;
+	while (nr_pages--) {
+		bv[nr_pages].bv_page = pages[nr_pages];
+		bv[nr_pages].bv_len = PAGE_SIZE;
+		bv[nr_pages].bv_offset = 0;
+	}
+
+	bv[0].bv_offset += offset;
+	bv[0].bv_len -= offset;
+	if (diff)
+		bv[bio->bi_vcnt - 1].bv_len -= diff;
+
+	iov_iter_advance(iter, size);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
+
 struct submit_bio_ret {
 	struct completion event;
 	int error;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97cb48f03dc7..66228c28c621 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -430,6 +430,7 @@ void bio_chain(struct bio *, struct bio *);
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    const struct iov_iter *, gfp_t);
-- 
cgit v1.2.3


From 54cd255808761ecfd0e000eb78eb74dde8cd0c96 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 29 Nov 2016 16:23:41 +0200
Subject: dmaengine: dma_slave_config: add support for slave port window

Some slave devices uses address window instead of single register for read
and/or write of data. With the src/dst_port_window_size the address window
can be specified and the DMAengine driver should use this information to
correctly set up the transfer to loop within the provided window.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index cc535a478bae..feee6ec6a13b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -336,6 +336,12 @@ enum dma_slave_buswidth {
  * may or may not be applicable on memory sources.
  * @dst_maxburst: same as src_maxburst but for destination target
  * mutatis mutandis.
+ * @src_port_window_size: The length of the register area in words the data need
+ * to be accessed on the device side. It is only used for devices which is using
+ * an area instead of a single register to receive the data. Typically the DMA
+ * loops in this area in order to transfer the data.
+ * @dst_port_window_size: same as src_port_window_size but for the destination
+ * port.
  * @device_fc: Flow Controller Settings. Only valid for slave channels. Fill
  * with 'true' if peripheral should be flow controller. Direction will be
  * selected at Runtime.
@@ -363,6 +369,8 @@ struct dma_slave_config {
 	enum dma_slave_buswidth dst_addr_width;
 	u32 src_maxburst;
 	u32 dst_maxburst;
+	u32 src_port_window_size;
+	u32 dst_port_window_size;
 	bool device_fc;
 	unsigned int slave_id;
 };
-- 
cgit v1.2.3


From bd2c6636cc59c4626a04d9918953a107f88eaff9 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Fri, 25 Nov 2016 17:59:07 +0300
Subject: dmaengine: DW DMAC: add multi-block property to device tree

Several versions of DW DMAC have multi block transfers hardware
support. Hardware support of multi block transfers is disabled
by default if we use DT to configure DMAC and software emulation
of multi block transfers used instead.
Add multi-block property, so it is possible to enable hardware
multi block transfers (if present) via DT.

Switch from per device is_nollp variable to multi_block array
to be able enable/disable multi block transfers separately per
channel.

Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 Documentation/devicetree/bindings/dma/snps-dma.txt |  2 ++
 arch/arc/boot/dts/abilis_tb10x.dtsi                |  1 +
 arch/arm/boot/dts/spear13xx.dtsi                   |  2 ++
 drivers/dma/dw/core.c                              |  2 +-
 drivers/dma/dw/platform.c                          | 12 +++++++++++-
 drivers/dma/dw/regs.h                              |  3 ++-
 drivers/tty/serial/8250/8250_lpss.c                |  2 +-
 include/linux/platform_data/dma-dw.h               |  5 +++--
 8 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index 0f5583293c9c..4775c66f4508 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -27,6 +27,8 @@ Optional properties:
   that services interrupts for this device
 - is_private: The device channels should be marked as private and not for by the
   general purpose DMA channel allocator. False if not passed.
+- multi-block: Multi block transfers supported by hardware. Array property with
+  one cell per channel. 0: not supported, 1 (default): supported.
 
 Example:
 
diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi
index de53f5c3251c..3121536b25a3 100644
--- a/arch/arc/boot/dts/abilis_tb10x.dtsi
+++ b/arch/arc/boot/dts/abilis_tb10x.dtsi
@@ -129,6 +129,7 @@
 			data-width = <4>;
 			clocks = <&ahb_clk>;
 			clock-names = "hclk";
+			multi-block = <1 1 1 1 1 1>;
 		};
 
 		i2c0: i2c@FF120000 {
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 449acf0d8272..17ea0abcdbd7 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -118,6 +118,7 @@
 			block_size = <0xfff>;
 			dma-masters = <2>;
 			data-width = <8 8>;
+			multi-block = <1 1 1 1 1 1 1 1>;
 		};
 
 		dma@eb000000 {
@@ -134,6 +135,7 @@
 			chan_priority = <1>;
 			block_size = <0xfff>;
 			data-width = <8 8>;
+			multi-block = <1 1 1 1 1 1 1 1>;
 		};
 
 		fsmc: flash@b0000000 {
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index c2c0a613cb7a..e5adf5d1c34f 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1569,7 +1569,7 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
 		} else {
 			dwc->block_size = pdata->block_size;
-			dwc->nollp = pdata->is_nollp;
+			dwc->nollp = !pdata->multi_block[i];
 		}
 	}
 
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index aa7a5c1b9bf8..b1655e40cfa2 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -102,7 +102,7 @@ dw_dma_parse_dt(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
-	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
+	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS], mb[DW_DMA_MAX_NR_CHANNELS];
 	u32 nr_masters;
 	u32 nr_channels;
 
@@ -118,6 +118,8 @@ dw_dma_parse_dt(struct platform_device *pdev)
 
 	if (of_property_read_u32(np, "dma-channels", &nr_channels))
 		return NULL;
+	if (nr_channels > DW_DMA_MAX_NR_CHANNELS)
+		return NULL;
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -152,6 +154,14 @@ dw_dma_parse_dt(struct platform_device *pdev)
 			pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
 	}
 
+	if (!of_property_read_u32_array(np, "multi-block", mb, nr_channels)) {
+		for (tmp = 0; tmp < nr_channels; tmp++)
+			pdata->multi_block[tmp] = mb[tmp];
+	} else {
+		for (tmp = 0; tmp < nr_channels; tmp++)
+			pdata->multi_block[tmp] = 1;
+	}
+
 	return pdata;
 }
 #else
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index f65dd104479f..4e0128c62704 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -12,7 +12,8 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 
-#define DW_DMA_MAX_NR_CHANNELS	8
+#include "internal.h"
+
 #define DW_DMA_MAX_NR_REQUESTS	16
 
 /* flow controller */
diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
index 886fcf37f291..c4593ec68ff7 100644
--- a/drivers/tty/serial/8250/8250_lpss.c
+++ b/drivers/tty/serial/8250/8250_lpss.c
@@ -157,12 +157,12 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
 static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
 	.nr_channels = 2,
 	.is_private = true,
-	.is_nollp = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
 	.block_size = 4095,
 	.nr_masters = 1,
 	.data_width = {4},
+	.multi_block = {0},
 };
 
 static void qrk_serial_setup_dma(struct lpss8250 *lpss, struct uart_port *port)
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 5f0e11e7354c..e69e415d0d98 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -14,6 +14,7 @@
 #include <linux/device.h>
 
 #define DW_DMA_MAX_NR_MASTERS	4
+#define DW_DMA_MAX_NR_CHANNELS	8
 
 /**
  * struct dw_dma_slave - Controller-specific information about a slave
@@ -40,19 +41,18 @@ struct dw_dma_slave {
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
  * @is_memcpy: The device channels do support memory-to-memory transfers.
- * @is_nollp: The device channels does not support multi block transfers.
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
  * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
  *		(in bytes, power of 2)
+ * @multi_block: Multi block transfers supported by hardware per channel.
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
 	bool		is_private;
 	bool		is_memcpy;
-	bool		is_nollp;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
@@ -62,6 +62,7 @@ struct dw_dma_platform_data {
 	unsigned int	block_size;
 	unsigned char	nr_masters;
 	unsigned char	data_width[DW_DMA_MAX_NR_MASTERS];
+	unsigned char	multi_block[DW_DMA_MAX_NR_CHANNELS];
 };
 
 #endif /* _PLATFORM_DATA_DMA_DW_H */
-- 
cgit v1.2.3


From f8319483f57f1ca22370f4150bb990aca7728a67 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 30 Nov 2016 14:32:25 +1100
Subject: locking/lockdep: Provide a type check for lock_is_held

Christoph requested lockdep_assert_held() variants that distinguish
between held-for-read or held-for-write.

Provide:

  int lock_is_held_type(struct lockdep_map *lock, int read)

which takes the same argument as lock_acquire(.read) and matches it to
the held_lock instance.

Use of this function should be gated by the debug_locks variable. When
that is 0 the return value of the lock_is_held_type() function is
undefined. This is done to allow both negative and positive tests for
holding locks.

By default we provide (positive) lockdep_assert_held{,_exclusive,_read}()
macros.

Requested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 include/linux/lockdep.h  | 25 +++++++++++++++++++++++--
 kernel/locking/lockdep.c | 20 ++++++++++++--------
 2 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c1458fede1f9..1e327bb80838 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -338,9 +338,18 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 extern void lock_release(struct lockdep_map *lock, int nested,
 			 unsigned long ip);
 
-#define lockdep_is_held(lock)	lock_is_held(&(lock)->dep_map)
+/*
+ * Same "read" as for lock_acquire(), except -1 means any.
+ */
+extern int lock_is_held_type(struct lockdep_map *lock, int read);
+
+static inline int lock_is_held(struct lockdep_map *lock)
+{
+	return lock_is_held_type(lock, -1);
+}
 
-extern int lock_is_held(struct lockdep_map *lock);
+#define lockdep_is_held(lock)		lock_is_held(&(lock)->dep_map)
+#define lockdep_is_held_type(lock, r)	lock_is_held_type(&(lock)->dep_map, (r))
 
 extern void lock_set_class(struct lockdep_map *lock, const char *name,
 			   struct lock_class_key *key, unsigned int subclass,
@@ -372,6 +381,14 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 		WARN_ON(debug_locks && !lockdep_is_held(l));	\
 	} while (0)
 
+#define lockdep_assert_held_exclusive(l)	do {			\
+		WARN_ON(debug_locks && !lockdep_is_held_type(l, 0));	\
+	} while (0)
+
+#define lockdep_assert_held_read(l)	do {				\
+		WARN_ON(debug_locks && !lockdep_is_held_type(l, 1));	\
+	} while (0)
+
 #define lockdep_assert_held_once(l)	do {				\
 		WARN_ON_ONCE(debug_locks && !lockdep_is_held(l));	\
 	} while (0)
@@ -428,7 +445,11 @@ struct lock_class_key { };
 
 #define lockdep_depth(tsk)	(0)
 
+#define lockdep_is_held_type(l, r)		(1)
+
 #define lockdep_assert_held(l)			do { (void)(l); } while (0)
+#define lockdep_assert_held_exclusive(l)	do { (void)(l); } while (0)
+#define lockdep_assert_held_read(l)		do { (void)(l); } while (0)
 #define lockdep_assert_held_once(l)		do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)			(0)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 589d763a49b3..cff580a6edf9 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3188,7 +3188,7 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
 	return 0;
 }
 
-static int __lock_is_held(struct lockdep_map *lock);
+static int __lock_is_held(struct lockdep_map *lock, int read);
 
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
@@ -3329,7 +3329,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	}
 	chain_key = iterate_chain_key(chain_key, class_idx);
 
-	if (nest_lock && !__lock_is_held(nest_lock))
+	if (nest_lock && !__lock_is_held(nest_lock, -1))
 		return print_lock_nested_lock_not_held(curr, hlock, ip);
 
 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
@@ -3576,7 +3576,7 @@ found_it:
 	return 1;
 }
 
-static int __lock_is_held(struct lockdep_map *lock)
+static int __lock_is_held(struct lockdep_map *lock, int read)
 {
 	struct task_struct *curr = current;
 	int i;
@@ -3584,8 +3584,12 @@ static int __lock_is_held(struct lockdep_map *lock)
 	for (i = 0; i < curr->lockdep_depth; i++) {
 		struct held_lock *hlock = curr->held_locks + i;
 
-		if (match_held_lock(hlock, lock))
-			return 1;
+		if (match_held_lock(hlock, lock)) {
+			if (read == -1 || hlock->read == read)
+				return 1;
+
+			return 0;
+		}
 	}
 
 	return 0;
@@ -3769,7 +3773,7 @@ void lock_release(struct lockdep_map *lock, int nested,
 }
 EXPORT_SYMBOL_GPL(lock_release);
 
-int lock_is_held(struct lockdep_map *lock)
+int lock_is_held_type(struct lockdep_map *lock, int read)
 {
 	unsigned long flags;
 	int ret = 0;
@@ -3781,13 +3785,13 @@ int lock_is_held(struct lockdep_map *lock)
 	check_flags(flags);
 
 	current->lockdep_recursion = 1;
-	ret = __lock_is_held(lock);
+	ret = __lock_is_held(lock, read);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(lock_is_held);
+EXPORT_SYMBOL_GPL(lock_is_held_type);
 
 struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
 {
-- 
cgit v1.2.3


From ff6a9292e6f633d596826be5ba70d3ef90cc3300 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 30 Nov 2016 14:36:01 +1100
Subject: iomap: implement direct I/O

This adds a full fledget direct I/O implementation using the iomap
interface. Full fledged in this case means all features are supported:
AIO, vectored I/O, any iov_iter type including kernel pointers, bvecs
and pipes, support for hole filling and async apending writes.  It does
not mean supporting all the warts of the old generic code.  We expect
i_rwsem to be held over the duration of the call, and we expect to
maintain i_dio_count ourselves, and we pass on any kinds of mapping
to the file system for now.

The algorithm used is very simple: We use iomap_apply to iterate over
the range of the I/O, and then we use the new bio_iov_iter_get_pages
helper to lock down the user range for the size of the extent.
bio_iov_iter_get_pages can currently lock down twice as many pages as
the old direct I/O code did, which means that we will have a better
batch factor for everything but overwrites of badly fragmented files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/iomap.c            | 373 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |  11 ++
 2 files changed, 384 insertions(+)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 13dd413b2b9c..fc2446242935 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -24,6 +24,7 @@
 #include <linux/uio.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/dax.h>
 #include "internal.h"
 
@@ -584,3 +585,375 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
+
+/*
+ * Private flags for iomap_dio, must not overlap with the public ones in
+ * iomap.h:
+ */
+#define IOMAP_DIO_WRITE		(1 << 30)
+#define IOMAP_DIO_DIRTY		(1 << 31)
+
+struct iomap_dio {
+	struct kiocb		*iocb;
+	iomap_dio_end_io_t	*end_io;
+	loff_t			i_size;
+	loff_t			size;
+	atomic_t		ref;
+	unsigned		flags;
+	int			error;
+
+	union {
+		/* used during submission and for synchronous completion: */
+		struct {
+			struct iov_iter		*iter;
+			struct task_struct	*waiter;
+			struct request_queue	*last_queue;
+			blk_qc_t		cookie;
+		} submit;
+
+		/* used for aio completion: */
+		struct {
+			struct work_struct	work;
+		} aio;
+	};
+};
+
+static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+{
+	struct kiocb *iocb = dio->iocb;
+	ssize_t ret;
+
+	if (dio->end_io) {
+		ret = dio->end_io(iocb,
+				dio->error ? dio->error : dio->size,
+				dio->flags);
+	} else {
+		ret = dio->error;
+	}
+
+	if (likely(!ret)) {
+		ret = dio->size;
+		/* check for short read */
+		if (iocb->ki_pos + ret > dio->i_size &&
+		    !(dio->flags & IOMAP_DIO_WRITE))
+			ret = dio->i_size - iocb->ki_pos;
+		iocb->ki_pos += ret;
+	}
+
+	inode_dio_end(file_inode(iocb->ki_filp));
+	kfree(dio);
+
+	return ret;
+}
+
+static void iomap_dio_complete_work(struct work_struct *work)
+{
+	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
+	struct kiocb *iocb = dio->iocb;
+	bool is_write = (dio->flags & IOMAP_DIO_WRITE);
+	ssize_t ret;
+
+	ret = iomap_dio_complete(dio);
+	if (is_write && ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	iocb->ki_complete(iocb, ret, 0);
+}
+
+/*
+ * Set an error in the dio if none is set yet.  We have to use cmpxchg
+ * as the submission context and the completion context(s) can race to
+ * update the error.
+ */
+static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
+{
+	cmpxchg(&dio->error, 0, ret);
+}
+
+static void iomap_dio_bio_end_io(struct bio *bio)
+{
+	struct iomap_dio *dio = bio->bi_private;
+	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+
+	if (bio->bi_error)
+		iomap_dio_set_error(dio, bio->bi_error);
+
+	if (atomic_dec_and_test(&dio->ref)) {
+		if (is_sync_kiocb(dio->iocb)) {
+			struct task_struct *waiter = dio->submit.waiter;
+
+			WRITE_ONCE(dio->submit.waiter, NULL);
+			wake_up_process(waiter);
+		} else if (dio->flags & IOMAP_DIO_WRITE) {
+			struct inode *inode = file_inode(dio->iocb->ki_filp);
+
+			INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+			queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
+		} else {
+			iomap_dio_complete_work(&dio->aio.work);
+		}
+	}
+
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		struct bio_vec *bvec;
+		int i;
+
+		bio_for_each_segment_all(bvec, bio, i)
+			put_page(bvec->bv_page);
+		bio_put(bio);
+	}
+}
+
+static blk_qc_t
+iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
+		unsigned len)
+{
+	struct page *page = ZERO_PAGE(0);
+	struct bio *bio;
+
+	bio = bio_alloc(GFP_KERNEL, 1);
+	bio->bi_bdev = iomap->bdev;
+	bio->bi_iter.bi_sector =
+		iomap->blkno + ((pos - iomap->offset) >> 9);
+	bio->bi_private = dio;
+	bio->bi_end_io = iomap_dio_bio_end_io;
+
+	get_page(page);
+	if (bio_add_page(bio, page, len, 0) != len)
+		BUG();
+	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+
+	atomic_inc(&dio->ref);
+	return submit_bio(bio);
+}
+
+static loff_t
+iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_dio *dio = data;
+	unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+	unsigned fs_block_size = (1 << inode->i_blkbits), pad;
+	unsigned align = iov_iter_alignment(dio->submit.iter);
+	struct iov_iter iter;
+	struct bio *bio;
+	bool need_zeroout = false;
+	int nr_pages, ret;
+
+	if ((pos | length | align) & ((1 << blkbits) - 1))
+		return -EINVAL;
+
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+			return -EIO;
+		/*FALLTHRU*/
+	case IOMAP_UNWRITTEN:
+		if (!(dio->flags & IOMAP_DIO_WRITE)) {
+			iov_iter_zero(length, dio->submit.iter);
+			dio->size += length;
+			return length;
+		}
+		dio->flags |= IOMAP_DIO_UNWRITTEN;
+		need_zeroout = true;
+		break;
+	case IOMAP_MAPPED:
+		if (iomap->flags & IOMAP_F_SHARED)
+			dio->flags |= IOMAP_DIO_COW;
+		if (iomap->flags & IOMAP_F_NEW)
+			need_zeroout = true;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+
+	/*
+	 * Operate on a partial iter trimmed to the extent we were called for.
+	 * We'll update the iter in the dio once we're done with this extent.
+	 */
+	iter = *dio->submit.iter;
+	iov_iter_truncate(&iter, length);
+
+	nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+	if (nr_pages <= 0)
+		return nr_pages;
+
+	if (need_zeroout) {
+		/* zero out from the start of the block to the write offset */
+		pad = pos & (fs_block_size - 1);
+		if (pad)
+			iomap_dio_zero(dio, iomap, pos - pad, pad);
+	}
+
+	do {
+		if (dio->error)
+			return 0;
+
+		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		bio->bi_bdev = iomap->bdev;
+		bio->bi_iter.bi_sector =
+			iomap->blkno + ((pos - iomap->offset) >> 9);
+		bio->bi_private = dio;
+		bio->bi_end_io = iomap_dio_bio_end_io;
+
+		ret = bio_iov_iter_get_pages(bio, &iter);
+		if (unlikely(ret)) {
+			bio_put(bio);
+			return ret;
+		}
+
+		if (dio->flags & IOMAP_DIO_WRITE) {
+			bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+			task_io_account_write(bio->bi_iter.bi_size);
+		} else {
+			bio_set_op_attrs(bio, REQ_OP_READ, 0);
+			if (dio->flags & IOMAP_DIO_DIRTY)
+				bio_set_pages_dirty(bio);
+		}
+
+		dio->size += bio->bi_iter.bi_size;
+		pos += bio->bi_iter.bi_size;
+
+		nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+
+		atomic_inc(&dio->ref);
+
+		dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+		dio->submit.cookie = submit_bio(bio);
+	} while (nr_pages);
+
+	if (need_zeroout) {
+		/* zero out from the end of the write to the end of the block */
+		pad = pos & (fs_block_size - 1);
+		if (pad)
+			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
+	}
+
+	iov_iter_advance(dio->submit.iter, length);
+	return length;
+}
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
+		iomap_dio_end_io_t end_io)
+{
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	size_t count = iov_iter_count(iter);
+	loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+	unsigned int flags = IOMAP_DIRECT;
+	struct blk_plug plug;
+	struct iomap_dio *dio;
+
+	lockdep_assert_held(&inode->i_rwsem);
+
+	if (!count)
+		return 0;
+
+	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+	if (!dio)
+		return -ENOMEM;
+
+	dio->iocb = iocb;
+	atomic_set(&dio->ref, 1);
+	dio->size = 0;
+	dio->i_size = i_size_read(inode);
+	dio->end_io = end_io;
+	dio->error = 0;
+	dio->flags = 0;
+
+	dio->submit.iter = iter;
+	if (is_sync_kiocb(iocb)) {
+		dio->submit.waiter = current;
+		dio->submit.cookie = BLK_QC_T_NONE;
+		dio->submit.last_queue = NULL;
+	}
+
+	if (iov_iter_rw(iter) == READ) {
+		if (pos >= dio->i_size)
+			goto out_free_dio;
+
+		if (iter->type == ITER_IOVEC)
+			dio->flags |= IOMAP_DIO_DIRTY;
+	} else {
+		dio->flags |= IOMAP_DIO_WRITE;
+		flags |= IOMAP_WRITE;
+	}
+
+	if (mapping->nrpages) {
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		if (ret)
+			goto out_free_dio;
+
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+		ret = 0;
+	}
+
+	inode_dio_begin(inode);
+
+	blk_start_plug(&plug);
+	do {
+		ret = iomap_apply(inode, pos, count, flags, ops, dio,
+				iomap_dio_actor);
+		if (ret <= 0) {
+			/* magic error code to fall back to buffered I/O */
+			if (ret == -ENOTBLK)
+				ret = 0;
+			break;
+		}
+		pos += ret;
+	} while ((count = iov_iter_count(iter)) > 0);
+	blk_finish_plug(&plug);
+
+	if (ret < 0)
+		iomap_dio_set_error(dio, ret);
+
+	if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+			!inode->i_sb->s_dio_done_wq) {
+		ret = sb_init_dio_done_wq(inode->i_sb);
+		if (ret < 0)
+			iomap_dio_set_error(dio, ret);
+	}
+
+	if (!atomic_dec_and_test(&dio->ref)) {
+		if (!is_sync_kiocb(iocb))
+			return -EIOCBQUEUED;
+
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!READ_ONCE(dio->submit.waiter))
+				break;
+
+			if (!(iocb->ki_flags & IOCB_HIPRI) ||
+			    !dio->submit.last_queue ||
+			    !blk_poll(dio->submit.last_queue,
+					dio->submit.cookie))
+				io_schedule();
+		}
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
+	return iomap_dio_complete(dio);
+
+out_free_dio:
+	kfree(dio);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dio_rw);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f185156de74d..a4c94b86401e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -50,6 +50,7 @@ struct iomap {
 #define IOMAP_ZERO		(1 << 1) /* zeroing operation, may skip holes */
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
+#define IOMAP_DIRECT		(1 << 4) /* direct I/O */
 
 struct iomap_ops {
 	/*
@@ -83,4 +84,14 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, struct iomap_ops *ops);
 
+/*
+ * Flags for direct I/O ->end_io:
+ */
+#define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
+#define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
+typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
+		unsigned flags);
+ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+
 #endif /* LINUX_IOMAP_H */
-- 
cgit v1.2.3


From c3b004460d77bf3f980d877be539016f2df4df12 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 23 Nov 2016 14:04:55 +0100
Subject: quota: Remove dqonoff_mutex

The only places that were grabbing dqonoff_mutex are functions turning
quotas on and off and these are properly serialized using s_umount
semaphore. Remove dqonoff_mutex.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 80 +++++++++++++++------------------------------------
 fs/super.c            |  1 -
 include/linux/quota.h |  1 -
 3 files changed, 23 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d91aecc939c9..550f78f2b7c7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -119,8 +119,7 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
- * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
+ *   s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
  */
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@ -933,7 +932,7 @@ static int dqinit_needed(struct inode *inode, int type)
 	return 0;
 }
 
-/* This routine is guarded by dqonoff_mutex mutex */
+/* This routine is guarded by s_umount semaphore */
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
@@ -2108,18 +2107,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 	    DQUOT_USAGE_ENABLED)))
 		return -EINVAL;
 
-	/* We need to serialize quota_off() for device */
-	mutex_lock(&dqopt->dqonoff_mutex);
-
 	/*
 	 * Skip everything if there's nothing to do. We have to do this because
 	 * sometimes we are called when fill_super() failed and calling
 	 * sync_fs() in such cases does no good.
 	 */
-	if (!sb_any_quota_loaded(sb)) {
-		mutex_unlock(&dqopt->dqonoff_mutex);
+	if (!sb_any_quota_loaded(sb))
 		return 0;
-	}
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		toputinode[cnt] = NULL;
 		if (type != -1 && cnt != type)
@@ -2173,7 +2168,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 		dqopt->info[cnt].dqi_bgrace = 0;
 		dqopt->ops[cnt] = NULL;
 	}
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	/* Skip syncing and setting flags if quota files are hidden */
 	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
@@ -2191,19 +2185,13 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 	 * changes done by userspace on the next quotaon() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (toputinode[cnt]) {
-			mutex_lock(&dqopt->dqonoff_mutex);
-			/* If quota was reenabled in the meantime, we have
-			 * nothing to do */
-			if (!sb_has_quota_loaded(sb, cnt)) {
-				inode_lock(toputinode[cnt]);
-				toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
+			WARN_ON_ONCE(sb_has_quota_loaded(sb, cnt));
+			inode_lock(toputinode[cnt]);
+			toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
 				  S_NOATIME | S_NOQUOTA);
-				truncate_inode_pages(&toputinode[cnt]->i_data,
-						     0);
-				inode_unlock(toputinode[cnt]);
-				mark_inode_dirty_sync(toputinode[cnt]);
-			}
-			mutex_unlock(&dqopt->dqonoff_mutex);
+			truncate_inode_pages(&toputinode[cnt]->i_data, 0);
+			inode_unlock(toputinode[cnt]);
+			mark_inode_dirty_sync(toputinode[cnt]);
 		}
 	if (sb->s_bdev)
 		invalidate_bdev(sb->s_bdev);
@@ -2275,6 +2263,10 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		error = -EINVAL;
 		goto out_fmt;
 	}
+	if (sb_has_quota_loaded(sb, type)) {
+		error = -EBUSY;
+		goto out_fmt;
+	}
 
 	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
 		/* As we bypass the pagecache we must now flush all the
@@ -2286,11 +2278,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		sync_filesystem(sb);
 		invalidate_bdev(sb->s_bdev);
 	}
-	mutex_lock(&dqopt->dqonoff_mutex);
-	if (sb_has_quota_loaded(sb, type)) {
-		error = -EBUSY;
-		goto out_lock;
-	}
 
 	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
 		/* We don't want quota and atime on quota files (deadlocks
@@ -2311,7 +2298,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 	error = -EIO;
 	dqopt->files[type] = igrab(inode);
 	if (!dqopt->files[type])
-		goto out_lock;
+		goto out_file_flags;
 	error = -EINVAL;
 	if (!fmt->qf_ops->check_quota_file(sb, type))
 		goto out_file_init;
@@ -2334,14 +2321,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 	spin_unlock(&dq_state_lock);
 
 	add_dquot_ref(sb, type);
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return 0;
 
 out_file_init:
 	dqopt->files[type] = NULL;
 	iput(inode);
-out_lock:
+out_file_flags:
 	if (oldflags != -1) {
 		inode_lock(inode);
 		/* Set the flags back (in the case of accidental quotaon()
@@ -2350,7 +2336,6 @@ out_lock:
 		inode->i_flags |= oldflags;
 		inode_unlock(inode);
 	}
-	mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
 	put_quota_format(fmt);
 
@@ -2372,12 +2357,9 @@ int dquot_resume(struct super_block *sb, int type)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-
-		mutex_lock(&dqopt->dqonoff_mutex);
-		if (!sb_has_quota_suspended(sb, cnt)) {
-			mutex_unlock(&dqopt->dqonoff_mutex);
+		if (!sb_has_quota_suspended(sb, cnt))
 			continue;
-		}
+
 		inode = dqopt->files[cnt];
 		dqopt->files[cnt] = NULL;
 		spin_lock(&dq_state_lock);
@@ -2386,7 +2368,6 @@ int dquot_resume(struct super_block *sb, int type)
 							cnt);
 		dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt);
 		spin_unlock(&dq_state_lock);
-		mutex_unlock(&dqopt->dqonoff_mutex);
 
 		flags = dquot_generic_flag(flags, cnt);
 		ret = vfs_load_quota_inode(inode, cnt,
@@ -2422,9 +2403,7 @@ EXPORT_SYMBOL(dquot_quota_on);
 int dquot_enable(struct inode *inode, int type, int format_id,
 		 unsigned int flags)
 {
-	int ret = 0;
 	struct super_block *sb = inode->i_sb;
-	struct quota_info *dqopt = sb_dqopt(sb);
 
 	/* Just unsuspend quotas? */
 	BUG_ON(flags & DQUOT_SUSPENDED);
@@ -2436,31 +2415,18 @@ int dquot_enable(struct inode *inode, int type, int format_id,
 		return 0;
 	/* Just updating flags needed? */
 	if (sb_has_quota_loaded(sb, type)) {
-		mutex_lock(&dqopt->dqonoff_mutex);
-		/* Now do a reliable test... */
-		if (!sb_has_quota_loaded(sb, type)) {
-			mutex_unlock(&dqopt->dqonoff_mutex);
-			goto load_quota;
-		}
 		if (flags & DQUOT_USAGE_ENABLED &&
-		    sb_has_quota_usage_enabled(sb, type)) {
-			ret = -EBUSY;
-			goto out_lock;
-		}
+		    sb_has_quota_usage_enabled(sb, type))
+			return -EBUSY;
 		if (flags & DQUOT_LIMITS_ENABLED &&
-		    sb_has_quota_limits_enabled(sb, type)) {
-			ret = -EBUSY;
-			goto out_lock;
-		}
+		    sb_has_quota_limits_enabled(sb, type))
+			return -EBUSY;
 		spin_lock(&dq_state_lock);
 		sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
 		spin_unlock(&dq_state_lock);
-out_lock:
-		mutex_unlock(&dqopt->dqonoff_mutex);
-		return ret;
+		return 0;
 	}
 
-load_quota:
 	return vfs_load_quota_inode(inode, type, format_id, flags);
 }
 EXPORT_SYMBOL(dquot_enable);
diff --git a/fs/super.c b/fs/super.c
index f7f724230e2b..1709ed029a2c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -244,7 +244,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	mutex_init(&s->s_vfs_rename_mutex);
 	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 	mutex_init(&s->s_dquot.dqio_mutex);
-	mutex_init(&s->s_dquot.dqonoff_mutex);
 	s->s_maxbytes = MAX_NON_LFS;
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 55107a8ff887..b281d198ee5b 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -520,7 +520,6 @@ static inline void quota_send_warning(struct kqid qid, dev_t dev,
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 	struct mutex dqio_mutex;		/* lock device while I/O in progress */
-	struct mutex dqonoff_mutex;		/* Serialize quotaon & quotaoff */
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	const struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
-- 
cgit v1.2.3


From 60fe3910bb029e3671ce7ac080a7acb7e032b9e0 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Date: Tue, 29 Nov 2016 23:45:47 +1100
Subject: kexec_file: Allow arch-specific memory walking for kexec_add_buffer

Allow architectures to specify a different memory walking function for
kexec_add_buffer. x86 uses iomem to track reserved memory ranges, but
PowerPC uses the memblock subsystem.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/linux/kexec.h   | 29 ++++++++++++++++++++++++++++-
 kernel/kexec_file.c     | 30 ++++++++++++++++++++++--------
 kernel/kexec_internal.h | 16 ----------------
 3 files changed, 50 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 406c33dcae13..5e320ddaaa82 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -148,7 +148,34 @@ struct kexec_file_ops {
 	kexec_verify_sig_t *verify_sig;
 #endif
 };
-#endif
+
+/**
+ * struct kexec_buf - parameters for finding a place for a buffer in memory
+ * @image:	kexec image in which memory to search.
+ * @buffer:	Contents which will be copied to the allocated memory.
+ * @bufsz:	Size of @buffer.
+ * @mem:	On return will have address of the buffer in memory.
+ * @memsz:	Size for the buffer in memory.
+ * @buf_align:	Minimum alignment needed.
+ * @buf_min:	The buffer can't be placed below this address.
+ * @buf_max:	The buffer can't be placed above this address.
+ * @top_down:	Allocate from top of memory.
+ */
+struct kexec_buf {
+	struct kimage *image;
+	char *buffer;
+	unsigned long bufsz;
+	unsigned long mem;
+	unsigned long memsz;
+	unsigned long buf_align;
+	unsigned long buf_min;
+	unsigned long buf_max;
+	bool top_down;
+};
+
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+			       int (*func)(u64, u64, void *));
+#endif /* CONFIG_KEXEC_FILE */
 
 struct kimage {
 	kimage_entry_t head;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 037c321c5618..f865674bff51 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -428,6 +428,27 @@ static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
 	return locate_mem_hole_bottom_up(start, end, kbuf);
 }
 
+/**
+ * arch_kexec_walk_mem - call func(data) on free memory regions
+ * @kbuf:	Context info for the search. Also passed to @func.
+ * @func:	Function to call for each memory region.
+ *
+ * Return: The memory walk will stop when func returns a non-zero value
+ * and that value will be returned. If all free regions are visited without
+ * func returning non-zero, then zero will be returned.
+ */
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+			       int (*func)(u64, u64, void *))
+{
+	if (kbuf->image->type == KEXEC_TYPE_CRASH)
+		return walk_iomem_res_desc(crashk_res.desc,
+					   IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+					   crashk_res.start, crashk_res.end,
+					   kbuf, func);
+	else
+		return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
+}
+
 /*
  * Helper function for placing a buffer in a kexec segment. This assumes
  * that kexec_mutex is held.
@@ -474,14 +495,7 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	kbuf->top_down = top_down;
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
-	if (image->type == KEXEC_TYPE_CRASH)
-		ret = walk_iomem_res_desc(crashk_res.desc,
-				IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
-				crashk_res.start, crashk_res.end, kbuf,
-				locate_mem_hole_callback);
-	else
-		ret = walk_system_ram_res(0, -1, kbuf,
-					  locate_mem_hole_callback);
+	ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
 	if (ret != 1) {
 		/* A suitable memory range could not be found for buffer */
 		return -EADDRNOTAVAIL;
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 0a52315d9c62..4cef7e4706b0 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -20,22 +20,6 @@ struct kexec_sha_region {
 	unsigned long len;
 };
 
-/*
- * Keeps track of buffer parameters as provided by caller for requesting
- * memory placement of buffer.
- */
-struct kexec_buf {
-	struct kimage *image;
-	char *buffer;
-	unsigned long bufsz;
-	unsigned long mem;
-	unsigned long memsz;
-	unsigned long buf_align;
-	unsigned long buf_min;
-	unsigned long buf_max;
-	bool top_down;		/* allocate from top of memory hole */
-};
-
 void kimage_file_post_load_cleanup(struct kimage *image);
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
-- 
cgit v1.2.3


From ec2b9bfaac44ea889625a6b9473d33898b10d35f Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Date: Tue, 29 Nov 2016 23:45:48 +1100
Subject: kexec_file: Change kexec_add_buffer to take kexec_buf as argument.

This is done to simplify the kexec_add_buffer argument list.
Adapt all callers to set up a kexec_buf to pass to kexec_add_buffer.

In addition, change the type of kexec_buf.buffer from char * to void *.
There is no particular reason for it to be a char *, and the change
allows us to get rid of 3 existing casts to char * in the code.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/x86/kernel/crash.c           | 37 ++++++++--------
 arch/x86/kernel/kexec-bzimage64.c | 48 +++++++++++----------
 include/linux/kexec.h             |  8 +---
 kernel/kexec_file.c               | 88 ++++++++++++++++++---------------------
 4 files changed, 87 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 650830e39e3a..3741461c63a0 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -631,9 +631,9 @@ static int determine_backup_region(u64 start, u64 end, void *arg)
 
 int crash_load_segments(struct kimage *image)
 {
-	unsigned long src_start, src_sz, elf_sz;
-	void *elf_addr;
 	int ret;
+	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+				  .buf_max = ULONG_MAX, .top_down = false };
 
 	/*
 	 * Determine and load a segment for backup area. First 640K RAM
@@ -647,43 +647,44 @@ int crash_load_segments(struct kimage *image)
 	if (ret < 0)
 		return ret;
 
-	src_start = image->arch.backup_src_start;
-	src_sz = image->arch.backup_src_sz;
-
 	/* Add backup segment. */
-	if (src_sz) {
+	if (image->arch.backup_src_sz) {
+		kbuf.buffer = &crash_zero_bytes;
+		kbuf.bufsz = sizeof(crash_zero_bytes);
+		kbuf.memsz = image->arch.backup_src_sz;
+		kbuf.buf_align = PAGE_SIZE;
 		/*
 		 * Ideally there is no source for backup segment. This is
 		 * copied in purgatory after crash. Just add a zero filled
 		 * segment for now to make sure checksum logic works fine.
 		 */
-		ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
-				       sizeof(crash_zero_bytes), src_sz,
-				       PAGE_SIZE, 0, -1, 0,
-				       &image->arch.backup_load_addr);
+		ret = kexec_add_buffer(&kbuf);
 		if (ret)
 			return ret;
+		image->arch.backup_load_addr = kbuf.mem;
 		pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
-			 image->arch.backup_load_addr, src_start, src_sz);
+			 image->arch.backup_load_addr,
+			 image->arch.backup_src_start, kbuf.memsz);
 	}
 
 	/* Prepare elf headers and add a segment */
-	ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+	ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
 	if (ret)
 		return ret;
 
-	image->arch.elf_headers = elf_addr;
-	image->arch.elf_headers_sz = elf_sz;
+	image->arch.elf_headers = kbuf.buffer;
+	image->arch.elf_headers_sz = kbuf.bufsz;
 
-	ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
-			ELF_CORE_HEADER_ALIGN, 0, -1, 0,
-			&image->arch.elf_load_addr);
+	kbuf.memsz = kbuf.bufsz;
+	kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+	ret = kexec_add_buffer(&kbuf);
 	if (ret) {
 		vfree((void *)image->arch.elf_headers);
 		return ret;
 	}
+	image->arch.elf_load_addr = kbuf.mem;
 	pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-		 image->arch.elf_load_addr, elf_sz, elf_sz);
+		 image->arch.elf_load_addr, kbuf.bufsz, kbuf.bufsz);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3407b148c240..d0a814a9d96a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -331,17 +331,17 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 
 	struct setup_header *header;
 	int setup_sects, kern16_size, ret = 0;
-	unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+	unsigned long setup_header_size, params_cmdline_sz;
 	struct boot_params *params;
 	unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
 	unsigned long purgatory_load_addr;
-	unsigned long kernel_bufsz, kernel_memsz, kernel_align;
-	char *kernel_buf;
 	struct bzimage64_data *ldata;
 	struct kexec_entry64_regs regs64;
 	void *stack;
 	unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
 	unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+	struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
+				  .top_down = true };
 
 	header = (struct setup_header *)(kernel + setup_hdr_offset);
 	setup_sects = header->setup_sects;
@@ -402,11 +402,11 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
 				MAX_ELFCOREHDR_STR_LEN;
 	params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
-	params_misc_sz = params_cmdline_sz + efi_map_sz +
+	kbuf.bufsz = params_cmdline_sz + efi_map_sz +
 				sizeof(struct setup_data) +
 				sizeof(struct efi_setup_data);
 
-	params = kzalloc(params_misc_sz, GFP_KERNEL);
+	params = kzalloc(kbuf.bufsz, GFP_KERNEL);
 	if (!params)
 		return ERR_PTR(-ENOMEM);
 	efi_map_offset = params_cmdline_sz;
@@ -418,37 +418,41 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	/* Is there a limit on setup header size? */
 	memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
 
-	ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
-			       params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
-			       ULONG_MAX, 1, &bootparam_load_addr);
+	kbuf.buffer = params;
+	kbuf.memsz = kbuf.bufsz;
+	kbuf.buf_align = 16;
+	kbuf.buf_min = MIN_BOOTPARAM_ADDR;
+	ret = kexec_add_buffer(&kbuf);
 	if (ret)
 		goto out_free_params;
+	bootparam_load_addr = kbuf.mem;
 	pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-		 bootparam_load_addr, params_misc_sz, params_misc_sz);
+		 bootparam_load_addr, kbuf.bufsz, kbuf.bufsz);
 
 	/* Load kernel */
-	kernel_buf = kernel + kern16_size;
-	kernel_bufsz =  kernel_len - kern16_size;
-	kernel_memsz = PAGE_ALIGN(header->init_size);
-	kernel_align = header->kernel_alignment;
-
-	ret = kexec_add_buffer(image, kernel_buf,
-			       kernel_bufsz, kernel_memsz, kernel_align,
-			       MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
-			       &kernel_load_addr);
+	kbuf.buffer = kernel + kern16_size;
+	kbuf.bufsz =  kernel_len - kern16_size;
+	kbuf.memsz = PAGE_ALIGN(header->init_size);
+	kbuf.buf_align = header->kernel_alignment;
+	kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+	ret = kexec_add_buffer(&kbuf);
 	if (ret)
 		goto out_free_params;
+	kernel_load_addr = kbuf.mem;
 
 	pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-		 kernel_load_addr, kernel_memsz, kernel_memsz);
+		 kernel_load_addr, kbuf.bufsz, kbuf.memsz);
 
 	/* Load initrd high */
 	if (initrd) {
-		ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
-				       PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
-				       ULONG_MAX, 1, &initrd_load_addr);
+		kbuf.buffer = initrd;
+		kbuf.bufsz = kbuf.memsz = initrd_len;
+		kbuf.buf_align = PAGE_SIZE;
+		kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+		ret = kexec_add_buffer(&kbuf);
 		if (ret)
 			goto out_free_params;
+		initrd_load_addr = kbuf.mem;
 
 		pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
 				initrd_load_addr, initrd_len, initrd_len);
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 5e320ddaaa82..437ef1b47428 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -163,7 +163,7 @@ struct kexec_file_ops {
  */
 struct kexec_buf {
 	struct kimage *image;
-	char *buffer;
+	void *buffer;
 	unsigned long bufsz;
 	unsigned long mem;
 	unsigned long memsz;
@@ -175,6 +175,7 @@ struct kexec_buf {
 
 int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
 			       int (*func)(u64, u64, void *));
+extern int kexec_add_buffer(struct kexec_buf *kbuf);
 #endif /* CONFIG_KEXEC_FILE */
 
 struct kimage {
@@ -239,11 +240,6 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
 extern int kernel_kexec(void);
-extern int kexec_add_buffer(struct kimage *image, char *buffer,
-			    unsigned long bufsz, unsigned long memsz,
-			    unsigned long buf_align, unsigned long buf_min,
-			    unsigned long buf_max, bool top_down,
-			    unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f865674bff51..efd2c094af7e 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -449,25 +449,27 @@ int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
 		return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
 }
 
-/*
- * Helper function for placing a buffer in a kexec segment. This assumes
- * that kexec_mutex is held.
+/**
+ * kexec_add_buffer - place a buffer in a kexec segment
+ * @kbuf:	Buffer contents and memory parameters.
+ *
+ * This function assumes that kexec_mutex is held.
+ * On successful return, @kbuf->mem will have the physical address of
+ * the buffer in memory.
+ *
+ * Return: 0 on success, negative errno on error.
  */
-int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
-		     unsigned long memsz, unsigned long buf_align,
-		     unsigned long buf_min, unsigned long buf_max,
-		     bool top_down, unsigned long *load_addr)
+int kexec_add_buffer(struct kexec_buf *kbuf)
 {
 
 	struct kexec_segment *ksegment;
-	struct kexec_buf buf, *kbuf;
 	int ret;
 
 	/* Currently adding segment this way is allowed only in file mode */
-	if (!image->file_mode)
+	if (!kbuf->image->file_mode)
 		return -EINVAL;
 
-	if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+	if (kbuf->image->nr_segments >= KEXEC_SEGMENT_MAX)
 		return -EINVAL;
 
 	/*
@@ -477,22 +479,14 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	 * logic goes through list of segments to make sure there are
 	 * no destination overlaps.
 	 */
-	if (!list_empty(&image->control_pages)) {
+	if (!list_empty(&kbuf->image->control_pages)) {
 		WARN_ON(1);
 		return -EINVAL;
 	}
 
-	memset(&buf, 0, sizeof(struct kexec_buf));
-	kbuf = &buf;
-	kbuf->image = image;
-	kbuf->buffer = buffer;
-	kbuf->bufsz = bufsz;
-
-	kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
-	kbuf->buf_align = max(buf_align, PAGE_SIZE);
-	kbuf->buf_min = buf_min;
-	kbuf->buf_max = buf_max;
-	kbuf->top_down = top_down;
+	/* Ensure minimum alignment needed for segments. */
+	kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
+	kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
 	ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
@@ -502,13 +496,12 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	}
 
 	/* Found a suitable memory range */
-	ksegment = &image->segment[image->nr_segments];
+	ksegment = &kbuf->image->segment[kbuf->image->nr_segments];
 	ksegment->kbuf = kbuf->buffer;
 	ksegment->bufsz = kbuf->bufsz;
 	ksegment->mem = kbuf->mem;
 	ksegment->memsz = kbuf->memsz;
-	image->nr_segments++;
-	*load_addr = ksegment->mem;
+	kbuf->image->nr_segments++;
 	return 0;
 }
 
@@ -630,13 +623,15 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 				  unsigned long max, int top_down)
 {
 	struct purgatory_info *pi = &image->purgatory_info;
-	unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
-	unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+	unsigned long align, bss_align, bss_sz, bss_pad;
+	unsigned long entry, load_addr, curr_load_addr, bss_addr, offset;
 	unsigned char *buf_addr, *src;
 	int i, ret = 0, entry_sidx = -1;
 	const Elf_Shdr *sechdrs_c;
 	Elf_Shdr *sechdrs = NULL;
-	void *purgatory_buf = NULL;
+	struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
+				  .buf_min = min, .buf_max = max,
+				  .top_down = top_down };
 
 	/*
 	 * sechdrs_c points to section headers in purgatory and are read
@@ -702,9 +697,7 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 	}
 
 	/* Determine how much memory is needed to load relocatable object. */
-	buf_align = 1;
 	bss_align = 1;
-	buf_sz = 0;
 	bss_sz = 0;
 
 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
@@ -713,10 +706,10 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 
 		align = sechdrs[i].sh_addralign;
 		if (sechdrs[i].sh_type != SHT_NOBITS) {
-			if (buf_align < align)
-				buf_align = align;
-			buf_sz = ALIGN(buf_sz, align);
-			buf_sz += sechdrs[i].sh_size;
+			if (kbuf.buf_align < align)
+				kbuf.buf_align = align;
+			kbuf.bufsz = ALIGN(kbuf.bufsz, align);
+			kbuf.bufsz += sechdrs[i].sh_size;
 		} else {
 			/* bss section */
 			if (bss_align < align)
@@ -728,32 +721,31 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 
 	/* Determine the bss padding required to align bss properly */
 	bss_pad = 0;
-	if (buf_sz & (bss_align - 1))
-		bss_pad = bss_align - (buf_sz & (bss_align - 1));
+	if (kbuf.bufsz & (bss_align - 1))
+		bss_pad = bss_align - (kbuf.bufsz & (bss_align - 1));
 
-	memsz = buf_sz + bss_pad + bss_sz;
+	kbuf.memsz = kbuf.bufsz + bss_pad + bss_sz;
 
 	/* Allocate buffer for purgatory */
-	purgatory_buf = vzalloc(buf_sz);
-	if (!purgatory_buf) {
+	kbuf.buffer = vzalloc(kbuf.bufsz);
+	if (!kbuf.buffer) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	if (buf_align < bss_align)
-		buf_align = bss_align;
+	if (kbuf.buf_align < bss_align)
+		kbuf.buf_align = bss_align;
 
 	/* Add buffer to segment list */
-	ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
-				buf_align, min, max, top_down,
-				&pi->purgatory_load_addr);
+	ret = kexec_add_buffer(&kbuf);
 	if (ret)
 		goto out;
+	pi->purgatory_load_addr = kbuf.mem;
 
 	/* Load SHF_ALLOC sections */
-	buf_addr = purgatory_buf;
+	buf_addr = kbuf.buffer;
 	load_addr = curr_load_addr = pi->purgatory_load_addr;
-	bss_addr = load_addr + buf_sz + bss_pad;
+	bss_addr = load_addr + kbuf.bufsz + bss_pad;
 
 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -799,11 +791,11 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 	 * Used later to identify which section is purgatory and skip it
 	 * from checksumming.
 	 */
-	pi->purgatory_buf = purgatory_buf;
+	pi->purgatory_buf = kbuf.buffer;
 	return ret;
 out:
 	vfree(sechdrs);
-	vfree(purgatory_buf);
+	vfree(kbuf.buffer);
 	return ret;
 }
 
-- 
cgit v1.2.3


From e2e806f9e437b46a3fc8f3174a225c73f2e38c3d Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Date: Tue, 29 Nov 2016 23:45:49 +1100
Subject: kexec_file: Factor out kexec_locate_mem_hole from kexec_add_buffer.

kexec_locate_mem_hole will be used by the PowerPC kexec_file_load
implementation to find free memory for the purgatory stack.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/linux/kexec.h |  1 +
 kernel/kexec_file.c   | 25 ++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 437ef1b47428..a33f63351f86 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -176,6 +176,7 @@ struct kexec_buf {
 int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
 			       int (*func)(u64, u64, void *));
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
+int kexec_locate_mem_hole(struct kexec_buf *kbuf);
 #endif /* CONFIG_KEXEC_FILE */
 
 struct kimage {
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index efd2c094af7e..0c2df7f73792 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -449,6 +449,23 @@ int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
 		return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
 }
 
+/**
+ * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
+ * @kbuf:	Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+	int ret;
+
+	ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
+
+	return ret == 1 ? 0 : -EADDRNOTAVAIL;
+}
+
 /**
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:	Buffer contents and memory parameters.
@@ -489,11 +506,9 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
 	kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
-	ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
-	if (ret != 1) {
-		/* A suitable memory range could not be found for buffer */
-		return -EADDRNOTAVAIL;
-	}
+	ret = kexec_locate_mem_hole(kbuf);
+	if (ret)
+		return ret;
 
 	/* Found a suitable memory range */
 	ksegment = &kbuf->image->segment[kbuf->image->nr_segments];
-- 
cgit v1.2.3


From 05b055e89121394058c75dc354e9a46e1e765579 Mon Sep 17 00:00:00 2001
From: Francis Yan <francisyyan@gmail.com>
Date: Sun, 27 Nov 2016 23:07:13 -0800
Subject: tcp: instrument tcp sender limits chronographs

This patch implements the skeleton of the TCP chronograph
instrumentation on sender side limits:

	1) idle (unspec)
	2) busy sending data other than 3-4 below
	3) rwnd-limited
	4) sndbuf-limited

The limits are enumerated 'tcp_chrono'. Since a connection in
theory can idle forever, we do not track the actual length of this
uninteresting idle period. For the rest we track how long the sender
spends in each limit. At any point during the life time of a
connection, the sender must be in one of the four states.

If there are multiple conditions worthy of tracking in a chronograph
then the highest priority enum takes precedence over
the other conditions. So that if something "more interesting"
starts happening, stop the previous chrono and start a new one.

The time unit is jiffy(u32) in order to save space in tcp_sock.
This implies application must sample the stats no longer than every
49 days of 1ms jiffy.

Signed-off-by: Francis Yan <francisyyan@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  7 +++++--
 include/net/tcp.h     | 14 ++++++++++++++
 net/ipv4/tcp_output.c | 30 ++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 32a7c7e35b71..d5d3bd814338 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -211,8 +211,11 @@ struct tcp_sock {
 		u8 reord;    /* reordering detected */
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
-	u8	rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
-		unused:7;
+	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
+	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
+	u8	chrono_type:2,	/* current chronograph type */
+		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
+		unused:5;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7de80739adab..e5ff4083870d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1516,6 +1516,20 @@ struct tcp_fastopen_context {
 	struct rcu_head		rcu;
 };
 
+/* Latencies incurred by various limits for a sender. They are
+ * chronograph-like stats that are mutually exclusive.
+ */
+enum tcp_chrono {
+	TCP_CHRONO_UNSPEC,
+	TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */
+	TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */
+	TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */
+	__TCP_CHRONO_MAX,
+};
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
+
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 19105b46a304..34f751776a01 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2081,6 +2081,36 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	return false;
 }
 
+static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+	const u32 now = tcp_time_stamp;
+
+	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
+		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+	tp->chrono_start = now;
+	tp->chrono_type = new;
+}
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* If there are multiple conditions worthy of tracking in a
+	 * chronograph then the highest priority enum takes precedence over
+	 * the other conditions. So that if something "more interesting"
+	 * starts happening, stop the previous chrono and start a new one.
+	 */
+	if (type > tp->chrono_type)
+		tcp_chrono_set(tp, type);
+}
+
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
+}
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
-- 
cgit v1.2.3


From 1c885808e45601b2b6f68b30ac1d999e10b6f606 Mon Sep 17 00:00:00 2001
From: Francis Yan <francisyyan@gmail.com>
Date: Sun, 27 Nov 2016 23:07:18 -0800
Subject: tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING

This patch exports the sender chronograph stats via the socket
SO_TIMESTAMPING channel. Currently we can instrument how long a
particular application unit of data was queued in TCP by tracking
SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_SCHED. Having
these sender chronograph stats exported simultaneously along with
these timestamps allow further breaking down the various sender
limitation.  For example, a video server can tell if a particular
chunk of video on a connection takes a long time to deliver because
TCP was experiencing small receive window. It is not possible to
tell before this patch without packet traces.

To prepare these stats, the user needs to set
SOF_TIMESTAMPING_OPT_STATS and SOF_TIMESTAMPING_OPT_TSONLY flags
while requesting other SOF_TIMESTAMPING TX timestamps. When the
timestamps are available in the error queue, the stats are returned
in a separate control message of type SCM_TIMESTAMPING_OPT_STATS,
in a list of TLVs (struct nlattr) of types: TCP_NLA_BUSY_TIME,
TCP_NLA_RWND_LIMITED, TCP_NLA_SNDBUF_LIMITED. Unit is microsecond.

Signed-off-by: Francis Yan <francisyyan@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.txt | 10 ++++++++++
 arch/alpha/include/uapi/asm/socket.h      |  2 ++
 arch/frv/include/uapi/asm/socket.h        |  2 ++
 arch/ia64/include/uapi/asm/socket.h       |  2 ++
 arch/m32r/include/uapi/asm/socket.h       |  2 ++
 arch/mips/include/uapi/asm/socket.h       |  2 ++
 arch/mn10300/include/uapi/asm/socket.h    |  2 ++
 arch/parisc/include/uapi/asm/socket.h     |  2 ++
 arch/powerpc/include/uapi/asm/socket.h    |  2 ++
 arch/s390/include/uapi/asm/socket.h       |  2 ++
 arch/sparc/include/uapi/asm/socket.h      |  2 ++
 arch/xtensa/include/uapi/asm/socket.h     |  2 ++
 include/linux/tcp.h                       |  2 ++
 include/uapi/asm-generic/socket.h         |  2 ++
 include/uapi/linux/net_tstamp.h           |  3 ++-
 include/uapi/linux/tcp.h                  |  8 ++++++++
 net/core/skbuff.c                         | 14 +++++++++++---
 net/core/sock.c                           |  7 +++++++
 net/ipv4/tcp.c                            | 20 ++++++++++++++++++++
 net/socket.c                              |  7 ++++++-
 20 files changed, 90 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 671cccf0dcd2..96f50694a748 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -182,6 +182,16 @@ SOF_TIMESTAMPING_OPT_TSONLY:
   the timestamp even if sysctl net.core.tstamp_allow_data is 0.
   This option disables SOF_TIMESTAMPING_OPT_CMSG.
 
+SOF_TIMESTAMPING_OPT_STATS:
+
+  Optional stats that are obtained along with the transmit timestamps.
+  It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the
+  transmit timestamp is available, the stats are available in a
+  separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a
+  list of TLVs (struct nlattr) of types. These stats allow the
+  application to associate various transport layer stats with
+  the transmit timestamps, such as how long a certain block of
+  data was limited by peer's receiver window.
 
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9e46d6e656d9..afc901b7a6f6 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index afbc98f02d27..81e03530ed39 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -90,5 +90,7 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 0018fad9039f..57feb0c1f7d7 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -99,4 +99,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 5fe42fc7b6c5..5853f8e92c20 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2027240aafbb..566ecdcb5b4b 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 5129f23a9ee1..0e12527c4b0e 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 9c935d717df9..7a109b73ddf7 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
 
 #define SO_CNX_ADVICE		0x402E
 
+#define SCM_TIMESTAMPING_OPT_STATS	0x402F
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 1672e3398270..44583a52f882 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 41b51c2f4f1b..b24a64cbfeb1 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 31aede3af088..a25dc32f5d6a 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -86,6 +86,8 @@
 
 #define SO_CNX_ADVICE		0x0037
 
+#define SCM_TIMESTAMPING_OPT_STATS	0x0038
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 81435d995e11..9fdbe1fe0473 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d5d3bd814338..00e0ee8f001f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -428,4 +428,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 	tp->saved_syn = NULL;
 }
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk);
+
 #endif	/* _LINUX_TCP_H */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 67d632f1743d..2c748ddad5f8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -92,4 +92,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 264e515de16f..464dcca5ed68 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -25,8 +25,9 @@ enum {
 	SOF_TIMESTAMPING_TX_ACK = (1<<9),
 	SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
 	SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
+	SOF_TIMESTAMPING_OPT_STATS = (1<<12),
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 };
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 2863b661d6e1..c53de2691cec 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -220,6 +220,14 @@ struct tcp_info {
 	__u64	tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
 };
 
+/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
+enum {
+	TCP_NLA_PAD,
+	TCP_NLA_BUSY,		/* Time (usec) busy sending data */
+	TCP_NLA_RWND_LIMITED,	/* Time (usec) limited by receive window */
+	TCP_NLA_SNDBUF_LIMITED,	/* Time (usec) limited by send buffer */
+};
+
 /* for TCP_MD5SIG socket option */
 #define TCP_MD5SIG_MAXKEYLEN	80
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d1d1a5a5ad24..ea6fa954c7a0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3839,10 +3839,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (!skb_may_tx_timestamp(sk, tsonly))
 		return;
 
-	if (tsonly)
-		skb = alloc_skb(0, GFP_ATOMIC);
-	else
+	if (tsonly) {
+#ifdef CONFIG_INET
+		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+		    sk->sk_protocol == IPPROTO_TCP &&
+		    sk->sk_type == SOCK_STREAM)
+			skb = tcp_get_timestamping_opt_stats(sk);
+		else
+#endif
+			skb = alloc_skb(0, GFP_ATOMIC);
+	} else {
 		skb = skb_clone(orig_skb, GFP_ATOMIC);
+	}
 	if (!skb)
 		return;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 14e6145be33b..d8c7f8c877ca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -854,6 +854,13 @@ set_rcvbuf:
 				sk->sk_tskey = 0;
 			}
 		}
+
+		if (val & SOF_TIMESTAMPING_OPT_STATS &&
+		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
+			ret = -EINVAL;
+			break;
+		}
+
 		sk->sk_tsflags = val;
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cdde20f49999..1149b48700a1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2841,6 +2841,26 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *stats;
+	struct tcp_info info;
+
+	stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+	if (!stats)
+		return NULL;
+
+	tcp_get_info_chrono_stats(tp, &info);
+	nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+			  info.tcpi_busy_time, TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+			  info.tcpi_rwnd_limited, TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+			  info.tcpi_sndbuf_limited, TCP_NLA_PAD);
+	return stats;
+}
+
 static int do_tcp_getsockopt(struct sock *sk, int level,
 		int optname, char __user *optval, int __user *optlen)
 {
diff --git a/net/socket.c b/net/socket.c
index e2584c51aa1f..e6318943ad07 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -693,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
 		empty = 0;
-	if (!empty)
+	if (!empty) {
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(tss), &tss);
+
+		if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
+				 skb->len, skb->data);
+	}
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
-- 
cgit v1.2.3


From 820ee17b8d3b2a57b1ea20b247cc6a1dddaf8b8d Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 29 Nov 2016 09:57:17 -0800
Subject: net: phy: broadcom: Add support code for reading PHY counters

Broadcom PHYs expose a number of PHY error counters: receive errors,
false carrier sense, SerDes BER count, local and remote receive errors.
Add support code to allow retrieving these error counters. Since the
Broadcom PHY library code is used by several drivers, make it possible
for them to specify the storage for the software copy of the statistics.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 70 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/phy/bcm-phy-lib.h |  5 ++++
 include/linux/brcmphy.h       |  3 ++
 3 files changed, 78 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 3156ce6d5861..ab9ad689617c 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -17,6 +17,7 @@
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/phy.h>
+#include <linux/ethtool.h>
 
 #define MII_BCM_CHANNEL_WIDTH     0x2000
 #define BCM_CL45VEN_EEE_ADV       0x3c
@@ -317,6 +318,75 @@ int bcm_phy_downshift_set(struct phy_device *phydev, u8 count)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_downshift_set);
 
+struct bcm_phy_hw_stat {
+	const char *string;
+	u8 reg;
+	u8 shift;
+	u8 bits;
+};
+
+/* Counters freeze at either 0xffff or 0xff, better than nothing */
+static const struct bcm_phy_hw_stat bcm_phy_hw_stats[] = {
+	{ "phy_receive_errors", MII_BRCM_CORE_BASE12, 0, 16 },
+	{ "phy_serdes_ber_errors", MII_BRCM_CORE_BASE13, 8, 8 },
+	{ "phy_false_carrier_sense_errors", MII_BRCM_CORE_BASE13, 0, 8 },
+	{ "phy_local_rcvr_nok", MII_BRCM_CORE_BASE14, 8, 8 },
+	{ "phy_remote_rcv_nok", MII_BRCM_CORE_BASE14, 0, 8 },
+};
+
+int bcm_phy_get_sset_count(struct phy_device *phydev)
+{
+	return ARRAY_SIZE(bcm_phy_hw_stats);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_sset_count);
+
+void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
+		memcpy(data + i * ETH_GSTRING_LEN,
+		       bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
+
+#ifndef UINT64_MAX
+#define UINT64_MAX              (u64)(~((u64)0))
+#endif
+
+/* Caller is supposed to provide appropriate storage for the library code to
+ * access the shadow copy
+ */
+static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow,
+			    unsigned int i)
+{
+	struct bcm_phy_hw_stat stat = bcm_phy_hw_stats[i];
+	int val;
+	u64 ret;
+
+	val = phy_read(phydev, stat.reg);
+	if (val < 0) {
+		ret = UINT64_MAX;
+	} else {
+		val >>= stat.shift;
+		val = val & ((1 << stat.bits) - 1);
+		shadow[i] += val;
+		ret = shadow[i];
+	}
+
+	return ret;
+}
+
+void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
+		       struct ethtool_stats *stats, u64 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
+		data[i] = bcm_phy_get_stat(phydev, shadow, i);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_stats);
+
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Broadcom Corporation");
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index a117f657c6d7..7c73808cbbde 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -42,4 +42,9 @@ int bcm_phy_downshift_get(struct phy_device *phydev, u8 *count);
 
 int bcm_phy_downshift_set(struct phy_device *phydev, u8 count);
 
+int bcm_phy_get_sset_count(struct phy_device *phydev);
+void bcm_phy_get_strings(struct phy_device *phydev, u8 *data);
+void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
+		       struct ethtool_stats *stats, u64 *data);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index f9f8aaf9c943..4f7d8be9ddbf 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -244,6 +244,9 @@
 #define LPI_FEATURE_EN_DIG1000X		0x4000
 
 /* Core register definitions*/
+#define MII_BRCM_CORE_BASE12	0x12
+#define MII_BRCM_CORE_BASE13	0x13
+#define MII_BRCM_CORE_BASE14	0x14
 #define MII_BRCM_CORE_BASE1E	0x1E
 #define MII_BRCM_CORE_EXPB0	0xB0
 #define MII_BRCM_CORE_EXPB1	0xB1
-- 
cgit v1.2.3


From 85de8576a0b14aecc99136cfbf90e367fa2142cb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 28 Nov 2016 23:16:54 +0100
Subject: bpf, xdp: allow to pass flags to dev_change_xdp_fd

Add an IFLA_XDP_FLAGS attribute that can be passed for setting up
XDP along with IFLA_XDP_FD, which eventually allows user space to
implement typical add/replace/delete logic for programs. Right now,
calling into dev_change_xdp_fd() will always replace previous programs.

When passed XDP_FLAGS_UPDATE_IF_NOEXIST, we can handle this more
graceful when requested by returning -EBUSY in case we try to
attach a new program, but we find that another one is already
attached. This will be used by upcoming front-end for iproute2 as
well.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    |  2 +-
 include/uapi/linux/if_link.h |  4 ++++
 net/core/dev.c               | 20 ++++++++++++++++++--
 net/core/rtnetlink.c         | 14 +++++++++++++-
 4 files changed, 36 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4ffcd874cc20..3755317cc6a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3253,7 +3253,7 @@ int dev_get_phys_port_id(struct net_device *dev,
 int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, int fd);
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 92b2d4928bf1..6b13e591abc9 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -876,10 +876,14 @@ enum {
 
 /* XDP section */
 
+#define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
+#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST)
+
 enum {
 	IFLA_XDP_UNSPEC,
 	IFLA_XDP_FD,
 	IFLA_XDP_ATTACHED,
+	IFLA_XDP_FLAGS,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 048b46b7c92a..bffb5253e778 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6692,26 +6692,42 @@ EXPORT_SYMBOL(dev_change_proto_down);
  *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
  *	@dev: device
  *	@fd: new program fd or negative value to clear
+ *	@flags: xdp-related flags
  *
  *	Set or clear a bpf program for a device
  */
-int dev_change_xdp_fd(struct net_device *dev, int fd)
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
-	struct netdev_xdp xdp = {};
+	struct netdev_xdp xdp;
 	int err;
 
+	ASSERT_RTNL();
+
 	if (!ops->ndo_xdp)
 		return -EOPNOTSUPP;
 	if (fd >= 0) {
+		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
+			memset(&xdp, 0, sizeof(xdp));
+			xdp.command = XDP_QUERY_PROG;
+
+			err = ops->ndo_xdp(dev, &xdp);
+			if (err < 0)
+				return err;
+			if (xdp.prog_attached)
+				return -EBUSY;
+		}
+
 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 	}
 
+	memset(&xdp, 0, sizeof(xdp));
 	xdp.command = XDP_SETUP_PROG;
 	xdp.prog = prog;
+
 	err = ops->ndo_xdp(dev, &xdp);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4e60525ea586..bd85570e6e4b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1505,6 +1505,7 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
 static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
 	[IFLA_XDP_FD]		= { .type = NLA_S32 },
 	[IFLA_XDP_ATTACHED]	= { .type = NLA_U8 },
+	[IFLA_XDP_FLAGS]	= { .type = NLA_U32 },
 };
 
 static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
@@ -2164,6 +2165,7 @@ static int do_setlink(const struct sk_buff *skb,
 
 	if (tb[IFLA_XDP]) {
 		struct nlattr *xdp[IFLA_XDP_MAX + 1];
+		u32 xdp_flags = 0;
 
 		err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
 				       ifla_xdp_policy);
@@ -2174,9 +2176,19 @@ static int do_setlink(const struct sk_buff *skb,
 			err = -EINVAL;
 			goto errout;
 		}
+
+		if (xdp[IFLA_XDP_FLAGS]) {
+			xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
+			if (xdp_flags & ~XDP_FLAGS_MASK) {
+				err = -EINVAL;
+				goto errout;
+			}
+		}
+
 		if (xdp[IFLA_XDP_FD]) {
 			err = dev_change_xdp_fd(dev,
-						nla_get_s32(xdp[IFLA_XDP_FD]));
+						nla_get_s32(xdp[IFLA_XDP_FD]),
+						xdp_flags);
 			if (err)
 				goto errout;
 			status |= DO_SETLINK_NOTIFY;
-- 
cgit v1.2.3


From 9e421b7c60860f150aa29e9f7bc902f003d87e68 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 29 Nov 2016 17:41:50 -0800
Subject: Input: synaptics-rmi4 - fix documentation of
 rmi_2d_sensor_platform_data

Typos...

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/rmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 8499b6aa2221..27dd9aa4090c 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -108,7 +108,7 @@ struct rmi_2d_sensor_platform_data {
  * @buttonpad - the touchpad is a buttonpad, so enable only the first actual
  * button that is found.
  * @trackstick_buttons - Set when the function 30 is handling the physical
- * buttons of the trackstick (as a PD/2 passthrough device.
+ * buttons of the trackstick (as a PS/2 passthrough device).
  * @disable - the touchpad incorrectly reports F30 and it should be ignored.
  * This is a special case which is due to misconfigured firmware.
  */
-- 
cgit v1.2.3


From 0d37d63a001202b4932f6b14b05d8d055a0a45b6 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 29 Nov 2016 17:42:01 -0800
Subject: Input: synaptics-rmi4 - remove unused fields in struct
 rmi_driver_data

These fields are not used anywhere, there is no point in carrying them.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/rmi.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 27dd9aa4090c..0b118ab47b8d 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -340,7 +340,6 @@ struct rmi_driver_data {
 	struct rmi_function *f34_container;
 	bool f01_bootloader_mode;
 
-	u32 attn_count;
 	int num_of_irq_regs;
 	int irq_count;
 	void *irq_memory;
@@ -352,14 +351,11 @@ struct rmi_driver_data {
 	struct input_dev *input;
 
 	u8 pdt_props;
-	u8 bsr;
 
 	u8 num_rx_electrodes;
 	u8 num_tx_electrodes;
 
 	bool enabled;
-
-	void *data;
 };
 
 int rmi_register_transport_device(struct rmi_transport_dev *xport);
-- 
cgit v1.2.3


From a64ea311f1e4bc090c89960650637423e86c35c0 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 29 Nov 2016 17:42:13 -0800
Subject: Input: synaptics-rmi4 - add rmi_enable/disable_irq

Set the .enabled boolean and trigger an event processing when enabling
for edge-triggered systems.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_driver.c | 83 +++++++++++++++++++++++++++++++----------
 drivers/input/rmi4/rmi_driver.h |  2 +
 include/linux/rmi.h             |  1 +
 3 files changed, 67 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 2b17d8cb3d10..f04fc4152c1f 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -215,6 +215,7 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
 static int rmi_irq_init(struct rmi_device *rmi_dev)
 {
 	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
 	int irq_flags = irq_get_trigger_type(pdata->irq);
 	int ret;
 
@@ -232,6 +233,8 @@ static int rmi_irq_init(struct rmi_device *rmi_dev)
 		return ret;
 	}
 
+	data->enabled = true;
+
 	return 0;
 }
 
@@ -866,17 +869,54 @@ err_put_fn:
 	return error;
 }
 
-int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake)
+void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
 {
 	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
 	int irq = pdata->irq;
-	int retval = 0;
+	int irq_flags;
+	int retval;
 
-	retval = rmi_suspend_functions(rmi_dev);
-	if (retval)
-		dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n",
-			retval);
+	mutex_lock(&data->enabled_mutex);
+
+	if (data->enabled)
+		goto out;
+
+	enable_irq(irq);
+	data->enabled = true;
+	if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
+		retval = disable_irq_wake(irq);
+		if (!retval)
+			dev_warn(&rmi_dev->dev,
+				 "Failed to disable irq for wake: %d\n",
+				 retval);
+	}
+
+	/*
+	 * Call rmi_process_interrupt_requests() after enabling irq,
+	 * otherwise we may lose interrupt on edge-triggered systems.
+	 */
+	irq_flags = irq_get_trigger_type(pdata->irq);
+	if (irq_flags & IRQ_TYPE_EDGE_BOTH)
+		rmi_process_interrupt_requests(rmi_dev);
+
+out:
+	mutex_unlock(&data->enabled_mutex);
+}
+
+void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake)
+{
+	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
+	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
+	int irq = pdata->irq;
+	int retval;
+
+	mutex_lock(&data->enabled_mutex);
+
+	if (!data->enabled)
+		goto out;
 
+	data->enabled = false;
 	disable_irq(irq);
 	if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) {
 		retval = enable_irq_wake(irq);
@@ -885,24 +925,30 @@ int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake)
 				 "Failed to enable irq for wake: %d\n",
 				 retval);
 	}
+
+out:
+	mutex_unlock(&data->enabled_mutex);
+}
+
+int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake)
+{
+	int retval;
+
+	retval = rmi_suspend_functions(rmi_dev);
+	if (retval)
+		dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n",
+			retval);
+
+	rmi_disable_irq(rmi_dev, enable_wake);
 	return retval;
 }
 EXPORT_SYMBOL_GPL(rmi_driver_suspend);
 
 int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake)
 {
-	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
-	int irq = pdata->irq;
 	int retval;
 
-	enable_irq(irq);
-	if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
-		retval = disable_irq_wake(irq);
-		if (!retval)
-			dev_warn(&rmi_dev->dev,
-				 "Failed to disable irq for wake: %d\n",
-				 retval);
-	}
+	rmi_enable_irq(rmi_dev, clear_wake);
 
 	retval = rmi_resume_functions(rmi_dev);
 	if (retval)
@@ -916,10 +962,8 @@ EXPORT_SYMBOL_GPL(rmi_driver_resume);
 static int rmi_driver_remove(struct device *dev)
 {
 	struct rmi_device *rmi_dev = to_rmi_device(dev);
-	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
-	int irq = pdata->irq;
 
-	disable_irq(irq);
+	rmi_disable_irq(rmi_dev, false);
 
 	rmi_f34_remove_sysfs(rmi_dev);
 	rmi_free_function_list(rmi_dev);
@@ -1108,6 +1152,7 @@ static int rmi_driver_probe(struct device *dev)
 	}
 
 	mutex_init(&data->irq_mutex);
+	mutex_init(&data->enabled_mutex);
 
 	retval = rmi_probe_interrupts(data);
 	if (retval)
diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h
index 5b201f369505..c9fe3d3deef3 100644
--- a/drivers/input/rmi4/rmi_driver.h
+++ b/drivers/input/rmi4/rmi_driver.h
@@ -101,6 +101,8 @@ int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx,
 		 int (*callback)(struct rmi_device *rmi_dev, void *ctx,
 		 const struct pdt_entry *entry));
 int rmi_probe_interrupts(struct rmi_driver_data *data);
+void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake);
+void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake);
 int rmi_init_functions(struct rmi_driver_data *data);
 int rmi_initial_reset(struct rmi_device *rmi_dev, void *ctx,
 		      const struct pdt_entry *pdt);
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 0b118ab47b8d..621f098f1243 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -356,6 +356,7 @@ struct rmi_driver_data {
 	u8 num_tx_electrodes;
 
 	bool enabled;
+	struct mutex enabled_mutex;
 };
 
 int rmi_register_transport_device(struct rmi_transport_dev *xport);
-- 
cgit v1.2.3


From b634d30a79ecc2d28e61cbe5b1f4443952f37a8f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 30 Nov 2016 10:16:08 -0800
Subject: cgroup, bpf: remove unnecessary #include

this #include is unnecessary and brings whole set of
other headers into cgroup-defs.h. Remove it.

Fixes: 3007098494be ("cgroup: add support for eBPF programs")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Rami Rosen <roszenrami@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ec80d0c0953e..0cf1adfadd2d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -1,7 +1,6 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
-#include <linux/bpf.h>
 #include <linux/jump_label.h>
 #include <uapi/linux/bpf.h>
 
-- 
cgit v1.2.3


From 6d937acfb3f166f6e10abd978fafafa120d6f0d7 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 29 Nov 2016 16:47:01 +0200
Subject: qed: Optimize qed_chain datapath usage

The chain structure and functions are widely used by the qed* modules,
both for configuration and datapath.
E.g., qede's Tx has one such chain and its Rx has two.

Currently, the strucutre's fields which are required for datapath
related functions [produce/consume] are intertwined with fields which
are required only for configuration purposes [init/destroy/etc.].

This patch re-arranges the chain structure so that all the fields which
are required for datapath usage could reside in a single cacheline instead
of the two which are required today.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c         |   7 +-
 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c |   4 +-
 include/linux/qed/qed_chain.h                     | 144 ++++++++++++----------
 3 files changed, 86 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 5be7b8a25425..80162ee0391f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2283,12 +2283,12 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 {
 	void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl;
 	u32 page_cnt = p_chain->page_cnt, i, pbl_size;
-	u8 *p_pbl_virt = p_chain->pbl.p_virt_table;
+	u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table;
 
 	if (!pp_virt_addr_tbl)
 		return;
 
-	if (!p_chain->pbl.p_virt_table)
+	if (!p_pbl_virt)
 		goto out;
 
 	for (i = 0; i < page_cnt; i++) {
@@ -2306,7 +2306,8 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 	pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
 	dma_free_coherent(&cdev->pdev->dev,
 			  pbl_size,
-			  p_chain->pbl.p_virt_table, p_chain->pbl.p_phys_table);
+			  p_chain->pbl_sp.p_virt_table,
+			  p_chain->pbl_sp.p_phys_table);
 out:
 	vfree(p_chain->pbl.pp_virt_addr_tbl);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 2888eb0628f8..d0a58282f2a8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -347,11 +347,11 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
 	/* Place EQ address in RAMROD */
 	DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
-		       p_hwfn->p_eq->chain.pbl.p_phys_table);
+		       p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
 	page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
 	p_ramrod->event_ring_num_pages = page_cnt;
 	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
-		       p_hwfn->p_consq->chain.pbl.p_phys_table);
+		       p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
 
 	qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 72d88cf3ca25..37dfba101c6c 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -56,23 +56,6 @@ struct qed_chain_pbl_u32 {
 	u32 cons_page_idx;
 };
 
-struct qed_chain_pbl {
-	/* Base address of a pre-allocated buffer for pbl */
-	dma_addr_t	p_phys_table;
-	void		*p_virt_table;
-
-	/* Table for keeping the virtual addresses of the chain pages,
-	 * respectively to the physical addresses in the pbl table.
-	 */
-	void **pp_virt_addr_tbl;
-
-	/* Index to current used page by producer/consumer */
-	union {
-		struct qed_chain_pbl_u16 pbl16;
-		struct qed_chain_pbl_u32 pbl32;
-	} u;
-};
-
 struct qed_chain_u16 {
 	/* Cyclic index of next element to produce/consme */
 	u16 prod_idx;
@@ -86,46 +69,78 @@ struct qed_chain_u32 {
 };
 
 struct qed_chain {
-	void			*p_virt_addr;
-	dma_addr_t		p_phys_addr;
-	void			*p_prod_elem;
-	void			*p_cons_elem;
+	/* fastpath portion of the chain - required for commands such
+	 * as produce / consume.
+	 */
+	/* Point to next element to produce/consume */
+	void *p_prod_elem;
+	void *p_cons_elem;
+
+	/* Fastpath portions of the PBL [if exists] */
+	struct {
+		/* Table for keeping the virtual addresses of the chain pages,
+		 * respectively to the physical addresses in the pbl table.
+		 */
+		void **pp_virt_addr_tbl;
 
-	enum qed_chain_mode	mode;
-	enum qed_chain_use_mode intended_use; /* used to produce/consume */
-	enum qed_chain_cnt_type cnt_type;
+		union {
+			struct qed_chain_pbl_u16 u16;
+			struct qed_chain_pbl_u32 u32;
+		} c;
+	} pbl;
 
 	union {
 		struct qed_chain_u16 chain16;
 		struct qed_chain_u32 chain32;
 	} u;
 
+	/* Capacity counts only usable elements */
+	u32 capacity;
 	u32 page_cnt;
 
-	/* Number of elements - capacity is for usable elements only,
-	 * while size will contain total number of elements [for entire chain].
+	enum qed_chain_mode mode;
+
+	/* Elements information for fast calculations */
+	u16 elem_per_page;
+	u16 elem_per_page_mask;
+	u16 elem_size;
+	u16 next_page_mask;
+	u16 usable_per_page;
+	u8 elem_unusable;
+
+	u8 cnt_type;
+
+	/* Slowpath of the chain - required for initialization and destruction,
+	 * but isn't involved in regular functionality.
 	 */
-	u32 capacity;
+
+	/* Base address of a pre-allocated buffer for pbl */
+	struct {
+		dma_addr_t p_phys_table;
+		void *p_virt_table;
+	} pbl_sp;
+
+	/* Address of first page of the chain - the address is required
+	 * for fastpath operation [consume/produce] but only for the the SINGLE
+	 * flavour which isn't considered fastpath [== SPQ].
+	 */
+	void *p_virt_addr;
+	dma_addr_t p_phys_addr;
+
+	/* Total number of elements [for entire chain] */
 	u32 size;
 
-	/* Elements information for fast calculations */
-	u16			elem_per_page;
-	u16			elem_per_page_mask;
-	u16			elem_unusable;
-	u16			usable_per_page;
-	u16			elem_size;
-	u16			next_page_mask;
-	struct qed_chain_pbl	pbl;
+	u8 intended_use;
 };
 
 #define QED_CHAIN_PBL_ENTRY_SIZE        (8)
 #define QED_CHAIN_PAGE_SIZE             (0x1000)
 #define ELEMS_PER_PAGE(elem_size)       (QED_CHAIN_PAGE_SIZE / (elem_size))
 
-#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)     \
-	((mode == QED_CHAIN_MODE_NEXT_PTR) ?	     \
-	 (1 + ((sizeof(struct qed_chain_next) - 1) / \
-	       (elem_size))) : 0)
+#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)	 \
+	(((mode) == QED_CHAIN_MODE_NEXT_PTR) ?		 \
+	 (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \
+		   (elem_size))) : 0)
 
 #define USABLE_ELEMS_PER_PAGE(elem_size, mode) \
 	((u32)(ELEMS_PER_PAGE(elem_size) -     \
@@ -186,7 +201,7 @@ static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain)
 	return p_chain->usable_per_page;
 }
 
-static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
+static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
 {
 	return p_chain->elem_unusable;
 }
@@ -198,7 +213,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain)
 
 static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 {
-	return p_chain->pbl.p_phys_table;
+	return p_chain->pbl_sp.p_phys_table;
 }
 
 /**
@@ -214,10 +229,10 @@ static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 static inline void
 qed_chain_advance_page(struct qed_chain *p_chain,
 		       void **p_next_elem, void *idx_to_inc, void *page_to_inc)
-
 {
 	struct qed_chain_next *p_next = NULL;
 	u32 page_index = 0;
+
 	switch (p_chain->mode) {
 	case QED_CHAIN_MODE_NEXT_PTR:
 		p_next = *p_next_elem;
@@ -305,7 +320,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
 		if ((p_chain->u.chain16.prod_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_prod_idx = &p_chain->u.chain16.prod_idx;
-			p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx;
+			p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx;
 			qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
 					       p_prod_idx, p_prod_page_idx);
 		}
@@ -314,7 +329,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
 		if ((p_chain->u.chain32.prod_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_prod_idx = &p_chain->u.chain32.prod_idx;
-			p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx;
+			p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx;
 			qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
 					       p_prod_idx, p_prod_page_idx);
 		}
@@ -378,7 +393,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
 		if ((p_chain->u.chain16.cons_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_cons_idx = &p_chain->u.chain16.cons_idx;
-			p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx;
+			p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx;
 			qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
 					       p_cons_idx, p_cons_page_idx);
 		}
@@ -387,8 +402,8 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
 		if ((p_chain->u.chain32.cons_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_cons_idx = &p_chain->u.chain32.cons_idx;
-			p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx;
-		qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
+			p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx;
+			qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
 					       p_cons_idx, p_cons_page_idx);
 		}
 		p_chain->u.chain32.cons_idx++;
@@ -429,25 +444,26 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
 		u32 reset_val = p_chain->page_cnt - 1;
 
 		if (is_chain_u16(p_chain)) {
-			p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val;
-			p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val;
+			p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val;
+			p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val;
 		} else {
-			p_chain->pbl.u.pbl32.prod_page_idx = reset_val;
-			p_chain->pbl.u.pbl32.cons_page_idx = reset_val;
+			p_chain->pbl.c.u32.prod_page_idx = reset_val;
+			p_chain->pbl.c.u32.cons_page_idx = reset_val;
 		}
 	}
 
 	switch (p_chain->intended_use) {
-	case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
-	case QED_CHAIN_USE_TO_PRODUCE:
-		/* Do nothing */
-		break;
-
 	case QED_CHAIN_USE_TO_CONSUME:
 		/* produce empty elements */
 		for (i = 0; i < p_chain->capacity; i++)
 			qed_chain_recycle_consumed(p_chain);
 		break;
+
+	case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
+	case QED_CHAIN_USE_TO_PRODUCE:
+	default:
+		/* Do nothing */
+		break;
 	}
 }
 
@@ -473,13 +489,13 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
 	p_chain->p_virt_addr = NULL;
 	p_chain->p_phys_addr = 0;
 	p_chain->elem_size	= elem_size;
-	p_chain->intended_use = intended_use;
+	p_chain->intended_use = (u8)intended_use;
 	p_chain->mode		= mode;
-	p_chain->cnt_type = cnt_type;
+	p_chain->cnt_type = (u8)cnt_type;
 
-	p_chain->elem_per_page		= ELEMS_PER_PAGE(elem_size);
+	p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size);
 	p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode);
-	p_chain->elem_per_page_mask	= p_chain->elem_per_page - 1;
+	p_chain->elem_per_page_mask = p_chain->elem_per_page - 1;
 	p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode);
 	p_chain->next_page_mask = (p_chain->usable_per_page &
 				   p_chain->elem_per_page_mask);
@@ -488,8 +504,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
 	p_chain->capacity = p_chain->usable_per_page * page_cnt;
 	p_chain->size = p_chain->elem_per_page * page_cnt;
 
-	p_chain->pbl.p_phys_table = 0;
-	p_chain->pbl.p_virt_table = NULL;
+	p_chain->pbl_sp.p_phys_table = 0;
+	p_chain->pbl_sp.p_virt_table = NULL;
 	p_chain->pbl.pp_virt_addr_tbl = NULL;
 }
 
@@ -530,8 +546,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain,
 					  dma_addr_t p_phys_pbl,
 					  void **pp_virt_addr_tbl)
 {
-	p_chain->pbl.p_phys_table = p_phys_pbl;
-	p_chain->pbl.p_virt_table = p_virt_pbl;
+	p_chain->pbl_sp.p_phys_table = p_phys_pbl;
+	p_chain->pbl_sp.p_virt_table = p_virt_pbl;
 	p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl;
 }
 
-- 
cgit v1.2.3


From 3da7a37ae6886cfba9ef35428eb976fc2ef561fa Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 29 Nov 2016 16:47:06 +0200
Subject: qed*: Handle-based L2-queues.

The driver needs to maintain several FW/HW-indices for each one of
its queues. Currently, that mapping is done by the QED where it uses
an rx/tx array of so-called hw-cids, populating them whenever a new
queue is opened and clearing them upon destruction of said queues.

This maintenance is far from ideal - there's no real reason why
QED needs to maintain such a data-structure. It becomes even worse
when considering the fact that the PF's queues and its child VFs' queues
are all mapped into the same data-structure.
As a by-product, the set of parameters an interface needs to supply for
queue APIs is non-trivial, and some of the variables in the API
structures have different meaning depending on their exact place
in the configuration flow.

This patch re-organizes the way L2 queues are configured and maintained.
In short:
  - Required parameters for queue init are now well-defined.
  - Qed would allocate a queue-cid based on parameters.
    Upon initialization success, it would return a handle to caller.
  - Queue-handle would be maintained by entity requesting queue-init,
    not necessarily qed.
  - All further queue-APIs [update, destroy] would use the opaque
    handle as reference for the queue instead of various indices.

The possible owners of such handles:
  - PF queues [qede] - complete handles based on provided configuration.
  - VF queues [qede] - fw-context-less handles, containing only relative
    information; Only the PF-side would need the absolute indices
    for configuration, so they're omitted here.
  - VF queues [qed, PF-side] - complete handles based on VF initialization.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h        |  12 -
 drivers/net/ethernet/qlogic/qed/qed_dev.c    |  26 --
 drivers/net/ethernet/qlogic/qed/qed_l2.c     | 595 +++++++++++++++------------
 drivers/net/ethernet/qlogic/qed/qed_l2.h     | 133 ++++--
 drivers/net/ethernet/qlogic/qed/qed_sriov.c  | 275 +++++++++----
 drivers/net/ethernet/qlogic/qed/qed_sriov.h  |  21 +-
 drivers/net/ethernet/qlogic/qed/qed_vf.c     |  90 ++--
 drivers/net/ethernet/qlogic/qed/qed_vf.h     |  40 +-
 drivers/net/ethernet/qlogic/qede/qede.h      |   4 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 109 ++---
 include/linux/qed/qed_eth_if.h               |  56 +--
 11 files changed, 791 insertions(+), 570 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 50b8a01ff512..244dd40ccac3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -241,15 +241,6 @@ struct qed_hw_info {
 	enum qed_wol_support b_wol_support;
 };
 
-struct qed_hw_cid_data {
-	u32	cid;
-	bool	b_cid_allocated;
-
-	/* Additional identifiers */
-	u16	opaque_fid;
-	u8	vport_id;
-};
-
 /* maximun size of read/write commands (HW limit) */
 #define DMAE_MAX_RW_SIZE        0x2000
 
@@ -416,9 +407,6 @@ struct qed_hwfn {
 
 	struct qed_dcbx_info		*p_dcbx_info;
 
-	struct qed_hw_cid_data		*p_tx_cids;
-	struct qed_hw_cid_data		*p_rx_cids;
-
 	struct qed_dmae_info		dmae_info;
 
 	/* QM init */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 80162ee0391f..00b9a67ba359 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -134,15 +134,6 @@ void qed_resc_free(struct qed_dev *cdev)
 
 	kfree(cdev->reset_stats);
 
-	for_each_hwfn(cdev, i) {
-		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-
-		kfree(p_hwfn->p_tx_cids);
-		p_hwfn->p_tx_cids = NULL;
-		kfree(p_hwfn->p_rx_cids);
-		p_hwfn->p_rx_cids = NULL;
-	}
-
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -425,23 +416,6 @@ int qed_resc_alloc(struct qed_dev *cdev)
 	if (!cdev->fw_data)
 		return -ENOMEM;
 
-	/* Allocate Memory for the Queue->CID mapping */
-	for_each_hwfn(cdev, i) {
-		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-		int tx_size = sizeof(struct qed_hw_cid_data) *
-				     RESC_NUM(p_hwfn, QED_L2_QUEUE);
-		int rx_size = sizeof(struct qed_hw_cid_data) *
-				     RESC_NUM(p_hwfn, QED_L2_QUEUE);
-
-		p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL);
-		if (!p_hwfn->p_tx_cids)
-			goto alloc_no_mem;
-
-		p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL);
-		if (!p_hwfn->p_rx_cids)
-			goto alloc_no_mem;
-	}
-
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 		u32 n_eqes, num_cons;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 900b253be317..6a3727c4c0c6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include "qed.h"
 #include <linux/qed/qed_chain.h>
 #include "qed_cxt.h"
@@ -41,6 +42,124 @@
 #define QED_MAX_SGES_NUM 16
 #define CRC32_POLY 0x1edc6f41
 
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+			       struct qed_queue_cid *p_cid)
+{
+	/* VFs' CIDs are 0-based in PF-view, and uninitialized on VF */
+	if (!p_cid->is_vf && IS_PF(p_hwfn->cdev))
+		qed_cxt_release_cid(p_hwfn, p_cid->cid);
+	vfree(p_cid);
+}
+
+/* The internal is only meant to be directly called by PFs initializeing CIDs
+ * for their VFs.
+ */
+struct qed_queue_cid *
+_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+		      u16 opaque_fid,
+		      u32 cid,
+		      u8 vf_qid,
+		      struct qed_queue_start_common_params *p_params)
+{
+	bool b_is_same = (p_hwfn->hw_info.opaque_fid == opaque_fid);
+	struct qed_queue_cid *p_cid;
+	int rc;
+
+	p_cid = vmalloc(sizeof(*p_cid));
+	if (!p_cid)
+		return NULL;
+	memset(p_cid, 0, sizeof(*p_cid));
+
+	p_cid->opaque_fid = opaque_fid;
+	p_cid->cid = cid;
+	p_cid->vf_qid = vf_qid;
+	p_cid->rel = *p_params;
+
+	/* Don't try calculating the absolute indices for VFs */
+	if (IS_VF(p_hwfn->cdev)) {
+		p_cid->abs = p_cid->rel;
+		goto out;
+	}
+
+	/* Calculate the engine-absolute indices of the resources.
+	 * This would guarantee they're valid later on.
+	 * In some cases [SBs] we already have the right values.
+	 */
+	rc = qed_fw_vport(p_hwfn, p_cid->rel.vport_id, &p_cid->abs.vport_id);
+	if (rc)
+		goto fail;
+
+	rc = qed_fw_l2_queue(p_hwfn, p_cid->rel.queue_id, &p_cid->abs.queue_id);
+	if (rc)
+		goto fail;
+
+	/* In case of a PF configuring its VF's queues, the stats-id is already
+	 * absolute [since there's a single index that's suitable per-VF].
+	 */
+	if (b_is_same) {
+		rc = qed_fw_vport(p_hwfn, p_cid->rel.stats_id,
+				  &p_cid->abs.stats_id);
+		if (rc)
+			goto fail;
+	} else {
+		p_cid->abs.stats_id = p_cid->rel.stats_id;
+	}
+
+	/* SBs relevant information was already provided as absolute */
+	p_cid->abs.sb = p_cid->rel.sb;
+	p_cid->abs.sb_idx = p_cid->rel.sb_idx;
+
+	/* This is tricky - we're actually interested in whehter this is a PF
+	 * entry meant for the VF.
+	 */
+	if (!b_is_same)
+		p_cid->is_vf = true;
+out:
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_SP,
+		   "opaque_fid: %04x CID %08x vport %02x [%02x] qzone %04x [%04x] stats %02x [%02x] SB %04x PI %02x\n",
+		   p_cid->opaque_fid,
+		   p_cid->cid,
+		   p_cid->rel.vport_id,
+		   p_cid->abs.vport_id,
+		   p_cid->rel.queue_id,
+		   p_cid->abs.queue_id,
+		   p_cid->rel.stats_id,
+		   p_cid->abs.stats_id, p_cid->abs.sb, p_cid->abs.sb_idx);
+
+	return p_cid;
+
+fail:
+	vfree(p_cid);
+	return NULL;
+}
+
+static struct qed_queue_cid *qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+						  u16 opaque_fid, struct
+						  qed_queue_start_common_params
+						  *p_params)
+{
+	struct qed_queue_cid *p_cid;
+	u32 cid = 0;
+
+	/* Get a unique firmware CID for this queue, in case it's a PF.
+	 * VF's don't need a CID as the queue configuration will be done
+	 * by PF.
+	 */
+	if (IS_PF(p_hwfn->cdev)) {
+		if (qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &cid)) {
+			DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
+			return NULL;
+		}
+	}
+
+	p_cid = _qed_eth_queue_to_cid(p_hwfn, opaque_fid, cid, 0, p_params);
+	if (!p_cid && IS_PF(p_hwfn->cdev))
+		qed_cxt_release_cid(p_hwfn, cid);
+
+	return p_cid;
+}
+
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 			   struct qed_sp_vport_start_params *p_params)
 {
@@ -496,61 +615,26 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
 	return 0;
 }
 
-static int qed_sp_release_queue_cid(
-	struct qed_hwfn *p_hwfn,
-	struct qed_hw_cid_data *p_cid_data)
-{
-	if (!p_cid_data->b_cid_allocated)
-		return 0;
-
-	qed_cxt_release_cid(p_hwfn, p_cid_data->cid);
-
-	p_cid_data->b_cid_allocated = false;
-
-	return 0;
-}
-
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-				u16 opaque_fid,
-				u32 cid,
-				struct qed_queue_start_common_params *p_params,
-				u8 stats_id,
-				u16 bd_max_bytes,
-				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr,
-				u16 cqe_pbl_size, bool b_use_zone_a_prod)
+int qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+			     struct qed_queue_cid *p_cid,
+			     u16 bd_max_bytes,
+			     dma_addr_t bd_chain_phys_addr,
+			     dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size)
 {
 	struct rx_queue_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_rx_cid;
-	u16 abs_rx_q_id = 0;
-	u8 abs_vport_id = 0;
 	int rc = -EINVAL;
 
-	/* Store information for the stop */
-	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-	p_rx_cid->cid = cid;
-	p_rx_cid->opaque_fid = opaque_fid;
-	p_rx_cid->vport_id = p_params->vport_id;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
-	if (rc)
-		return rc;
-
 	DP_VERBOSE(p_hwfn, QED_MSG_SP,
-		   "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid,
-		   cid, p_params->queue_id, p_params->vport_id, p_params->sb);
+		   "opaque_fid=0x%x, cid=0x%x, rx_qzone=0x%x, vport_id=0x%x, sb_id=0x%x\n",
+		   p_cid->opaque_fid, p_cid->cid,
+		   p_cid->abs.queue_id, p_cid->abs.vport_id, p_cid->abs.sb);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = cid;
-	init_data.opaque_fid = opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -561,11 +645,11 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 
 	p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index = p_params->sb_idx;
-	p_ramrod->vport_id = abs_vport_id;
-	p_ramrod->stats_counter_id = stats_id;
-	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->vport_id = p_cid->abs.vport_id;
+	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 	p_ramrod->complete_cqe_flg = 0;
 	p_ramrod->complete_event_flg = 1;
 
@@ -575,85 +659,85 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-	if (p_params->vf_qid || b_use_zone_a_prod) {
-		p_ramrod->vf_rx_prod_index = p_params->vf_qid;
+	if (p_cid->is_vf) {
+		p_ramrod->vf_rx_prod_index = p_cid->vf_qid;
 		DP_VERBOSE(p_hwfn, QED_MSG_SP,
 			   "Queue%s is meant for VF rxq[%02x]\n",
-			   b_use_zone_a_prod ? " [legacy]" : "",
-			   p_params->vf_qid);
-		p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod;
+			   !!p_cid->b_legacy_vf ? " [legacy]" : "",
+			   p_cid->vf_qid);
+		p_ramrod->vf_rx_prod_use_zone_a = !!p_cid->b_legacy_vf;
 	}
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
-			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *p_params,
+qed_eth_pf_rx_queue_start(struct qed_hwfn *p_hwfn,
+			  struct qed_queue_cid *p_cid,
 			  u16 bd_max_bytes,
 			  dma_addr_t bd_chain_phys_addr,
 			  dma_addr_t cqe_pbl_addr,
 			  u16 cqe_pbl_size, void __iomem **pp_prod)
 {
-	struct qed_hw_cid_data *p_rx_cid;
 	u32 init_prod_val = 0;
-	u16 abs_l2_queue = 0;
-	u8 abs_stats_id = 0;
-	int rc;
 
-	if (IS_VF(p_hwfn->cdev)) {
-		return qed_vf_pf_rxq_start(p_hwfn,
-					   p_params->queue_id,
-					   p_params->sb,
-					   (u8)p_params->sb_idx,
-					   bd_max_bytes,
-					   bd_chain_phys_addr,
-					   cqe_pbl_addr, cqe_pbl_size, pp_prod);
-	}
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
-	if (rc)
-		return rc;
-
-	*pp_prod = (u8 __iomem *)p_hwfn->regview +
-				 GTT_BAR0_MAP_REG_MSDM_RAM +
-				 MSTORM_ETH_PF_PRODS_OFFSET(abs_l2_queue);
+	*pp_prod = p_hwfn->regview +
+		   GTT_BAR0_MAP_REG_MSDM_RAM +
+		    MSTORM_ETH_PF_PRODS_OFFSET(p_cid->abs.queue_id);
 
 	/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 	__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
 			  (u32 *)(&init_prod_val));
 
+	return qed_eth_rxq_start_ramrod(p_hwfn, p_cid,
+					bd_max_bytes,
+					bd_chain_phys_addr,
+					cqe_pbl_addr, cqe_pbl_size);
+}
+
+static int
+qed_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
+		       u16 opaque_fid,
+		       struct qed_queue_start_common_params *p_params,
+		       u16 bd_max_bytes,
+		       dma_addr_t bd_chain_phys_addr,
+		       dma_addr_t cqe_pbl_addr,
+		       u16 cqe_pbl_size,
+		       struct qed_rxq_start_ret_params *p_ret_params)
+{
+	struct qed_queue_cid *p_cid;
+	int rc;
+
 	/* Allocate a CID for the queue */
-	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-		return rc;
-	}
-	p_rx_cid->b_cid_allocated = true;
+	p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+	if (!p_cid)
+		return -ENOMEM;
 
-	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
-					 opaque_fid,
-					 p_rx_cid->cid,
-					 p_params,
-					 abs_stats_id,
+	if (IS_PF(p_hwfn->cdev)) {
+		rc = qed_eth_pf_rx_queue_start(p_hwfn, p_cid,
+					       bd_max_bytes,
+					       bd_chain_phys_addr,
+					       cqe_pbl_addr, cqe_pbl_size,
+					       &p_ret_params->p_prod);
+	} else {
+		rc = qed_vf_pf_rxq_start(p_hwfn, p_cid,
 					 bd_max_bytes,
 					 bd_chain_phys_addr,
-					 cqe_pbl_addr, cqe_pbl_size, false);
+					 cqe_pbl_addr,
+					 cqe_pbl_size, &p_ret_params->p_prod);
+	}
 
+	/* Provide the caller with a reference to as handler */
 	if (rc)
-		qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	else
+		p_ret_params->p_handle = (void *)p_cid;
 
 	return rc;
 }
 
 int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-				u16 rx_queue_id,
+				void **pp_rxq_handles,
 				u8 num_rxqs,
 				u8 complete_cqe_flg,
 				u8 complete_event_flg,
@@ -663,8 +747,7 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	struct rx_queue_update_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_rx_cid;
-	u16 qid, abs_rx_q_id = 0;
+	struct qed_queue_cid *p_cid;
 	int rc = -EINVAL;
 	u8 i;
 
@@ -673,12 +756,11 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	init_data.p_comp_data = p_comp_data;
 
 	for (i = 0; i < num_rxqs; i++) {
-		qid = rx_queue_id + i;
-		p_rx_cid = &p_hwfn->p_rx_cids[qid];
+		p_cid = ((struct qed_queue_cid **)pp_rxq_handles)[i];
 
 		/* Get SPQ entry */
-		init_data.cid = p_rx_cid->cid;
-		init_data.opaque_fid = p_rx_cid->opaque_fid;
+		init_data.cid = p_cid->cid;
+		init_data.opaque_fid = p_cid->opaque_fid;
 
 		rc = qed_sp_init_request(p_hwfn, &p_ent,
 					 ETH_RAMROD_RX_QUEUE_UPDATE,
@@ -687,10 +769,9 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 			return rc;
 
 		p_ramrod = &p_ent->ramrod.rx_queue_update;
+		p_ramrod->vport_id = p_cid->abs.vport_id;
 
-		qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-		qed_fw_l2_queue(p_hwfn, qid, &abs_rx_q_id);
-		p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+		p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 		p_ramrod->complete_cqe_flg = complete_cqe_flg;
 		p_ramrod->complete_event_flg = complete_event_flg;
 
@@ -702,24 +783,19 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-			     u16 rx_queue_id,
-			     bool eq_completion_only, bool cqe_completion)
+static int
+qed_eth_pf_rx_queue_stop(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 bool b_eq_completion_only, bool b_cqe_completion)
 {
-	struct qed_hw_cid_data *p_rx_cid = &p_hwfn->p_rx_cids[rx_queue_id];
 	struct rx_queue_stop_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	u16 abs_rx_q_id = 0;
-	int rc = -EINVAL;
-
-	if (IS_VF(p_hwfn->cdev))
-		return qed_vf_pf_rxq_stop(p_hwfn, rx_queue_id, cqe_completion);
+	int rc;
 
-	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = p_rx_cid->cid;
-	init_data.opaque_fid = p_rx_cid->opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -729,62 +805,53 @@ int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
 		return rc;
 
 	p_ramrod = &p_ent->ramrod.rx_queue_stop;
-
-	qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-	qed_fw_l2_queue(p_hwfn, rx_queue_id, &abs_rx_q_id);
-	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->vport_id = p_cid->abs.vport_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 
 	/* Cleaning the queue requires the completion to arrive there.
 	 * In addition, VFs require the answer to come as eqe to PF.
 	 */
-	p_ramrod->complete_cqe_flg =
-		(!!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) &&
-		 !eq_completion_only) || cqe_completion;
-	p_ramrod->complete_event_flg =
-		!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) ||
-		eq_completion_only;
+	p_ramrod->complete_cqe_flg = (!p_cid->is_vf &&
+				      !b_eq_completion_only) ||
+				     b_cqe_completion;
+	p_ramrod->complete_event_flg = p_cid->is_vf || b_eq_completion_only;
 
-	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc)
-		return rc;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+			  void *p_rxq,
+			  bool eq_completion_only, bool cqe_completion)
+{
+	struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_rxq;
+	int rc = -EINVAL;
 
-	return qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_rx_queue_stop(p_hwfn, p_cid,
+					      eq_completion_only,
+					      cqe_completion);
+	else
+		rc = qed_vf_pf_rxq_stop(p_hwfn, p_cid, cqe_completion);
+
+	if (!rc)
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	return rc;
 }
 
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-				u16  opaque_fid,
-				u32  cid,
-				struct qed_queue_start_common_params *p_params,
-				u8  stats_id,
-				dma_addr_t pbl_addr,
-				u16 pbl_size,
-				union qed_qm_pq_params *p_pq_params)
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id)
 {
 	struct tx_queue_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_tx_cid;
-	u16 pq_id, abs_tx_q_id = 0;
 	int rc = -EINVAL;
-	u8 abs_vport_id;
-
-	/* Store information for the stop */
-	p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-	p_tx_cid->cid		= cid;
-	p_tx_cid->opaque_fid	= opaque_fid;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_tx_q_id);
-	if (rc)
-		return rc;
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = cid;
-	init_data.opaque_fid = opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -794,96 +861,92 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
 		return rc;
 
 	p_ramrod = &p_ent->ramrod.tx_queue_start;
-	p_ramrod->vport_id = abs_vport_id;
+	p_ramrod->vport_id = p_cid->abs.vport_id;
 
-	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index = p_params->sb_idx;
-	p_ramrod->stats_counter_id = stats_id;
+	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
 
-	p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+	p_ramrod->queue_zone_id = cpu_to_le16(p_cid->abs.queue_id);
+	p_ramrod->same_as_last_id = cpu_to_le16(p_cid->abs.queue_id);
 
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-	pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
 	p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
-			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *p_params,
+qed_eth_pf_tx_queue_start(struct qed_hwfn *p_hwfn,
+			  struct qed_queue_cid *p_cid,
+			  u8 tc,
 			  dma_addr_t pbl_addr,
 			  u16 pbl_size, void __iomem **pp_doorbell)
 {
-	struct qed_hw_cid_data *p_tx_cid;
 	union qed_qm_pq_params pq_params;
-	u8 abs_stats_id = 0;
 	int rc;
 
-	if (IS_VF(p_hwfn->cdev)) {
-		return qed_vf_pf_txq_start(p_hwfn,
-					   p_params->queue_id,
-					   p_params->sb,
-					   p_params->sb_idx,
-					   pbl_addr, pbl_size, pp_doorbell);
-	}
+	memset(&pq_params, 0, sizeof(pq_params));
 
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+	rc = qed_eth_txq_start_ramrod(p_hwfn, p_cid,
+				      pbl_addr, pbl_size,
+				      qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH,
+						    &pq_params));
 	if (rc)
 		return rc;
 
-	p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-	memset(p_tx_cid, 0, sizeof(*p_tx_cid));
-	memset(&pq_params, 0, sizeof(pq_params));
+	/* Provide the caller with the necessary return values */
+	*pp_doorbell = p_hwfn->doorbells +
+		       qed_db_addr(p_cid->cid, DQ_DEMS_LEGACY);
 
-	/* Allocate a CID for the queue */
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-		return rc;
-	}
-	p_tx_cid->b_cid_allocated = true;
+	return 0;
+}
 
-	DP_VERBOSE(p_hwfn, QED_MSG_SP,
-		   "opaque_fid=0x%x, cid=0x%x, tx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid, p_tx_cid->cid,
-		   p_params->queue_id, p_params->vport_id, p_params->sb);
-
-	rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-					 opaque_fid,
-					 p_tx_cid->cid,
-					 p_params,
-					 abs_stats_id,
-					 pbl_addr,
-					 pbl_size,
-					 &pq_params);
-
-	*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-				     qed_db_addr(p_tx_cid->cid, DQ_DEMS_LEGACY);
+static int
+qed_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
+		       u16 opaque_fid,
+		       struct qed_queue_start_common_params *p_params,
+		       u8 tc,
+		       dma_addr_t pbl_addr,
+		       u16 pbl_size,
+		       struct qed_txq_start_ret_params *p_ret_params)
+{
+	struct qed_queue_cid *p_cid;
+	int rc;
+
+	p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+	if (!p_cid)
+		return -EINVAL;
+
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_tx_queue_start(p_hwfn, p_cid, tc,
+					       pbl_addr, pbl_size,
+					       &p_ret_params->p_doorbell);
+	else
+		rc = qed_vf_pf_txq_start(p_hwfn, p_cid,
+					 pbl_addr, pbl_size,
+					 &p_ret_params->p_doorbell);
 
 	if (rc)
-		qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	else
+		p_ret_params->p_handle = (void *)p_cid;
 
 	return rc;
 }
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
+static int
+qed_eth_pf_tx_queue_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
-	struct qed_hw_cid_data *p_tx_cid = &p_hwfn->p_tx_cids[tx_queue_id];
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	int rc = -EINVAL;
-
-	if (IS_VF(p_hwfn->cdev))
-		return qed_vf_pf_txq_stop(p_hwfn, tx_queue_id);
+	int rc;
 
-	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = p_tx_cid->cid;
-	init_data.opaque_fid = p_tx_cid->opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -892,11 +955,22 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
 	if (rc)
 		return rc;
 
-	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc)
-		return rc;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_handle)
+{
+	struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_handle;
+	int rc;
+
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_tx_queue_stop(p_hwfn, p_cid);
+	else
+		rc = qed_vf_pf_txq_stop(p_hwfn, p_cid);
 
-	return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+	if (!rc)
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	return rc;
 }
 
 static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
@@ -1880,58 +1954,53 @@ static int qed_update_vport(struct qed_dev *cdev,
 }
 
 static int qed_start_rxq(struct qed_dev *cdev,
-			 struct qed_queue_start_common_params *params,
+			 u8 rss_num,
+			 struct qed_queue_start_common_params *p_params,
 			 u16 bd_max_bytes,
 			 dma_addr_t bd_chain_phys_addr,
 			 dma_addr_t cqe_pbl_addr,
 			 u16 cqe_pbl_size,
-			 void __iomem **pp_prod)
+			 struct qed_rxq_start_ret_params *ret_params)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index = params->rss_id % cdev->num_hwfns;
+	hwfn_index = rss_num % cdev->num_hwfns;
 	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	/* Fix queue ID in 100g mode */
-	params->queue_id /= cdev->num_hwfns;
-
-	rc = qed_sp_eth_rx_queue_start(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       params,
-				       bd_max_bytes,
-				       bd_chain_phys_addr,
-				       cqe_pbl_addr,
-				       cqe_pbl_size,
-				       pp_prod);
+	p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+	p_params->stats_id = p_params->vport_id;
 
+	rc = qed_eth_rx_queue_start(p_hwfn,
+				    p_hwfn->hw_info.opaque_fid,
+				    p_params,
+				    bd_max_bytes,
+				    bd_chain_phys_addr,
+				    cqe_pbl_addr, cqe_pbl_size, ret_params);
 	if (rc) {
-		DP_ERR(cdev, "Failed to start RXQ#%d\n", params->queue_id);
+		DP_ERR(cdev, "Failed to start RXQ#%d\n", p_params->queue_id);
 		return rc;
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started RX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-		   params->queue_id, params->rss_id, params->vport_id,
-		   params->sb);
+		   "Started RX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   p_params->queue_id, rss_num, p_params->vport_id,
+		   p_params->sb);
 
 	return 0;
 }
 
-static int qed_stop_rxq(struct qed_dev *cdev,
-			struct qed_stop_rxq_params *params)
+static int qed_stop_rxq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
 	int rc, hwfn_index;
 	struct qed_hwfn *p_hwfn;
 
-	hwfn_index	= params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
+	hwfn_index = rss_id % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-				      params->rx_queue_id / cdev->num_hwfns,
-				      params->eq_completion_only, false);
+	rc = qed_eth_rx_queue_stop(p_hwfn, handle, false, false);
 	if (rc) {
-		DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
+		DP_ERR(cdev, "Failed to stop RXQ#%02x\n", rss_id);
 		return rc;
 	}
 
@@ -1939,26 +2008,24 @@ static int qed_stop_rxq(struct qed_dev *cdev,
 }
 
 static int qed_start_txq(struct qed_dev *cdev,
+			 u8 rss_num,
 			 struct qed_queue_start_common_params *p_params,
 			 dma_addr_t pbl_addr,
 			 u16 pbl_size,
-			 void __iomem **pp_doorbell)
+			 struct qed_txq_start_ret_params *ret_params)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index	= p_params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
-
-	/* Fix queue ID in 100g mode */
-	p_params->queue_id /= cdev->num_hwfns;
+	hwfn_index = rss_num % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
+	p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+	p_params->stats_id = p_params->vport_id;
 
-	rc = qed_sp_eth_tx_queue_start(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       p_params,
-				       pbl_addr,
-				       pbl_size,
-				       pp_doorbell);
+	rc = qed_eth_tx_queue_start(p_hwfn,
+				    p_hwfn->hw_info.opaque_fid,
+				    p_params, 0,
+				    pbl_addr, pbl_size, ret_params);
 
 	if (rc) {
 		DP_ERR(cdev, "Failed to start TXQ#%d\n", p_params->queue_id);
@@ -1966,8 +2033,8 @@ static int qed_start_txq(struct qed_dev *cdev,
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started TX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-		   p_params->queue_id, p_params->rss_id, p_params->vport_id,
+		   "Started TX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   p_params->queue_id, rss_num, p_params->vport_id,
 		   p_params->sb);
 
 	return 0;
@@ -1981,19 +2048,17 @@ static int qed_fastpath_stop(struct qed_dev *cdev)
 	return 0;
 }
 
-static int qed_stop_txq(struct qed_dev *cdev,
-			struct qed_stop_txq_params *params)
+static int qed_stop_txq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index	= params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
+	hwfn_index = rss_id % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-				      params->tx_queue_id / cdev->num_hwfns);
+	rc = qed_eth_tx_queue_stop(p_hwfn, handle);
 	if (rc) {
-		DP_ERR(cdev, "Failed to stop TXQ#%d\n", params->tx_queue_id);
+		DP_ERR(cdev, "Failed to stop TXQ#%02x\n", rss_id);
 		return rc;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index e495d62fcc03..48c9bfc28140 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -78,11 +78,34 @@ struct qed_filter_mcast {
 	unsigned char mac[QED_MAX_MC_ADDRS][ETH_ALEN];
 };
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-			     u16 rx_queue_id,
-			     bool eq_completion_only, bool cqe_completion);
+/**
+ * @brief qed_eth_rx_queue_stop - This ramrod closes an Rx queue
+ *
+ * @param p_hwfn
+ * @param p_rxq			Handler of queue to close
+ * @param eq_completion_only	If True completion will be on
+ *				EQe, if False completion will be
+ *				on EQe if p_hwfn opaque
+ *				different from the RXQ opaque
+ *				otherwise on CQe.
+ * @param cqe_completion	If True completion will be
+ *				receive on CQe.
+ * @return int
+ */
+int
+qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+		      void *p_rxq,
+		      bool eq_completion_only, bool cqe_completion);
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id);
+/**
+ * @brief qed_eth_tx_queue_stop - closes a Tx queue
+ *
+ * @param p_hwfn
+ * @param p_txq - handle to Tx queue needed to be closed
+ *
+ * @return int
+ */
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_txq);
 
 enum qed_tpa_mode {
 	QED_TPA_MODE_NONE,
@@ -196,19 +219,19 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
  * @note At the moment - only used by non-linux VFs.
  *
  * @param p_hwfn
- * @param rx_queue_id		RX Queue ID
- * @param num_rxqs		Allow to update multiple rx
- *				queues, from rx_queue_id to
- *				(rx_queue_id + num_rxqs)
+ * @param pp_rxq_handlers	An array of queue handlers to be updated.
+ * @param num_rxqs              number of queues to update.
  * @param complete_cqe_flg	Post completion to the CQE Ring if set
  * @param complete_event_flg	Post completion to the Event Ring if set
+ * @param comp_mode
+ * @param p_comp_data
  *
  * @return int
  */
 
 int
 qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-			    u16 rx_queue_id,
+			    void **pp_rxq_handlers,
 			    u8 num_rxqs,
 			    u8 complete_cqe_flg,
 			    u8 complete_event_flg,
@@ -217,27 +240,79 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 
 void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
 
-int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
-			   struct qed_sp_vport_start_params *p_params);
+void qed_reset_vport_stats(struct qed_dev *cdev);
+
+struct qed_queue_cid {
+	/* 'Relative' is a relative term ;-). Usually the indices [not counting
+	 * SBs] would be PF-relative, but there are some cases where that isn't
+	 * the case - specifically for a PF configuring its VF indices it's
+	 * possible some fields [E.g., stats-id] in 'rel' would already be abs.
+	 */
+	struct qed_queue_start_common_params rel;
+	struct qed_queue_start_common_params abs;
+	u32 cid;
+	u16 opaque_fid;
+
+	/* VFs queues are mapped differently, so we need to know the
+	 * relative queue associated with them [0-based].
+	 * Notice this is relevant on the *PF* queue-cid of its VF's queues,
+	 * and not on the VF itself.
+	 */
+	bool is_vf;
+	u8 vf_qid;
+
+	/* Legacy VFs might have Rx producer located elsewhere */
+	bool b_legacy_vf;
+};
 
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-				u16 opaque_fid,
-				u32 cid,
-				struct qed_queue_start_common_params *params,
-				u8 stats_id,
-				u16 bd_max_bytes,
-				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr,
-				u16 cqe_pbl_size, bool b_use_zone_a_prod);
-
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-				u16  opaque_fid,
-				u32  cid,
-				struct qed_queue_start_common_params *p_params,
-				u8  stats_id,
-				dma_addr_t pbl_addr,
-				u16 pbl_size,
-				union qed_qm_pq_params *p_pq_params);
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+			       struct qed_queue_cid *p_cid);
+
+struct qed_queue_cid *_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+					    u16 opaque_fid,
+					    u32 cid,
+					    u8 vf_qid,
+					    struct qed_queue_start_common_params
+					    *p_params);
+
+int
+qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
+		       struct qed_sp_vport_start_params *p_params);
+
+/**
+ * @brief - Starts an Rx queue, when queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param bd_max_bytes
+ * @param bd_chain_phys_addr
+ * @param cqe_pbl_addr
+ * @param cqe_pbl_size
+ *
+ * @return int
+ */
+int
+qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 u16 bd_max_bytes,
+			 dma_addr_t bd_chain_phys_addr,
+			 dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+
+/**
+ * @brief - Starts a Tx queue, where queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param pbl_addr
+ * @param pbl_size
+ * @param p_pq_params - parameters for choosing the PQ for this Tx queue
+ *
+ * @return int
+ */
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id);
 
 u8 qed_mcast_bin_from_mac(u8 *mac);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index f3f742a4e59a..85b09dd1787a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -808,37 +808,70 @@ static void qed_iov_free_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 
 static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
-				  u16 rel_vf_id, u16 num_rx_queues)
+				  struct qed_iov_vf_init_params *p_params)
 {
 	u8 num_of_vf_avaiable_chains = 0;
 	struct qed_vf_info *vf = NULL;
+	u16 qid, num_irqs;
 	int rc = 0;
 	u32 cids;
 	u8 i;
 
-	vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, false);
+	vf = qed_iov_get_vf_info(p_hwfn, p_params->rel_vf_id, false);
 	if (!vf) {
 		DP_ERR(p_hwfn, "qed_iov_init_hw_for_vf : vf is NULL\n");
 		return -EINVAL;
 	}
 
 	if (vf->b_init) {
-		DP_NOTICE(p_hwfn, "VF[%d] is already active.\n", rel_vf_id);
+		DP_NOTICE(p_hwfn, "VF[%d] is already active.\n",
+			  p_params->rel_vf_id);
 		return -EINVAL;
 	}
 
+	/* Perform sanity checking on the requested queue_id */
+	for (i = 0; i < p_params->num_queues; i++) {
+		u16 min_vf_qzone = FEAT_NUM(p_hwfn, QED_PF_L2_QUE);
+		u16 max_vf_qzone = min_vf_qzone +
+		    FEAT_NUM(p_hwfn, QED_VF_L2_QUE) - 1;
+
+		qid = p_params->req_rx_queue[i];
+		if (qid < min_vf_qzone || qid > max_vf_qzone) {
+			DP_NOTICE(p_hwfn,
+				  "Can't enable Rx qid [%04x] for VF[%d]: qids [0x%04x,...,0x%04x] available\n",
+				  qid,
+				  p_params->rel_vf_id,
+				  min_vf_qzone, max_vf_qzone);
+			return -EINVAL;
+		}
+
+		qid = p_params->req_tx_queue[i];
+		if (qid > max_vf_qzone) {
+			DP_NOTICE(p_hwfn,
+				  "Can't enable Tx qid [%04x] for VF[%d]: max qid 0x%04x\n",
+				  qid, p_params->rel_vf_id, max_vf_qzone);
+			return -EINVAL;
+		}
+
+		/* If client *really* wants, Tx qid can be shared with PF */
+		if (qid < min_vf_qzone)
+			DP_VERBOSE(p_hwfn,
+				   QED_MSG_IOV,
+				   "VF[%d] is using PF qid [0x%04x] for Txq[0x%02x]\n",
+				   p_params->rel_vf_id, qid, i);
+	}
+
 	/* Limit number of queues according to number of CIDs */
 	qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, &cids);
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_IOV,
 		   "VF[%d] - requesting to initialize for 0x%04x queues [0x%04x CIDs available]\n",
-		   vf->relative_vf_id, num_rx_queues, (u16) cids);
-	num_rx_queues = min_t(u16, num_rx_queues, ((u16) cids));
+		   vf->relative_vf_id, p_params->num_queues, (u16)cids);
+	num_irqs = min_t(u16, p_params->num_queues, ((u16)cids));
 
 	num_of_vf_avaiable_chains = qed_iov_alloc_vf_igu_sbs(p_hwfn,
 							     p_ptt,
-							     vf,
-							     num_rx_queues);
+							     vf, num_irqs);
 	if (!num_of_vf_avaiable_chains) {
 		DP_ERR(p_hwfn, "no available igu sbs\n");
 		return -ENOMEM;
@@ -849,25 +882,22 @@ static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
 	vf->num_txqs = num_of_vf_avaiable_chains;
 
 	for (i = 0; i < vf->num_rxqs; i++) {
-		u16 queue_id = qed_int_queue_id_from_sb_id(p_hwfn,
-							   vf->igu_sbs[i]);
+		struct qed_vf_q_info *p_queue = &vf->vf_queues[i];
 
-		if (queue_id > RESC_NUM(p_hwfn, QED_L2_QUEUE)) {
-			DP_NOTICE(p_hwfn,
-				  "VF[%d] will require utilizing of out-of-bounds queues - %04x\n",
-				  vf->relative_vf_id, queue_id);
-			return -EINVAL;
-		}
+		p_queue->fw_rx_qid = p_params->req_rx_queue[i];
+		p_queue->fw_tx_qid = p_params->req_tx_queue[i];
 
 		/* CIDs are per-VF, so no problem having them 0-based. */
-		vf->vf_queues[i].fw_rx_qid = queue_id;
-		vf->vf_queues[i].fw_tx_qid = queue_id;
-		vf->vf_queues[i].fw_cid = i;
+		p_queue->fw_cid = i;
 
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-			   "VF[%d] - [%d] SB %04x, Tx/Rx queue %04x CID %04x\n",
-			   vf->relative_vf_id, i, vf->igu_sbs[i], queue_id, i);
+			   "VF[%d] - Q[%d] SB %04x, qid [Rx %04x Tx %04x]  CID %04x\n",
+			   vf->relative_vf_id,
+			   i, vf->igu_sbs[i],
+			   p_queue->fw_rx_qid,
+			   p_queue->fw_tx_qid, p_queue->fw_cid);
 	}
+
 	rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, vf);
 	if (!rc) {
 		vf->b_init = true;
@@ -1187,8 +1217,19 @@ static void qed_iov_vf_cleanup(struct qed_hwfn *p_hwfn,
 
 	p_vf->num_active_rxqs = 0;
 
-	for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++)
-		p_vf->vf_queues[i].rxq_active = 0;
+	for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
+		struct qed_vf_q_info *p_queue = &p_vf->vf_queues[i];
+
+		if (p_queue->p_rx_cid) {
+			qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+			p_queue->p_rx_cid = NULL;
+		}
+
+		if (p_queue->p_tx_cid) {
+			qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+			p_queue->p_tx_cid = NULL;
+		}
+	}
 
 	memset(&p_vf->shadow_config, 0, sizeof(p_vf->shadow_config));
 	memset(&p_vf->acquire, 0, sizeof(p_vf->acquire));
@@ -1594,21 +1635,21 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
 
 		/* Update all the Rx queues */
 		for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
-			u16 qid;
+			struct qed_queue_cid *p_cid;
 
-			if (!p_vf->vf_queues[i].rxq_active)
+			p_cid = p_vf->vf_queues[i].p_rx_cid;
+			if (!p_cid)
 				continue;
 
-			qid = p_vf->vf_queues[i].fw_rx_qid;
-
-			rc = qed_sp_eth_rx_queues_update(p_hwfn, qid,
+			rc = qed_sp_eth_rx_queues_update(p_hwfn,
+							 (void **)&p_cid,
 							 1, 0, 1,
 							 QED_SPQ_MODE_EBLOCK,
 							 NULL);
 			if (rc) {
 				DP_NOTICE(p_hwfn,
 					  "Failed to send Rx update fo queue[0x%04x]\n",
-					  qid);
+					  p_cid->rel.queue_id);
 				return rc;
 			}
 		}
@@ -1782,23 +1823,34 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 	struct qed_queue_start_common_params params;
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	u8 status = PFVF_STATUS_NO_RESOURCE;
+	struct qed_vf_q_info *p_queue;
 	struct vfpf_start_rxq_tlv *req;
 	bool b_legacy_vf = false;
 	int rc;
 
-	memset(&params, 0, sizeof(params));
 	req = &mbx->req_virt->start_rxq;
 
 	if (!qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid) ||
 	    !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
 		goto out;
 
-	params.queue_id =  vf->vf_queues[req->rx_qid].fw_rx_qid;
-	params.vf_qid = req->rx_qid;
+	/* Acquire a new queue-cid */
+	p_queue = &vf->vf_queues[req->rx_qid];
+
+	memset(&params, 0, sizeof(params));
+	params.queue_id = p_queue->fw_rx_qid;
 	params.vport_id = vf->vport_id;
+	params.stats_id = vf->abs_vf_id + 0x10;
 	params.sb = req->hw_sb;
 	params.sb_idx = req->sb_index;
 
+	p_queue->p_rx_cid = _qed_eth_queue_to_cid(p_hwfn,
+						  vf->opaque_fid,
+						  p_queue->fw_cid,
+						  req->rx_qid, &params);
+	if (!p_queue->p_rx_cid)
+		goto out;
+
 	/* Legacy VFs have their Producers in a different location, which they
 	 * calculate on their own and clean the producer prior to this.
 	 */
@@ -1811,21 +1863,19 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 		       MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
 		       0);
 	}
+	p_queue->p_rx_cid->b_legacy_vf = b_legacy_vf;
 
-	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid,
-					 vf->vf_queues[req->rx_qid].fw_cid,
-					 &params,
-					 vf->abs_vf_id + 0x10,
-					 req->bd_max_bytes,
-					 req->rxq_addr,
-					 req->cqe_pbl_addr, req->cqe_pbl_size,
-					 b_legacy_vf);
-
+	rc = qed_eth_rxq_start_ramrod(p_hwfn,
+				      p_queue->p_rx_cid,
+				      req->bd_max_bytes,
+				      req->rxq_addr,
+				      req->cqe_pbl_addr, req->cqe_pbl_size);
 	if (rc) {
 		status = PFVF_STATUS_FAILURE;
+		qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+		p_queue->p_rx_cid = NULL;
 	} else {
 		status = PFVF_STATUS_SUCCESS;
-		vf->vf_queues[req->rx_qid].rxq_active = true;
 		vf->num_active_rxqs++;
 	}
 
@@ -1882,7 +1932,9 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	u8 status = PFVF_STATUS_NO_RESOURCE;
 	union qed_qm_pq_params pq_params;
 	struct vfpf_start_txq_tlv *req;
+	struct qed_vf_q_info *p_queue;
 	int rc;
+	u16 pq;
 
 	/* Prepare the parameters which would choose the right PQ */
 	memset(&pq_params, 0, sizeof(pq_params));
@@ -1896,24 +1948,31 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	    !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
 		goto out;
 
-	params.queue_id =  vf->vf_queues[req->tx_qid].fw_tx_qid;
+	/* Acquire a new queue-cid */
+	p_queue = &vf->vf_queues[req->tx_qid];
+
+	params.queue_id = p_queue->fw_tx_qid;
 	params.vport_id = vf->vport_id;
+	params.stats_id = vf->abs_vf_id + 0x10;
 	params.sb = req->hw_sb;
 	params.sb_idx = req->sb_index;
 
-	rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-					 vf->opaque_fid,
-					 vf->vf_queues[req->tx_qid].fw_cid,
-					 &params,
-					 vf->abs_vf_id + 0x10,
-					 req->pbl_addr,
-					 req->pbl_size, &pq_params);
+	p_queue->p_tx_cid = _qed_eth_queue_to_cid(p_hwfn,
+						  vf->opaque_fid,
+						  p_queue->fw_cid,
+						  req->tx_qid, &params);
+	if (!p_queue->p_tx_cid)
+		goto out;
 
+	pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, &pq_params);
+	rc = qed_eth_txq_start_ramrod(p_hwfn, p_queue->p_tx_cid,
+				      req->pbl_addr, req->pbl_size, pq);
 	if (rc) {
 		status = PFVF_STATUS_FAILURE;
+		qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+		p_queue->p_tx_cid = NULL;
 	} else {
 		status = PFVF_STATUS_SUCCESS;
-		vf->vf_queues[req->tx_qid].txq_active = true;
 	}
 
 out:
@@ -1924,6 +1983,7 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
 				struct qed_vf_info *vf,
 				u16 rxq_id, u8 num_rxqs, bool cqe_completion)
 {
+	struct qed_vf_q_info *p_queue;
 	int rc = 0;
 	int qid;
 
@@ -1931,16 +1991,18 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 
 	for (qid = rxq_id; qid < rxq_id + num_rxqs; qid++) {
-		if (vf->vf_queues[qid].rxq_active) {
-			rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-						      vf->vf_queues[qid].
-						      fw_rx_qid, false,
-						      cqe_completion);
+		p_queue = &vf->vf_queues[qid];
 
-			if (rc)
-				return rc;
-		}
-		vf->vf_queues[qid].rxq_active = false;
+		if (!p_queue->p_rx_cid)
+			continue;
+
+		rc = qed_eth_rx_queue_stop(p_hwfn,
+					   p_queue->p_rx_cid,
+					   false, cqe_completion);
+		if (rc)
+			return rc;
+
+		vf->vf_queues[qid].p_rx_cid = NULL;
 		vf->num_active_rxqs--;
 	}
 
@@ -1951,22 +2013,24 @@ static int qed_iov_vf_stop_txqs(struct qed_hwfn *p_hwfn,
 				struct qed_vf_info *vf, u16 txq_id, u8 num_txqs)
 {
 	int rc = 0;
+	struct qed_vf_q_info *p_queue;
 	int qid;
 
 	if (txq_id + num_txqs > ARRAY_SIZE(vf->vf_queues))
 		return -EINVAL;
 
 	for (qid = txq_id; qid < txq_id + num_txqs; qid++) {
-		if (vf->vf_queues[qid].txq_active) {
-			rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-						      vf->vf_queues[qid].
-						      fw_tx_qid);
+		p_queue = &vf->vf_queues[qid];
+		if (!p_queue->p_tx_cid)
+			continue;
 
-			if (rc)
-				return rc;
-		}
-		vf->vf_queues[qid].txq_active = false;
+		rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->p_tx_cid);
+		if (rc)
+			return rc;
+
+		p_queue->p_tx_cid = NULL;
 	}
+
 	return rc;
 }
 
@@ -2021,10 +2085,11 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt,
 				       struct qed_vf_info *vf)
 {
+	struct qed_queue_cid *handlers[QED_MAX_VF_CHAINS_PER_PF];
 	u16 length = sizeof(struct pfvf_def_resp_tlv);
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	struct vfpf_update_rxq_tlv *req;
-	u8 status = PFVF_STATUS_SUCCESS;
+	u8 status = PFVF_STATUS_FAILURE;
 	u8 complete_event_flg;
 	u8 complete_cqe_flg;
 	u16 qid;
@@ -2035,29 +2100,36 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
 	complete_cqe_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_CQE_FLAG);
 	complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
 
+	/* Validate inputs */
+	if (req->num_rxqs + req->rx_qid > QED_MAX_VF_CHAINS_PER_PF ||
+	    !qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid)) {
+		DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
+			vf->relative_vf_id, req->rx_qid, req->num_rxqs);
+		goto out;
+	}
+
 	for (i = 0; i < req->num_rxqs; i++) {
 		qid = req->rx_qid + i;
-
-		if (!vf->vf_queues[qid].rxq_active) {
-			DP_NOTICE(p_hwfn, "VF rx_qid = %d isn`t active!\n",
-				  qid);
-			status = PFVF_STATUS_FAILURE;
-			break;
+		if (!vf->vf_queues[qid].p_rx_cid) {
+			DP_INFO(p_hwfn,
+				"VF[%d] rx_qid = %d isn`t active!\n",
+				vf->relative_vf_id, qid);
+			goto out;
 		}
 
-		rc = qed_sp_eth_rx_queues_update(p_hwfn,
-						 vf->vf_queues[qid].fw_rx_qid,
-						 1,
-						 complete_cqe_flg,
-						 complete_event_flg,
-						 QED_SPQ_MODE_EBLOCK, NULL);
-
-		if (rc) {
-			status = PFVF_STATUS_FAILURE;
-			break;
-		}
+		handlers[i] = vf->vf_queues[qid].p_rx_cid;
 	}
 
+	rc = qed_sp_eth_rx_queues_update(p_hwfn, (void **)&handlers,
+					 req->num_rxqs,
+					 complete_cqe_flg,
+					 complete_event_flg,
+					 QED_SPQ_MODE_EBLOCK, NULL);
+	if (rc)
+		goto out;
+
+	status = PFVF_STATUS_SUCCESS;
+out:
 	qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_UPDATE_RXQ,
 			     length, status);
 }
@@ -2268,7 +2340,7 @@ qed_iov_vp_update_rss_param(struct qed_hwfn *p_hwfn,
 			DP_NOTICE(p_hwfn,
 				  "rss_ind_table[%d] = %d, rxq is out of range\n",
 				  i, q_idx);
-		else if (!vf->vf_queues[q_idx].rxq_active)
+		else if (!vf->vf_queues[q_idx].p_rx_cid)
 			DP_NOTICE(p_hwfn,
 				  "rss_ind_table[%d] = %d, rxq is not active\n",
 				  i, q_idx);
@@ -3468,8 +3540,28 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	return 0;
 }
 
+static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn,
+					u16 vfid,
+					struct qed_iov_vf_init_params *params)
+{
+	u16 base, i;
+
+	/* Since we have an equal resource distribution per-VF, and we assume
+	 * PF has acquired the QED_PF_L2_QUE first queues, we start setting
+	 * sequentially from there.
+	 */
+	base = FEAT_NUM(hwfn, QED_PF_L2_QUE) + vfid * params->num_queues;
+
+	params->rel_vf_id = vfid;
+	for (i = 0; i < params->num_queues; i++) {
+		params->req_rx_queue[i] = base + i;
+		params->req_tx_queue[i] = base + i;
+	}
+}
+
 static int qed_sriov_enable(struct qed_dev *cdev, int num)
 {
+	struct qed_iov_vf_init_params params;
 	int i, j, rc;
 
 	if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
@@ -3478,15 +3570,17 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 		return -EINVAL;
 	}
 
+	memset(&params, 0, sizeof(params));
+
 	/* Initialize HW for VF access */
 	for_each_hwfn(cdev, j) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[j];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
-		int num_queues;
 
 		/* Make sure not to use more than 16 queues per VF */
-		num_queues = min_t(int,
-				   FEAT_NUM(hwfn, QED_VF_L2_QUE) / num, 16);
+		params.num_queues = min_t(int,
+					  FEAT_NUM(hwfn, QED_VF_L2_QUE) / num,
+					  16);
 
 		if (!ptt) {
 			DP_ERR(hwfn, "Failed to acquire ptt\n");
@@ -3498,7 +3592,8 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 			if (!qed_iov_is_valid_vfid(hwfn, i, false, true))
 				continue;
 
-			rc = qed_iov_init_hw_for_vf(hwfn, ptt, i, num_queues);
+			qed_sriov_enable_qid_config(hwfn, i, &params);
+			rc = qed_iov_init_hw_for_vf(hwfn, ptt, &params);
 			if (rc) {
 				DP_ERR(cdev, "Failed to enable VF[%d]\n", i);
 				qed_ptt_release(hwfn, ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 3cf515b1b427..509c02b4772e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -58,6 +58,23 @@ struct qed_public_vf_info {
 	int tx_rate;
 };
 
+struct qed_iov_vf_init_params {
+	u16 rel_vf_id;
+
+	/* Number of requested Queues; Currently, don't support different
+	 * number of Rx/Tx queues.
+	 */
+
+	u16 num_queues;
+
+	/* Allow the client to choose which qzones to use for Rx/Tx,
+	 * and which queue_base to use for Tx queues on a per-queue basis.
+	 * Notice values should be relative to the PF resources.
+	 */
+	u16 req_rx_queue[QED_MAX_VF_CHAINS_PER_PF];
+	u16 req_tx_queue[QED_MAX_VF_CHAINS_PER_PF];
+};
+
 /* This struct is part of qed_dev and contains data relevant to all hwfns;
  * Initialized only if SR-IOV cpabability is exposed in PCIe config space.
  */
@@ -99,10 +116,10 @@ struct qed_iov_vf_mbx {
 
 struct qed_vf_q_info {
 	u16 fw_rx_qid;
+	struct qed_queue_cid *p_rx_cid;
 	u16 fw_tx_qid;
+	struct qed_queue_cid *p_tx_cid;
 	u8 fw_cid;
-	u8 rxq_active;
-	u8 txq_active;
 };
 
 enum vf_state {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 3c0633642f4c..60b31a8ede73 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -388,18 +388,18 @@ free_p_iov:
 #define MSTORM_QZONE_START(dev)   (TSTORM_QZONE_START +	\
 				   (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev)))
 
-int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-			u8 rx_qid,
-			u16 sb,
-			u8 sb_index,
-			u16 bd_max_bytes,
-			dma_addr_t bd_chain_phys_addr,
-			dma_addr_t cqe_pbl_addr,
-			u16 cqe_pbl_size, void __iomem **pp_prod)
+int
+qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    u16 bd_max_bytes,
+		    dma_addr_t bd_chain_phys_addr,
+		    dma_addr_t cqe_pbl_addr,
+		    u16 cqe_pbl_size, void __iomem **pp_prod)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct pfvf_start_queue_resp_tlv *resp;
 	struct vfpf_start_rxq_tlv *req;
+	u8 rx_qid = p_cid->rel.queue_id;
 	int rc;
 
 	/* clear mailbox and prep first tlv */
@@ -409,21 +409,22 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 	req->cqe_pbl_addr = cqe_pbl_addr;
 	req->cqe_pbl_size = cqe_pbl_size;
 	req->rxq_addr = bd_chain_phys_addr;
-	req->hw_sb = sb;
-	req->sb_index = sb_index;
+	req->hw_sb = p_cid->rel.sb;
+	req->sb_index = p_cid->rel.sb_idx;
 	req->bd_max_bytes = bd_max_bytes;
 	req->stat_id = -1;
 
 	/* If PF is legacy, we'll need to calculate producers ourselves
 	 * as well as clean them.
 	 */
-	if (pp_prod && p_iov->b_pre_fp_hsi) {
+	if (p_iov->b_pre_fp_hsi) {
 		u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid];
 		u32 init_prod_val = 0;
 
-		*pp_prod = (u8 __iomem *)p_hwfn->regview +
-					 MSTORM_QZONE_START(p_hwfn->cdev) +
-					 hw_qid * MSTORM_QZONE_SIZE;
+		*pp_prod = (u8 __iomem *)
+		    p_hwfn->regview +
+		    MSTORM_QZONE_START(p_hwfn->cdev) +
+		    hw_qid * MSTORM_QZONE_SIZE;
 
 		/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
@@ -444,7 +445,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Learn the address of the producer from the response */
-	if (pp_prod && !p_iov->b_pre_fp_hsi) {
+	if (!p_iov->b_pre_fp_hsi) {
 		u32 init_prod_val = 0;
 
 		*pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset;
@@ -462,7 +463,8 @@ exit:
 	return rc;
 }
 
-int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
+int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
+		       struct qed_queue_cid *p_cid, bool cqe_completion)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct vfpf_stop_rxqs_tlv *req;
@@ -472,7 +474,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_RXQS, sizeof(*req));
 
-	req->rx_qid = rx_qid;
+	req->rx_qid = p_cid->rel.queue_id;
 	req->num_rxqs = 1;
 	req->cqe_completion = cqe_completion;
 
@@ -496,28 +498,28 @@ exit:
 	return rc;
 }
 
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-			u16 tx_queue_id,
-			u16 sb,
-			u8 sb_index,
-			dma_addr_t pbl_addr,
-			u16 pbl_size, void __iomem **pp_doorbell)
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    dma_addr_t pbl_addr,
+		    u16 pbl_size, void __iomem **pp_doorbell)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct pfvf_start_queue_resp_tlv *resp;
 	struct vfpf_start_txq_tlv *req;
+	u16 qid = p_cid->rel.queue_id;
 	int rc;
 
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_START_TXQ, sizeof(*req));
 
-	req->tx_qid = tx_queue_id;
+	req->tx_qid = qid;
 
 	/* Tx */
 	req->pbl_addr = pbl_addr;
 	req->pbl_size = pbl_size;
-	req->hw_sb = sb;
-	req->sb_index = sb_index;
+	req->hw_sb = p_cid->rel.sb;
+	req->sb_index = p_cid->rel.sb_idx;
 
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
@@ -533,33 +535,29 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 		goto exit;
 	}
 
-	if (pp_doorbell) {
-		/* Modern PFs provide the actual offsets, while legacy
-		 * provided only the queue id.
-		 */
-		if (!p_iov->b_pre_fp_hsi) {
-			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-						     resp->offset;
-		} else {
-			u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id];
-			u32 db_addr;
-
-			db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY);
-			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-						     db_addr;
-		}
+	/* Modern PFs provide the actual offsets, while legacy
+	 * provided only the queue id.
+	 */
+	if (!p_iov->b_pre_fp_hsi) {
+		*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset;
+	} else {
+		u8 cid = p_iov->acquire_resp.resc.cid[qid];
 
-		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-			   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
-			   tx_queue_id, *pp_doorbell, resp->offset);
+		*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+					     qed_db_addr_vf(cid,
+							    DQ_DEMS_LEGACY);
 	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+		   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
+		   qid, *pp_doorbell, resp->offset);
 exit:
 	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
 
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct vfpf_stop_txqs_tlv *req;
@@ -569,7 +567,7 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_TXQS, sizeof(*req));
 
-	req->tx_qid = tx_qid;
+	req->tx_qid = p_cid->rel.queue_id;
 	req->num_txqs = 1;
 
 	/* add list termination tlv */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 325c250d4ee5..11eb3854e6f2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -666,10 +666,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
 /**
  * @brief VF - start the RX Queue by sending a message to the PF
  * @param p_hwfn
- * @param cid                   - zero based within the VF
- * @param rx_queue_id           - zero based within the VF
- * @param sb                    - VF status block for this queue
- * @param sb_index              - Index within the status block
+ * @param p_cid			- Only relative fields are relevant
  * @param bd_max_bytes          - maximum number of bytes per bd
  * @param bd_chain_phys_addr    - physical address of bd chain
  * @param cqe_pbl_addr          - physical address of pbl
@@ -680,9 +677,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
  * @return int
  */
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-			u8 rx_queue_id,
-			u16 sb,
-			u8 sb_index,
+			struct qed_queue_cid *p_cid,
 			u16 bd_max_bytes,
 			dma_addr_t bd_chain_phys_addr,
 			dma_addr_t cqe_pbl_addr,
@@ -702,24 +697,23 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-			u16 tx_queue_id,
-			u16 sb,
-			u8 sb_index,
-			dma_addr_t pbl_addr,
-			u16 pbl_size, void __iomem **pp_doorbell);
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    dma_addr_t pbl_addr,
+		    u16 pbl_size, void __iomem **pp_doorbell);
 
 /**
  * @brief VF - stop the RX queue by sending a message to the PF
  *
  * @param p_hwfn
- * @param rx_qid
+ * @param p_cid
  * @param cqe_completion
  *
  * @return int
  */
 int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-		       u16 rx_qid, bool cqe_completion);
+		       struct qed_queue_cid *p_cid, bool cqe_completion);
 
 /**
  * @brief VF - stop the TX queue by sending a message to the PF
@@ -729,7 +723,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid);
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid);
 
 /**
  * @brief VF - send a vport update command
@@ -902,9 +896,7 @@ static inline int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 }
 
 static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-				      u8 rx_queue_id,
-				      u16 sb,
-				      u8 sb_index,
+				      struct qed_queue_cid *p_cid,
 				      u16 bd_max_bytes,
 				      dma_addr_t bd_chain_phys_adr,
 				      dma_addr_t cqe_pbl_addr,
@@ -914,9 +906,7 @@ static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-				      u16 tx_queue_id,
-				      u16 sb,
-				      u8 sb_index,
+				      struct qed_queue_cid *p_cid,
 				      dma_addr_t pbl_addr,
 				      u16 pbl_size, void __iomem **pp_doorbell)
 {
@@ -924,12 +914,14 @@ static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-				     u16 rx_qid, bool cqe_completion)
+				     struct qed_queue_cid *p_cid,
+				     bool cqe_completion)
 {
 	return -EINVAL;
 }
 
-static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn,
+				     struct qed_queue_cid *p_cid)
 {
 	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 2116c4cc8924..c2135765f8ec 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -264,6 +264,8 @@ struct qede_rx_queue {
 	u64			rx_hw_errors;
 	u64			rx_alloc_errors;
 	u64			rx_ip_frags;
+
+	void *handle;
 };
 
 union db_prod {
@@ -293,6 +295,8 @@ struct qede_tx_queue {
 	u64			stopped_cnt;
 
 	bool			is_legacy;
+	void *handle;
+
 };
 
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr.hi), \
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 64c7f3b75283..834921178615 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -3334,6 +3334,12 @@ static int qede_drain_txq(struct qede_dev *edev,
 	return 0;
 }
 
+static int qede_stop_txq(struct qede_dev *edev,
+			 struct qede_tx_queue *txq, int rss_id)
+{
+	return edev->ops->q_tx_stop(edev->cdev, rss_id, txq->handle);
+}
+
 static int qede_stop_queues(struct qede_dev *edev)
 {
 	struct qed_update_vport_params vport_update_params;
@@ -3367,28 +3373,18 @@ static int qede_stop_queues(struct qede_dev *edev)
 
 	/* Stop all Queues in reverse order */
 	for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
-		struct qed_stop_rxq_params rx_params;
-
 		fp = &edev->fp_array[i];
 
 		/* Stop the Tx Queue(s) */
 		if (fp->type & QEDE_FASTPATH_TX) {
-			struct qed_stop_txq_params tx_params;
-
-			tx_params.rss_id = i;
-			tx_params.tx_queue_id = fp->txq->index;
-				rc = edev->ops->q_tx_stop(cdev, &tx_params);
-				if (rc)
-					return rc;
+			rc = qede_stop_txq(edev, fp->txq, i);
+			if (rc)
+				return rc;
 		}
 
 		/* Stop the Rx Queue */
 		if (fp->type & QEDE_FASTPATH_RX) {
-			memset(&rx_params, 0, sizeof(rx_params));
-			rx_params.rss_id = i;
-			rx_params.rx_queue_id = fp->rxq->rxq_id;
-
-			rc = edev->ops->q_rx_stop(cdev, &rx_params);
+			rc = edev->ops->q_rx_stop(cdev, i, fp->rxq->handle);
 			if (rc) {
 				DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
 				return rc;
@@ -3404,6 +3400,46 @@ static int qede_stop_queues(struct qede_dev *edev)
 	return rc;
 }
 
+static int qede_start_txq(struct qede_dev *edev,
+			  struct qede_fastpath *fp,
+			  struct qede_tx_queue *txq, u8 rss_id, u16 sb_idx)
+{
+	dma_addr_t phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
+	u32 page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
+	struct qed_queue_start_common_params params;
+	struct qed_txq_start_ret_params ret_params;
+	int rc;
+
+	memset(&params, 0, sizeof(params));
+	memset(&ret_params, 0, sizeof(ret_params));
+
+	params.queue_id = txq->index;
+	params.sb = fp->sb_info->igu_sb_id;
+	params.sb_idx = sb_idx;
+
+	rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
+				   page_cnt, &ret_params);
+	if (rc) {
+		DP_ERR(edev, "Start TXQ #%d failed %d\n", txq->index, rc);
+		return rc;
+	}
+
+	txq->doorbell_addr = ret_params.p_doorbell;
+	txq->handle = ret_params.p_handle;
+
+	/* Determine the FW consumer address associated */
+	txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[sb_idx];
+
+	/* Prepare the doorbell parameters */
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_ETH_TX_BD_PROD_CMD);
+	txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
+
+	return rc;
+}
+
 static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
 	int vlan_removal_en = 1;
@@ -3445,11 +3481,12 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 		u32 page_cnt;
 
 		if (fp->type & QEDE_FASTPATH_RX) {
+			struct qed_rxq_start_ret_params ret_params;
 			struct qede_rx_queue *rxq = fp->rxq;
 			__le16 *val;
 
+			memset(&ret_params, 0, sizeof(ret_params));
 			memset(&q_params, 0, sizeof(q_params));
-			q_params.rss_id = i;
 			q_params.queue_id = rxq->rxq_id;
 			q_params.vport_id = 0;
 			q_params.sb = fp->sb_info->igu_sb_id;
@@ -3459,18 +3496,21 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 			    qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
 			page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
 
-			rc = edev->ops->q_rx_start(cdev, &q_params,
+			rc = edev->ops->q_rx_start(cdev, i, &q_params,
 						   rxq->rx_buf_size,
 						   rxq->rx_bd_ring.p_phys_addr,
 						   p_phys_table,
-						   page_cnt,
-						   &rxq->hw_rxq_prod_addr);
+						   page_cnt, &ret_params);
 			if (rc) {
 				DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
 				       rc);
 				return rc;
 			}
 
+			/* Use the return parameters */
+			rxq->hw_rxq_prod_addr = ret_params.p_prod;
+			rxq->handle = ret_params.p_handle;
+
 			val = &fp->sb_info->sb_virt->pi_array[RX_PI];
 			rxq->hw_cons_ptr = val;
 
@@ -3478,38 +3518,9 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			struct qede_tx_queue *txq = fp->txq;
-
-			p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
-			page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
-
-			memset(&q_params, 0, sizeof(q_params));
-			q_params.rss_id = i;
-			q_params.queue_id = txq->index;
-			q_params.vport_id = 0;
-			q_params.sb = fp->sb_info->igu_sb_id;
-			q_params.sb_idx = TX_PI(0);
-
-			rc = edev->ops->q_tx_start(cdev, &q_params,
-						   p_phys_table, page_cnt,
-						   &txq->doorbell_addr);
-			if (rc) {
-				DP_ERR(edev, "Start TXQ #%d failed %d\n",
-				       txq->index, rc);
+			rc = qede_start_txq(edev, fp, fp->txq, i, TX_PI(0));
+			if (rc)
 				return rc;
-			}
-
-			txq->hw_cons_ptr =
-				&fp->sb_info->sb_virt->pi_array[TX_PI(0)];
-			SET_FIELD(txq->tx_db.data.params,
-				  ETH_DB_DATA_DEST, DB_DEST_XCM);
-			SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
-				  DB_AGG_CMD_SET);
-			SET_FIELD(txq->tx_db.data.params,
-				  ETH_DB_DATA_AGG_VAL_SEL,
-				  DQ_XCM_ETH_TX_BD_PROD_CMD);
-
-			txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
 		}
 	}
 
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 9755a3feb52e..7a52f7c58c37 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -15,6 +15,29 @@
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_iov_if.h>
 
+struct qed_queue_start_common_params {
+	/* Should always be relative to entity sending this. */
+	u8 vport_id;
+	u16 queue_id;
+
+	/* Relative, but relevant only for PFs */
+	u8 stats_id;
+
+	/* These are always absolute */
+	u16 sb;
+	u8 sb_idx;
+};
+
+struct qed_rxq_start_ret_params {
+	void __iomem *p_prod;
+	void *p_handle;
+};
+
+struct qed_txq_start_ret_params {
+	void __iomem *p_doorbell;
+	void *p_handle;
+};
+
 struct qed_dev_eth_info {
 	struct qed_dev_info common;
 
@@ -56,18 +79,6 @@ struct qed_start_vport_params {
 	bool clear_stats;
 };
 
-struct qed_stop_rxq_params {
-	u8 rss_id;
-	u8 rx_queue_id;
-	u8 vport_id;
-	bool eq_completion_only;
-};
-
-struct qed_stop_txq_params {
-	u8 rss_id;
-	u8 tx_queue_id;
-};
-
 enum qed_filter_rx_mode_type {
 	QED_FILTER_RX_MODE_TYPE_REGULAR,
 	QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC,
@@ -112,15 +123,6 @@ struct qed_filter_params {
 	union qed_filter_type_params filter;
 };
 
-struct qed_queue_start_common_params {
-	u8 rss_id;
-	u8 queue_id;
-	u8 vport_id;
-	u16 sb;
-	u16 sb_idx;
-	u16 vf_qid;
-};
-
 struct qed_tunn_params {
 	u16 vxlan_port;
 	u8 update_vxlan_port;
@@ -220,24 +222,24 @@ struct qed_eth_ops {
 			    struct qed_update_vport_params *params);
 
 	int (*q_rx_start)(struct qed_dev *cdev,
+			  u8 rss_num,
 			  struct qed_queue_start_common_params *params,
 			  u16 bd_max_bytes,
 			  dma_addr_t bd_chain_phys_addr,
 			  dma_addr_t cqe_pbl_addr,
 			  u16 cqe_pbl_size,
-			  void __iomem **pp_prod);
+			  struct qed_rxq_start_ret_params *ret_params);
 
-	int (*q_rx_stop)(struct qed_dev *cdev,
-			 struct qed_stop_rxq_params *params);
+	int (*q_rx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
 
 	int (*q_tx_start)(struct qed_dev *cdev,
+			  u8 rss_num,
 			  struct qed_queue_start_common_params *params,
 			  dma_addr_t pbl_addr,
 			  u16 pbl_size,
-			  void __iomem **pp_doorbell);
+			  struct qed_txq_start_ret_params *ret_params);
 
-	int (*q_tx_stop)(struct qed_dev *cdev,
-			 struct qed_stop_txq_params *params);
+	int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
 
 	int (*filter_config)(struct qed_dev *cdev,
 			     struct qed_filter_params *params);
-- 
cgit v1.2.3


From 91291d9ad92faa65a56a9a19d658d8049b78d3d4 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 30 Nov 2016 16:21:25 +0530
Subject: PM / OPP: Pass opp_table to dev_pm_opp_put_regulator()

Joonyoung Shim reported an interesting problem on his ARM octa-core
Odoroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
was failing for a struct device for which dev_pm_opp_set_regulator() is
called earlier.

This happened because an earlier call to
dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
removed all the entries from opp_table->dev_list apart from the last CPU
device in the cpumask of CPUs sharing the OPP.

But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
routines get CPU device for the first CPU in the cpumask. And so the OPP
core failed to find the OPP table for the struct device.

This patch attempts to fix this problem by returning a pointer to the
opp_table from dev_pm_opp_set_regulator() and using that as the
parameter to dev_pm_opp_put_regulator(). This ensures that the
dev_pm_opp_put_regulator() doesn't fail to find the opp table.

Note that similar design problem also exists with other
dev_pm_opp_put_*() APIs, but those aren't used currently by anyone and
so we don't need to update them for now.

Cc: 4.4+ <stable@vger.kernel.org> # 4.4+
Reported-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ Viresh: Wrote commit log and tested on exynos 5250 ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c | 22 ++++++----------------
 drivers/cpufreq/cpufreq-dt.c  | 12 ++++++++----
 include/linux/pm_opp.h        | 11 ++++++-----
 3 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 4c7c6da7a989..2824d3a5e9f0 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1316,7 +1316,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
 {
 	struct opp_table *opp_table;
 	struct regulator *reg;
@@ -1354,20 +1354,20 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 	opp_table->regulator = reg;
 
 	mutex_unlock(&opp_table_lock);
-	return 0;
+	return opp_table;
 
 err:
 	_remove_opp_table(opp_table);
 unlock:
 	mutex_unlock(&opp_table_lock);
 
-	return ret;
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
 
 /**
  * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
- * @dev: Device for which regulator was set.
+ * @opp_table: OPP table returned from dev_pm_opp_set_regulator().
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -1375,22 +1375,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-void dev_pm_opp_put_regulator(struct device *dev)
+void dev_pm_opp_put_regulator(struct opp_table *opp_table)
 {
-	struct opp_table *opp_table;
-
 	mutex_lock(&opp_table_lock);
 
-	/* Check for existing table for 'dev' first */
-	opp_table = _find_opp_table(dev);
-	if (IS_ERR(opp_table)) {
-		dev_err(dev, "Failed to find opp_table: %ld\n",
-			PTR_ERR(opp_table));
-		goto unlock;
-	}
-
 	if (IS_ERR(opp_table->regulator)) {
-		dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
+		pr_err("%s: Doesn't have regulator set\n", __func__);
 		goto unlock;
 	}
 
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 5c07ae05d69a..4d3ec92cbabf 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -28,6 +28,7 @@
 #include "cpufreq-dt.h"
 
 struct private_data {
+	struct opp_table *opp_table;
 	struct device *cpu_dev;
 	struct thermal_cooling_device *cdev;
 	const char *reg_name;
@@ -143,6 +144,7 @@ static int resources_available(void)
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct cpufreq_frequency_table *freq_table;
+	struct opp_table *opp_table = NULL;
 	struct private_data *priv;
 	struct device *cpu_dev;
 	struct clk *cpu_clk;
@@ -186,8 +188,9 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	name = find_supply_name(cpu_dev);
 	if (name) {
-		ret = dev_pm_opp_set_regulator(cpu_dev, name);
-		if (ret) {
+		opp_table = dev_pm_opp_set_regulator(cpu_dev, name);
+		if (IS_ERR(opp_table)) {
+			ret = PTR_ERR(opp_table);
 			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
 				policy->cpu, ret);
 			goto out_put_clk;
@@ -237,6 +240,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	}
 
 	priv->reg_name = name;
+	priv->opp_table = opp_table;
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
@@ -285,7 +289,7 @@ out_free_priv:
 out_free_opp:
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
 	if (name)
-		dev_pm_opp_put_regulator(cpu_dev);
+		dev_pm_opp_put_regulator(opp_table);
 out_put_clk:
 	clk_put(cpu_clk);
 
@@ -300,7 +304,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	if (priv->reg_name)
-		dev_pm_opp_put_regulator(priv->cpu_dev);
+		dev_pm_opp_put_regulator(priv->opp_table);
 
 	clk_put(policy->clk);
 	kfree(priv);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index bca26157f5b6..f6bc76501912 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -19,6 +19,7 @@
 
 struct dev_pm_opp;
 struct device;
+struct opp_table;
 
 enum dev_pm_opp_event {
 	OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
@@ -62,8 +63,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 void dev_pm_opp_put_supported_hw(struct device *dev);
 int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
-int dev_pm_opp_set_regulator(struct device *dev, const char *name);
-void dev_pm_opp_put_regulator(struct device *dev);
+struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name);
+void dev_pm_opp_put_regulator(struct opp_table *opp_table);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -170,12 +171,12 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
-static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+static inline struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
 {
-	return -ENOTSUPP;
+	return ERR_PTR(-ENOTSUPP);
 }
 
-static inline void dev_pm_opp_put_regulator(struct device *dev) {}
+static inline void dev_pm_opp_put_regulator(struct opp_table *opp_table) {}
 
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
-- 
cgit v1.2.3


From 5fdca6531434c1c1b2d584873afdda52e5ad448c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 29 Nov 2016 11:04:42 -0500
Subject: svcrdma: Renovate sendto chunk list parsing

The current sendto code appears to support clients that provide only
one of a Read list, a Write list, or a Reply chunk. My reading of
that code is that it doesn't support the following cases:

 - Read list + Write list
 - Read list + Reply chunk
 - Write list + Reply chunk
 - Read list + Write list + Reply chunk

The protocol allows more than one Read or Write chunk in those
lists. Some clients do send a Read list and Reply chunk
simultaneously. NFSv4 WRITE uses a Read list for the data payload,
and a Reply chunk because the GETATTR result in the reply can
contain a large object like an ACL.

Generalize one of the sendto code paths needed to support all of
the above cases, and attempt to ensure that only one pass is done
through the RPC Call's transport header to gather chunk list
information for building the reply.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h         |  2 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 +++++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c   | 92 +++++++++------------------------
 3 files changed, 39 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index cc3ae16eac68..6aef63b9a669 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -236,8 +236,6 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
 			    struct svc_rdma_req_map *, bool);
 extern int svc_rdma_sendto(struct svc_rqst *);
-extern struct rpcrdma_read_chunk *
-	svc_rdma_get_read_chunk(struct rpcrdma_msg *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
 				int);
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ad1df979b3f0..873c2a938d35 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -415,6 +415,20 @@ done:
 	return 1;
 }
 
+/* Returns the address of the first read chunk or <nul> if no read chunk
+ * is present
+ */
+static struct rpcrdma_read_chunk *
+svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+{
+	struct rpcrdma_read_chunk *ch =
+		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+
+	if (ch->rc_discrim == xdr_zero)
+		return NULL;
+	return ch;
+}
+
 static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 			    struct rpcrdma_msg *rmsgp,
 			    struct svc_rqst *rqstp,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index f5a91edcd233..0a58d4062f2f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
 	return dma_addr;
 }
 
-/* Returns the address of the first read chunk or <nul> if no read chunk
- * is present
+/* Parse the RPC Call's transport header.
  */
-struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
+				      struct rpcrdma_write_array **write,
+				      struct rpcrdma_write_array **reply)
 {
-	struct rpcrdma_read_chunk *ch =
-		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+	__be32 *p;
 
-	if (ch->rc_discrim == xdr_zero)
-		return NULL;
-	return ch;
-}
-
-/* Returns the address of the first read write array element or <nul>
- * if no write array list is present
- */
-static struct rpcrdma_write_array *
-svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
-{
-	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
-	    rmsgp->rm_body.rm_chunks[1] == xdr_zero)
-		return NULL;
-	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
-}
+	p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
 
-/* Returns the address of the first reply array element or <nul> if no
- * reply array is present
- */
-static struct rpcrdma_write_array *
-svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
-			 struct rpcrdma_write_array *wr_ary)
-{
-	struct rpcrdma_read_chunk *rch;
-	struct rpcrdma_write_array *rp_ary;
+	/* Read list */
+	while (*p++ != xdr_zero)
+		p += 5;
 
-	/* XXX: Need to fix when reply chunk may occur with read list
-	 *	and/or write list.
-	 */
-	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
-	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
-		return NULL;
-
-	rch = svc_rdma_get_read_chunk(rmsgp);
-	if (rch) {
-		while (rch->rc_discrim != xdr_zero)
-			rch++;
-
-		/* The reply chunk follows an empty write array located
-		 * at 'rc_position' here. The reply array is at rc_target.
-		 */
-		rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
-		goto found_it;
-	}
-
-	if (wr_ary) {
-		int chunk = be32_to_cpu(wr_ary->wc_nchunks);
-
-		rp_ary = (struct rpcrdma_write_array *)
-			 &wr_ary->wc_array[chunk].wc_target.rs_length;
-		goto found_it;
+	/* Write list */
+	if (*p != xdr_zero) {
+		*write = (struct rpcrdma_write_array *)p;
+		while (*p++ != xdr_zero)
+			p += 1 + be32_to_cpu(*p) * 4;
+	} else {
+		*write = NULL;
+		p++;
 	}
 
-	/* No read list, no write list */
-	rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
-
- found_it:
-	if (rp_ary->wc_discrim == xdr_zero)
-		return NULL;
-	return rp_ary;
+	/* Reply chunk */
+	if (*p != xdr_zero)
+		*reply = (struct rpcrdma_write_array *)p;
+	else
+		*reply = NULL;
 }
 
 /* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -244,8 +203,8 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
 
 	inv_rkey = 0;
 
-	rd_ary = svc_rdma_get_read_chunk(rdma_argp);
-	if (rd_ary) {
+	rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
+	if (rd_ary->rc_discrim != xdr_zero) {
 		inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
 		goto out;
 	}
@@ -622,8 +581,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	 * places this at the start of page 0.
 	 */
 	rdma_argp = page_address(rqstp->rq_pages[0]);
-	wr_ary = svc_rdma_get_write_array(rdma_argp);
-	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
+	svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
 
 	inv_rkey = 0;
 	if (rdma->sc_snd_w_inv)
-- 
cgit v1.2.3


From e4eb42cecc6dc546aac888ee4913d59121e886ee Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 29 Nov 2016 11:04:50 -0500
Subject: svcrdma: Remove BH-disabled spin locking in svc_rdma_send()

svcrdma's current SQ accounting algorithm takes sc_lock and disables
bottom-halves while posting all RDMA Read, Write, and Send WRs.

This is relatively heavyweight serialization. And note that Write and
Send are already fully serialized by the xpt_mutex.

Using a single atomic_t should be all that is necessary to guarantee
that ib_post_send() is called only when there is enough space on the
send queue. This is what the other RDMA-enabled storage targets do.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |  7 ++++++-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 25 ++++++++++---------------
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 6aef63b9a669..601cb07aa746 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -139,7 +139,7 @@ struct svcxprt_rdma {
 	int                  sc_max_sge_rd;	/* max sge for read target */
 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
-	atomic_t             sc_sq_count;	/* Number of SQ WR on queue */
+	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
 	unsigned int	     sc_rq_depth;	/* Depth of RQ */
 	u32		     sc_max_requests;	/* Forward credits */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 0a58d4062f2f..30eeab527bd0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -594,7 +594,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err0;
 	inline_bytes = rqstp->rq_res.len;
 
-	/* Create the RDMA response header */
+	/* Create the RDMA response header. xprt->xpt_mutex,
+	 * acquired in svc_send(), serializes RPC replies. The
+	 * code path below that inserts the credit grant value
+	 * into each transport header runs only inside this
+	 * critical section.
+	 */
 	ret = -ENOMEM;
 	res_page = alloc_page(GFP_KERNEL);
 	if (!res_page)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6864fb967038..da990d7f8b20 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -434,7 +434,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
 		goto err;
 
 out:
-	atomic_dec(&xprt->sc_sq_count);
+	atomic_inc(&xprt->sc_sq_avail);
 	wake_up(&xprt->sc_send_wait);
 	return;
 
@@ -1008,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	newxprt->sc_rq_depth = newxprt->sc_max_requests +
 			       newxprt->sc_max_bc_requests;
 	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
 		goto errout;
@@ -1333,15 +1334,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
-		spin_lock_bh(&xprt->sc_lock);
-		if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
-			spin_unlock_bh(&xprt->sc_lock);
+		if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
 			atomic_inc(&rdma_stat_sq_starve);
 
 			/* Wait until SQ WR available if SQ still full */
+			atomic_add(wr_count, &xprt->sc_sq_avail);
 			wait_event(xprt->sc_send_wait,
-				   atomic_read(&xprt->sc_sq_count) <
-				   xprt->sc_sq_depth);
+				   atomic_read(&xprt->sc_sq_avail) > wr_count);
 			if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
 				return -ENOTCONN;
 			continue;
@@ -1351,21 +1350,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 			svc_xprt_get(&xprt->sc_xprt);
 
 		/* Bump used SQ WR count and post */
-		atomic_add(wr_count, &xprt->sc_sq_count);
 		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
 		if (ret) {
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			atomic_sub(wr_count, &xprt->sc_sq_count);
 			for (i = 0; i < wr_count; i ++)
 				svc_xprt_put(&xprt->sc_xprt);
-			dprintk("svcrdma: failed to post SQ WR rc=%d, "
-			       "sc_sq_count=%d, sc_sq_depth=%d\n",
-			       ret, atomic_read(&xprt->sc_sq_count),
-			       xprt->sc_sq_depth);
-		}
-		spin_unlock_bh(&xprt->sc_lock);
-		if (ret)
+			dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
+			dprintk("    sc_sq_avail=%d, sc_sq_depth=%d\n",
+				atomic_read(&xprt->sc_sq_avail),
+				xprt->sc_sq_depth);
 			wake_up(&xprt->sc_send_wait);
+		}
 		break;
 	}
 	return ret;
-- 
cgit v1.2.3


From dd6fd213b05e7a1f590b470500343dd97c3a32c1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 29 Nov 2016 11:04:58 -0500
Subject: svcrdma: Remove DMA map accounting

Clean up: sc_dma_used is not required for correct operation. It is
simply a debugging tool to report when svcrdma has leaked DMA maps.

However, manipulating an atomic has a measurable CPU cost, and DMA
map accounting specific to svcrdma will be meaningless once svcrdma
is converted to use the new generic r/w API.

A similar kind of debug accounting can be done simply by enabling
the IOMMU or by using CONFIG_DMA_API_DEBUG, CONFIG_IOMMU_DEBUG, and
CONFIG_IOMMU_LEAK.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  2 --
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  |  1 -
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 13 +++----------
 3 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 601cb07aa746..43d7c709d117 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -148,7 +148,6 @@ struct svcxprt_rdma {
 
 	struct ib_pd         *sc_pd;
 
-	atomic_t	     sc_dma_used;
 	spinlock_t	     sc_ctxt_lock;
 	struct list_head     sc_ctxts;
 	int		     sc_ctxt_used;
@@ -200,7 +199,6 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
 					   struct svc_rdma_op_ctxt *ctxt)
 {
 	ctxt->mapped_sges++;
-	atomic_inc(&rdma->sc_dma_used);
 }
 
 /* svc_rdma_backchannel.c */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 873c2a938d35..283246e0afc2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 		       frmr->sg);
 		return -ENOMEM;
 	}
-	atomic_inc(&xprt->sc_dma_used);
 
 	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
 	if (unlikely(n != frmr->sg_nents)) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index da990d7f8b20..a613ebd1dd85 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -224,25 +224,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	struct ib_device *device = xprt->sc_cm_id->device;
 	u32 lkey = xprt->sc_pd->local_dma_lkey;
-	unsigned int i, count;
+	unsigned int i;
 
-	for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
+	for (i = 0; i < ctxt->mapped_sges; i++) {
 		/*
 		 * Unmap the DMA addr in the SGE if the lkey matches
 		 * the local_dma_lkey, otherwise, ignore it since it is
 		 * an FRMR lkey and will be unmapped later when the
 		 * last WR that uses it completes.
 		 */
-		if (ctxt->sge[i].lkey == lkey) {
-			count++;
+		if (ctxt->sge[i].lkey == lkey)
 			ib_dma_unmap_page(device,
 					    ctxt->sge[i].addr,
 					    ctxt->sge[i].length,
 					    ctxt->direction);
-		}
 	}
 	ctxt->mapped_sges = 0;
-	atomic_sub(count, &xprt->sc_dma_used);
 }
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -944,7 +941,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
 	if (frmr) {
 		ib_dma_unmap_sg(rdma->sc_cm_id->device,
 				frmr->sg, frmr->sg_nents, frmr->direction);
-		atomic_dec(&rdma->sc_dma_used);
 		spin_lock_bh(&rdma->sc_frmr_q_lock);
 		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
 		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
@@ -1256,9 +1252,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	if (rdma->sc_ctxt_used != 0)
 		pr_err("svcrdma: ctxt still in use? (%d)\n",
 		       rdma->sc_ctxt_used);
-	if (atomic_read(&rdma->sc_dma_used) != 0)
-		pr_err("svcrdma: dma still in use? (%d)\n",
-		       atomic_read(&rdma->sc_dma_used));
 
 	/* Final put of backchannel client transport */
 	if (xprt->xpt_bc_xprt) {
-- 
cgit v1.2.3


From 96a58f9c1921f28fab5ed008be791adacb540cc6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 29 Nov 2016 11:05:07 -0500
Subject: svcrdma: Remove svc_rdma_op_ctxt::wc_status

Clean up: Completion status is already reported in the individual
completion handlers. Save a few bytes in struct svc_rdma_op_ctxt.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 4 ++--
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 43d7c709d117..757fb963696c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -79,7 +79,6 @@ struct svc_rdma_op_ctxt {
 	struct ib_cqe reg_cqe;
 	struct ib_cqe inv_cqe;
 	struct list_head dto_q;
-	enum ib_wc_status wc_status;
 	u32 byte_len;
 	u32 position;
 	struct svcxprt_rdma *xprt;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 283246e0afc2..428ab4ef77f3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -640,8 +640,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 			goto defer;
 		goto out;
 	}
-	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
-		ctxt, rdma_xprt, rqstp, ctxt->wc_status);
+	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
+		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
 
 	/* Build up the XDR from the receive buffers. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index a613ebd1dd85..f9d961c00dd2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -393,7 +393,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 
 	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
 	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	ctxt->wc_status = wc->status;
 	svc_rdma_unmap_dma(ctxt);
 
 	if (wc->status != IB_WC_SUCCESS)
-- 
cgit v1.2.3


From 0a135b88bceac40d0036e401c19cdbda65b38a8f Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 30 Nov 2016 17:01:50 -0800
Subject: Input: synaptics-rmi4 - have only one struct platform data

If struct rmi_device_platform_data contains pointers to other struct,
it gets difficult to allocate a fixed size struct and copy it over between
drivers.

Change the pointers into a struct and change the code in rmi4 accordingly.

Reviewed-by: Andrew Duggan <aduggan@synaptics.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_f11.c | 4 ++--
 drivers/input/rmi4/rmi_f12.c | 4 ++--
 drivers/input/rmi4/rmi_f30.c | 9 ++++-----
 include/linux/rmi.h          | 4 ++--
 4 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index c252d405df4c..ffcbbc1745de 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1080,8 +1080,8 @@ static int rmi_f11_initialize(struct rmi_function *fn)
 		rc = rmi_2d_sensor_of_probe(&fn->dev, &f11->sensor_pdata);
 		if (rc)
 			return rc;
-	} else if (pdata->sensor_pdata) {
-		f11->sensor_pdata = *pdata->sensor_pdata;
+	} else {
+		f11->sensor_pdata = pdata->sensor_pdata;
 	}
 
 	f11->rezero_wait_ms = f11->sensor_pdata.rezero_wait;
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index d7255f18306d..82a4964e5eb9 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -352,8 +352,8 @@ static int rmi_f12_probe(struct rmi_function *fn)
 		ret = rmi_2d_sensor_of_probe(&fn->dev, &f12->sensor_pdata);
 		if (ret)
 			return ret;
-	} else if (pdata->sensor_pdata) {
-		f12->sensor_pdata = *pdata->sensor_pdata;
+	} else {
+		f12->sensor_pdata = pdata->sensor_pdata;
 	}
 
 	ret = rmi_read_register_desc(rmi_dev, query_addr,
diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c
index 485907ff10f4..f696137a56f5 100644
--- a/drivers/input/rmi4/rmi_f30.c
+++ b/drivers/input/rmi4/rmi_f30.c
@@ -196,7 +196,7 @@ static int rmi_f30_config(struct rmi_function *fn)
 				rmi_get_platform_data(fn->rmi_dev);
 	int error;
 
-	if (pdata->f30_data && pdata->f30_data->disable) {
+	if (pdata->f30_data.disable) {
 		drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask);
 	} else {
 		/* Write Control Register values back to device */
@@ -355,7 +355,7 @@ static inline int rmi_f30_initialize(struct rmi_function *fn)
 	f30->gpioled_key_map = (u16 *)map_memory;
 
 	pdata = rmi_get_platform_data(rmi_dev);
-	if (pdata && f30->has_gpio) {
+	if (f30->has_gpio) {
 		button = BTN_LEFT;
 		for (i = 0; i < f30->gpioled_count; i++) {
 			if (rmi_f30_is_valid_button(i, f30->ctrl)) {
@@ -366,8 +366,7 @@ static inline int rmi_f30_initialize(struct rmi_function *fn)
 				 * f30->has_mech_mouse_btns, but I am
 				 * not sure, so use only the pdata info
 				 */
-				if (pdata->f30_data &&
-				    pdata->f30_data->buttonpad)
+				if (pdata->f30_data.buttonpad)
 					break;
 			}
 		}
@@ -382,7 +381,7 @@ static int rmi_f30_probe(struct rmi_function *fn)
 	const struct rmi_device_platform_data *pdata =
 				rmi_get_platform_data(fn->rmi_dev);
 
-	if (pdata->f30_data && pdata->f30_data->disable)
+	if (pdata->f30_data.disable)
 		return 0;
 
 	rc = rmi_f30_initialize(fn);
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 621f098f1243..7780e40a2573 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -218,9 +218,9 @@ struct rmi_device_platform_data {
 	struct rmi_device_platform_data_spi spi_data;
 
 	/* function handler pdata */
-	struct rmi_2d_sensor_platform_data *sensor_pdata;
+	struct rmi_2d_sensor_platform_data sensor_pdata;
 	struct rmi_f01_power_management power_management;
-	struct rmi_f30_data *f30_data;
+	struct rmi_f30_data f30_data;
 };
 
 /**
-- 
cgit v1.2.3


From e16769d4bca67218531505ad1a8365d227fedcf9 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Mon, 28 Nov 2016 13:22:42 +0100
Subject: fs: configfs: don't return anything from drop_link

Documentation/filesystems/configfs/configfs.txt says:

"When unlink(2) is called on the symbolic link, the source item is
notified via the ->drop_link() method.  Like the ->drop_item() method,
this is a void function and cannot return failure."

The ->drop_item() is indeed a void function, the ->drop_link() is
actually not. This, together with the fact that the value of ->drop_link()
is silently ignored suggests, that it is the ->drop_link() return
type that should be corrected and changed to void.

This patch changes drop_link() signature and all its users.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
[hch: reverted reformatting of some code]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/configfs/configfs.txt |  2 +-
 drivers/nvme/target/configfs.c                  | 10 ++++------
 drivers/target/target_core_fabric_configfs.c    |  7 +++----
 drivers/usb/gadget/configfs.c                   |  8 +++-----
 drivers/usb/gadget/function/uvc_configfs.c      | 25 +++++--------------------
 include/linux/configfs.h                        |  2 +-
 6 files changed, 17 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index 8ec9136aae56..3828e85345ae 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -174,7 +174,7 @@ among other things.  For that, it needs a type.
 		void (*release)(struct config_item *);
 		int (*allow_link)(struct config_item *src,
 				  struct config_item *target);
-		int (*drop_link)(struct config_item *src,
+		void (*drop_link)(struct config_item *src,
 				 struct config_item *target);
 	};
 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index af5e2dc4a3d5..a780e2dbc940 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -466,7 +466,7 @@ out_free_link:
 	return ret;
 }
 
-static int nvmet_port_subsys_drop_link(struct config_item *parent,
+static void nvmet_port_subsys_drop_link(struct config_item *parent,
 		struct config_item *target)
 {
 	struct nvmet_port *port = to_nvmet_port(parent->ci_parent);
@@ -479,7 +479,7 @@ static int nvmet_port_subsys_drop_link(struct config_item *parent,
 			goto found;
 	}
 	up_write(&nvmet_config_sem);
-	return -EINVAL;
+	return;
 
 found:
 	list_del(&p->entry);
@@ -488,7 +488,6 @@ found:
 		nvmet_disable_port(port);
 	up_write(&nvmet_config_sem);
 	kfree(p);
-	return 0;
 }
 
 static struct configfs_item_operations nvmet_port_subsys_item_ops = {
@@ -542,7 +541,7 @@ out_free_link:
 	return ret;
 }
 
-static int nvmet_allowed_hosts_drop_link(struct config_item *parent,
+static void nvmet_allowed_hosts_drop_link(struct config_item *parent,
 		struct config_item *target)
 {
 	struct nvmet_subsys *subsys = to_subsys(parent->ci_parent);
@@ -555,14 +554,13 @@ static int nvmet_allowed_hosts_drop_link(struct config_item *parent,
 			goto found;
 	}
 	up_write(&nvmet_config_sem);
-	return -EINVAL;
+	return;
 
 found:
 	list_del(&p->entry);
 	nvmet_genctr++;
 	up_write(&nvmet_config_sem);
 	kfree(p);
-	return 0;
 }
 
 static struct configfs_item_operations nvmet_allowed_hosts_item_ops = {
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 31a096aa16ab..d8a16ca6baa5 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -137,7 +137,7 @@ static int target_fabric_mappedlun_link(
 	return core_dev_add_initiator_node_lun_acl(se_tpg, lacl, lun, lun_access_ro);
 }
 
-static int target_fabric_mappedlun_unlink(
+static void target_fabric_mappedlun_unlink(
 	struct config_item *lun_acl_ci,
 	struct config_item *lun_ci)
 {
@@ -146,7 +146,7 @@ static int target_fabric_mappedlun_unlink(
 	struct se_lun *lun = container_of(to_config_group(lun_ci),
 			struct se_lun, lun_group);
 
-	return core_dev_del_initiator_node_lun_acl(lun, lacl);
+	core_dev_del_initiator_node_lun_acl(lun, lacl);
 }
 
 static struct se_lun_acl *item_to_lun_acl(struct config_item *item)
@@ -669,7 +669,7 @@ out:
 	return ret;
 }
 
-static int target_fabric_port_unlink(
+static void target_fabric_port_unlink(
 	struct config_item *lun_ci,
 	struct config_item *se_dev_ci)
 {
@@ -688,7 +688,6 @@ static int target_fabric_port_unlink(
 	}
 
 	core_dev_del_lun(se_tpg, lun);
-	return 0;
 }
 
 static void target_fabric_port_release(struct config_item *item)
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 3984787f8e97..78c44979dde3 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -408,7 +408,7 @@ out:
 	return ret;
 }
 
-static int config_usb_cfg_unlink(
+static void config_usb_cfg_unlink(
 	struct config_item *usb_cfg_ci,
 	struct config_item *usb_func_ci)
 {
@@ -437,12 +437,11 @@ static int config_usb_cfg_unlink(
 			list_del(&f->list);
 			usb_put_function(f);
 			mutex_unlock(&gi->lock);
-			return 0;
+			return;
 		}
 	}
 	mutex_unlock(&gi->lock);
 	WARN(1, "Unable to locate function to unbind\n");
-	return 0;
 }
 
 static struct configfs_item_operations gadget_config_item_ops = {
@@ -865,7 +864,7 @@ out:
 	return ret;
 }
 
-static int os_desc_unlink(struct config_item *os_desc_ci,
+static void os_desc_unlink(struct config_item *os_desc_ci,
 			  struct config_item *usb_cfg_ci)
 {
 	struct gadget_info *gi = container_of(to_config_group(os_desc_ci),
@@ -878,7 +877,6 @@ static int os_desc_unlink(struct config_item *os_desc_ci,
 	cdev->os_desc_config = NULL;
 	WARN_ON(gi->composite.gadget_driver.udc_name);
 	mutex_unlock(&gi->lock);
-	return 0;
 }
 
 static struct configfs_item_operations os_desc_ops = {
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 31125a4a2658..4e037d2a7a60 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -547,7 +547,7 @@ out:
 	return ret;
 }
 
-static int uvcg_control_class_drop_link(struct config_item *src,
+static void uvcg_control_class_drop_link(struct config_item *src,
 					struct config_item *target)
 {
 	struct config_item *control, *header;
@@ -555,7 +555,6 @@ static int uvcg_control_class_drop_link(struct config_item *src,
 	struct mutex *su_mutex = &src->ci_group->cg_subsys->su_mutex;
 	struct uvc_descriptor_header **class_array;
 	struct uvcg_control_header *target_hdr;
-	int ret = -EINVAL;
 
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
 
@@ -569,23 +568,17 @@ static int uvcg_control_class_drop_link(struct config_item *src,
 	mutex_lock(&opts->lock);
 
 	class_array = uvcg_get_ctl_class_arr(src, opts);
-	if (!class_array)
-		goto unlock;
-	if (opts->refcnt) {
-		ret = -EBUSY;
+	if (!class_array || opts->refcnt)
 		goto unlock;
-	}
 
 	target_hdr = to_uvcg_control_header(target);
 	--target_hdr->linked;
 	class_array[0] = NULL;
-	ret = 0;
 
 unlock:
 	mutex_unlock(&opts->lock);
 out:
 	mutex_unlock(su_mutex);
-	return ret;
 }
 
 static struct configfs_item_operations uvcg_control_class_item_ops = {
@@ -777,7 +770,7 @@ out:
 	return ret;
 }
 
-static int uvcg_streaming_header_drop_link(struct config_item *src,
+static void uvcg_streaming_header_drop_link(struct config_item *src,
 					   struct config_item *target)
 {
 	struct mutex *su_mutex = &src->ci_group->cg_subsys->su_mutex;
@@ -786,7 +779,6 @@ static int uvcg_streaming_header_drop_link(struct config_item *src,
 	struct uvcg_streaming_header *src_hdr;
 	struct uvcg_format *target_fmt = NULL;
 	struct uvcg_format_ptr *format_ptr, *tmp;
-	int ret = -EINVAL;
 
 	src_hdr = to_uvcg_streaming_header(src);
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
@@ -811,8 +803,6 @@ static int uvcg_streaming_header_drop_link(struct config_item *src,
 out:
 	mutex_unlock(&opts->lock);
 	mutex_unlock(su_mutex);
-	return ret;
-
 }
 
 static struct configfs_item_operations uvcg_streaming_header_item_ops = {
@@ -2051,7 +2041,7 @@ out:
 	return ret;
 }
 
-static int uvcg_streaming_class_drop_link(struct config_item *src,
+static void uvcg_streaming_class_drop_link(struct config_item *src,
 					  struct config_item *target)
 {
 	struct config_item *streaming, *header;
@@ -2059,7 +2049,6 @@ static int uvcg_streaming_class_drop_link(struct config_item *src,
 	struct mutex *su_mutex = &src->ci_group->cg_subsys->su_mutex;
 	struct uvc_descriptor_header ***class_array;
 	struct uvcg_streaming_header *target_hdr;
-	int ret = -EINVAL;
 
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
 
@@ -2076,23 +2065,19 @@ static int uvcg_streaming_class_drop_link(struct config_item *src,
 	if (!class_array || !*class_array)
 		goto unlock;
 
-	if (opts->refcnt) {
-		ret = -EBUSY;
+	if (opts->refcnt)
 		goto unlock;
-	}
 
 	target_hdr = to_uvcg_streaming_header(target);
 	--target_hdr->linked;
 	kfree(**class_array);
 	kfree(*class_array);
 	*class_array = NULL;
-	ret = 0;
 
 unlock:
 	mutex_unlock(&opts->lock);
 out:
 	mutex_unlock(su_mutex);
-	return ret;
 }
 
 static struct configfs_item_operations uvcg_streaming_class_item_ops = {
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index d9d6a9d77489..9a30b921f740 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -228,7 +228,7 @@ static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 struct configfs_item_operations {
 	void (*release)(struct config_item *);
 	int (*allow_link)(struct config_item *src, struct config_item *target);
-	int (*drop_link)(struct config_item *src, struct config_item *target);
+	void (*drop_link)(struct config_item *src, struct config_item *target);
 };
 
 struct configfs_group_operations {
-- 
cgit v1.2.3


From 864e0981fd1bc042c6e932d1731d358dde60fed9 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Wed, 30 Nov 2016 15:16:08 +0200
Subject: crypto: api - fix comment typo

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 8348d83d8b5e..167aea29d41e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -90,7 +90,7 @@
 #define CRYPTO_ALG_TESTED		0x00000400
 
 /*
- * Set if the algorithm is an instance that is build from templates.
+ * Set if the algorithm is an instance that is built from templates.
  */
 #define CRYPTO_ALG_INSTANCE		0x00000800
 
-- 
cgit v1.2.3


From 4a057549d6044c2dea47e80f8369a76225ec9d90 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Mon, 28 Nov 2016 14:35:21 -0800
Subject: alarmtimer: Add tracepoints for alarm timers

Alarm timers are one of the mechanisms to wake up a system from suspend,
but there exist no tracepoints to analyse which process/thread armed an
alarmtimer.

Add tracepoints for start/cancel/expire of individual alarm timers and one
for tracing the suspend time decision when to resume the system.

The following trace excerpt illustrates the new mechanism:

Binder:3292_2-3304  [000] d..2   149.981123: alarmtimer_cancel:
alarmtimer:ffffffc1319a7800 type:REALTIME
expires:1325463120000000000 now:1325376810370370245

Binder:3292_2-3304  [000] d..2   149.981136: alarmtimer_start:
alarmtimer:ffffffc1319a7800 type:REALTIME
expires:1325376840000000000 now:1325376810370384591

Binder:3292_9-3953  [000] d..2   150.212991: alarmtimer_cancel:
alarmtimer:ffffffc1319a5a00 type:BOOTTIME
expires:179552000000 now:150154008122

Binder:3292_9-3953  [000] d..2   150.213006: alarmtimer_start:
alarmtimer:ffffffc1319a5a00 type:BOOTTIME
expires:179551000000 now:150154025622

system_server-3000  [002] ...1  162.701940: alarmtimer_suspend:
alarmtimer type:REALTIME expires:1325376840000000000

The wakeup time which is selected at suspend time allows to map it back to
the task arming the timer: Binder:3292_2.

[ tglx: Store alarm timer expiry time instead of some useless RTC relative
  	information, add proper type information for wakeups which are
  	handled via the clock_nanosleep/freezer and massage the changelog. ]

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Link: http://lkml.kernel.org/r/1480372524-15181-5-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/alarmtimer.h        |  5 ++
 include/trace/events/alarmtimer.h | 96 +++++++++++++++++++++++++++++++++++++++
 kernel/time/alarmtimer.c          | 53 +++++++++++++++++----
 3 files changed, 144 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/alarmtimer.h

(limited to 'include/linux')

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 9d8031257a90..c70aac13244a 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -10,7 +10,12 @@ enum alarmtimer_type {
 	ALARM_REALTIME,
 	ALARM_BOOTTIME,
 
+	/* Supported types end here */
 	ALARM_NUMTYPE,
+
+	/* Used for tracing information. No usable types. */
+	ALARM_REALTIME_FREEZER,
+	ALARM_BOOTTIME_FREEZER,
 };
 
 enum alarmtimer_restart {
diff --git a/include/trace/events/alarmtimer.h b/include/trace/events/alarmtimer.h
new file mode 100644
index 000000000000..a1c108c16c9c
--- /dev/null
+++ b/include/trace/events/alarmtimer.h
@@ -0,0 +1,96 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM alarmtimer
+
+#if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ALARMTIMER_H
+
+#include <linux/alarmtimer.h>
+#include <linux/rtc.h>
+#include <linux/tracepoint.h>
+
+TRACE_DEFINE_ENUM(ALARM_REALTIME);
+TRACE_DEFINE_ENUM(ALARM_BOOTTIME);
+TRACE_DEFINE_ENUM(ALARM_REALTIME_FREEZER);
+TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER);
+
+#define show_alarm_type(type)	__print_flags(type, " | ",	\
+	{ 1 << ALARM_REALTIME, "REALTIME" },			\
+	{ 1 << ALARM_BOOTTIME, "BOOTTIME" },			\
+	{ 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" },	\
+	{ 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" })
+
+TRACE_EVENT(alarmtimer_suspend,
+
+	TP_PROTO(ktime_t expires, int flag),
+
+	TP_ARGS(expires, flag),
+
+	TP_STRUCT__entry(
+		__field(s64, expires)
+		__field(unsigned char, alarm_type)
+	),
+
+	TP_fast_assign(
+		__entry->expires = expires.tv64;
+		__entry->alarm_type = flag;
+	),
+
+	TP_printk("alarmtimer type:%s expires:%llu",
+		  show_alarm_type((1 << __entry->alarm_type)),
+		  __entry->expires
+	)
+);
+
+DECLARE_EVENT_CLASS(alarm_class,
+
+	TP_PROTO(struct alarm *alarm, ktime_t now),
+
+	TP_ARGS(alarm, now),
+
+	TP_STRUCT__entry(
+		__field(void *,	alarm)
+		__field(unsigned char, alarm_type)
+		__field(s64, expires)
+		__field(s64, now)
+	),
+
+	TP_fast_assign(
+		__entry->alarm = alarm;
+		__entry->alarm_type = alarm->type;
+		__entry->expires = alarm->node.expires.tv64;
+		__entry->now = now.tv64;
+	),
+
+	TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu",
+		  __entry->alarm,
+		  show_alarm_type((1 << __entry->alarm_type)),
+		  __entry->expires,
+		  __entry->now
+	)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_fired,
+
+	TP_PROTO(struct alarm *alarm, ktime_t now),
+
+	TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_start,
+
+	TP_PROTO(struct alarm *alarm, ktime_t now),
+
+	TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_cancel,
+
+	TP_PROTO(struct alarm *alarm, ktime_t now),
+
+	TP_ARGS(alarm, now)
+);
+
+#endif /* _TRACE_ALARMTIMER_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a15caa3d1721..9b08ca391aed 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,9 @@
 #include <linux/workqueue.h>
 #include <linux/freezer.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/alarmtimer.h>
+
 /**
  * struct alarm_base - Alarm timer bases
  * @lock:		Lock for syncrhonized access to the base
@@ -40,7 +43,9 @@ static struct alarm_base {
 	clockid_t		base_clockid;
 } alarm_bases[ALARM_NUMTYPE];
 
-/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */
+/* freezer information to handle clock_nanosleep triggered wakeups */
+static enum alarmtimer_type freezer_alarmtype;
+static ktime_t freezer_expires;
 static ktime_t freezer_delta;
 static DEFINE_SPINLOCK(freezer_delta_lock);
 
@@ -194,6 +199,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
 	}
 	spin_unlock_irqrestore(&base->lock, flags);
 
+	trace_alarmtimer_fired(alarm, base->gettime());
 	return ret;
 
 }
@@ -218,15 +224,16 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
  */
 static int alarmtimer_suspend(struct device *dev)
 {
-	struct rtc_time tm;
-	ktime_t min, now;
-	unsigned long flags;
+	ktime_t min, now, expires;
+	int i, ret, type;
 	struct rtc_device *rtc;
-	int i;
-	int ret;
+	unsigned long flags;
+	struct rtc_time tm;
 
 	spin_lock_irqsave(&freezer_delta_lock, flags);
 	min = freezer_delta;
+	expires = freezer_expires;
+	type = freezer_alarmtype;
 	freezer_delta = ktime_set(0, 0);
 	spin_unlock_irqrestore(&freezer_delta_lock, flags);
 
@@ -247,8 +254,11 @@ static int alarmtimer_suspend(struct device *dev)
 		if (!next)
 			continue;
 		delta = ktime_sub(next->expires, base->gettime());
-		if (!min.tv64 || (delta.tv64 < min.tv64))
+		if (!min.tv64 || (delta.tv64 < min.tv64)) {
+			expires = next->expires;
 			min = delta;
+			type = i;
+		}
 	}
 	if (min.tv64 == 0)
 		return 0;
@@ -258,6 +268,8 @@ static int alarmtimer_suspend(struct device *dev)
 		return -EBUSY;
 	}
 
+	trace_alarmtimer_suspend(expires, type);
+
 	/* Setup an rtc timer to fire that far in the future */
 	rtc_timer_cancel(rtc, &rtctimer);
 	rtc_read_time(rtc, &tm);
@@ -295,15 +307,32 @@ static int alarmtimer_resume(struct device *dev)
 
 static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
 {
-	ktime_t delta;
+	struct alarm_base *base;
 	unsigned long flags;
-	struct alarm_base *base = &alarm_bases[type];
+	ktime_t delta;
+
+	switch(type) {
+	case ALARM_REALTIME:
+		base = &alarm_bases[ALARM_REALTIME];
+		type = ALARM_REALTIME_FREEZER;
+		break;
+	case ALARM_BOOTTIME:
+		base = &alarm_bases[ALARM_BOOTTIME];
+		type = ALARM_BOOTTIME_FREEZER;
+		break;
+	default:
+		WARN_ONCE(1, "Invalid alarm type: %d\n", type);
+		return;
+	}
 
 	delta = ktime_sub(absexp, base->gettime());
 
 	spin_lock_irqsave(&freezer_delta_lock, flags);
-	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64))
+	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) {
 		freezer_delta = delta;
+		freezer_expires = absexp;
+		freezer_alarmtype = type;
+	}
 	spin_unlock_irqrestore(&freezer_delta_lock, flags);
 }
 
@@ -342,6 +371,8 @@ void alarm_start(struct alarm *alarm, ktime_t start)
 	alarmtimer_enqueue(base, alarm);
 	hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
 	spin_unlock_irqrestore(&base->lock, flags);
+
+	trace_alarmtimer_start(alarm, base->gettime());
 }
 EXPORT_SYMBOL_GPL(alarm_start);
 
@@ -390,6 +421,8 @@ int alarm_try_to_cancel(struct alarm *alarm)
 	if (ret >= 0)
 		alarmtimer_dequeue(base, alarm);
 	spin_unlock_irqrestore(&base->lock, flags);
+
+	trace_alarmtimer_cancel(alarm, base->gettime());
 	return ret;
 }
 EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
-- 
cgit v1.2.3


From e73c23ff736e1ea371dfa419d7bf8e77ee53044a Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Date: Wed, 30 Nov 2016 12:28:58 -0800
Subject: block: add async variant of blkdev_issue_zeroout

Similar to __blkdev_issue_discard this variant allows submitting
the final bio asynchronously and chaining multiple ranges
into a single completion.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-lib.c        | 115 ++++++++++++++++++++++++++++++++++---------------
 include/linux/blkdev.h |   3 ++
 2 files changed, 84 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 18abda862915..bfb28b03765e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -137,24 +137,24 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 EXPORT_SYMBOL(blkdev_issue_discard);
 
 /**
- * blkdev_issue_write_same - queue a write same operation
+ * __blkdev_issue_write_same - generate number of bios with same page
  * @bdev:	target blockdev
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
  * @gfp_mask:	memory allocation flags (for bio_alloc)
  * @page:	page containing data to write
+ * @biop:	pointer to anchor bio
  *
  * Description:
- *    Issue a write same request for the sectors in question.
+ *  Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
  */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-			    sector_t nr_sects, gfp_t gfp_mask,
-			    struct page *page)
+static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
+		struct bio **biop)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	unsigned int max_write_same_sectors;
-	struct bio *bio = NULL;
-	int ret = 0;
+	struct bio *bio = *biop;
 	sector_t bs_mask;
 
 	if (!q)
@@ -164,6 +164,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
 
+	if (!bdev_write_same(bdev))
+		return -EOPNOTSUPP;
+
 	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
 	max_write_same_sectors = UINT_MAX >> 9;
 
@@ -185,32 +188,63 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 			bio->bi_iter.bi_size = nr_sects << 9;
 			nr_sects = 0;
 		}
+		cond_resched();
 	}
 
-	if (bio) {
+	*biop = bio;
+	return 0;
+}
+
+/**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev:	target blockdev
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to write
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @page:	page containing data
+ *
+ * Description:
+ *    Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+				sector_t nr_sects, gfp_t gfp_mask,
+				struct page *page)
+{
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret;
+
+	blk_start_plug(&plug);
+	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
+			&bio);
+	if (ret == 0 && bio) {
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
+	blk_finish_plug(&plug);
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_write_same);
 
 /**
- * blkdev_issue_zeroout - generate number of zero filed write bios
+ * __blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
  * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @biop:	pointer to anchor bio
+ * @discard:	discard flag
  *
  * Description:
  *  Generate and issue number of bios with zerofiled pages.
  */
-
-static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-				  sector_t nr_sects, gfp_t gfp_mask)
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+		bool discard)
 {
 	int ret;
-	struct bio *bio = NULL;
+	int bi_size = 0;
+	struct bio *bio = *biop;
 	unsigned int sz;
 	sector_t bs_mask;
 
@@ -218,6 +252,19 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
 
+	if (discard) {
+		ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
+				BLKDEV_DISCARD_ZERO, biop);
+		if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+			goto out;
+	}
+
+	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+			ZERO_PAGE(0), biop);
+	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+		goto out;
+
+	ret = 0;
 	while (nr_sects != 0) {
 		bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
 				gfp_mask);
@@ -227,21 +274,20 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
-			nr_sects -= ret >> 9;
-			sector += ret >> 9;
-			if (ret < (sz << 9))
+			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+			nr_sects -= bi_size >> 9;
+			sector += bi_size >> 9;
+			if (bi_size < (sz << 9))
 				break;
 		}
+		cond_resched();
 	}
 
-	if (bio) {
-		ret = submit_bio_wait(bio);
-		bio_put(bio);
-		return ret;
-	}
-	return 0;
+	*biop = bio;
+out:
+	return ret;
 }
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
 /**
  * blkdev_issue_zeroout - zero-fill a block range
@@ -263,21 +309,22 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
  *  clearing the block range. Otherwise the zeroing will be performed
  *  using regular WRITE calls.
  */
-
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			 sector_t nr_sects, gfp_t gfp_mask, bool discard)
 {
-	if (discard) {
-		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
-				BLKDEV_DISCARD_ZERO))
-			return 0;
-	}
+	int ret;
+	struct bio *bio = NULL;
+	struct blk_plug plug;
 
-	if (bdev_write_same(bdev) &&
-	    blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-				    ZERO_PAGE(0)) == 0)
-		return 0;
+	blk_start_plug(&plug);
+	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
+			&bio, discard);
+	if (ret == 0 && bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
 
-	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 541fdd8787a5..7e9d8a0895be 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1269,6 +1269,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		struct bio **biop);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
+extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+		bool discard);
 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, bool discard);
 static inline int sb_issue_discard(struct super_block *sb, sector_t block,
-- 
cgit v1.2.3


From a6f0788ec2881ac14e97ff7fa6a78a807f87b5ba Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Date: Wed, 30 Nov 2016 12:28:59 -0800
Subject: block: add support for REQ_OP_WRITE_ZEROES

This adds a new block layer operation to zero out a range of
LBAs. This allows to implement zeroing for devices that don't use
either discard with a predictable zero pattern or WRITE SAME of zeroes.
The prominent example of that is NVMe with the Write Zeroes command,
but in the future, this should also help with improving the way
zeroing discards work. For this operation, suitable entry is exported in
sysfs which indicate the number of maximum bytes allowed in one
write zeroes operation by the device.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/ABI/testing/sysfs-block | 13 ++++++++
 block/bio.c                           |  1 +
 block/blk-core.c                      |  4 +++
 block/blk-lib.c                       | 58 +++++++++++++++++++++++++++++++++--
 block/blk-merge.c                     | 17 +++++++---
 block/blk-settings.c                  | 17 ++++++++++
 block/blk-sysfs.c                     | 11 +++++++
 block/blk-wbt.c                       |  5 +--
 include/linux/bio.h                   | 25 ++++++++-------
 include/linux/blk_types.h             |  2 ++
 include/linux/blkdev.h                | 19 ++++++++++++
 11 files changed, 153 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index ee2d5cd26bfe..2da04ce6aeef 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -235,6 +235,19 @@ Description:
 		write_same_max_bytes is 0, write same is not supported
 		by the device.
 
+What:		/sys/block/<disk>/queue/write_zeroes_max_bytes
+Date:		November 2016
+Contact:	Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Description:
+		Devices that support write zeroes operation in which a
+		single request can be issued to zero out the range of
+		contiguous blocks on storage without having any payload
+		in the request. This can be used to optimize writing zeroes
+		to the devices. write_zeroes_max_bytes indicates how many
+		bytes can be written in a single write zeroes command. If
+		write_zeroes_max_bytes is 0, write zeroes is not supported
+		by the device.
+
 What:		/sys/block/<disk>/queue/zoned
 Date:		September 2016
 Contact:	Damien Le Moal <damien.lemoal@hgst.com>
diff --git a/block/bio.c b/block/bio.c
index de257ced69b1..83db1f37fd0b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -674,6 +674,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
 		break;
 	case REQ_OP_WRITE_SAME:
 		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
diff --git a/block/blk-core.c b/block/blk-core.c
index 6c4a425690fc..3f2eb8d80189 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1950,6 +1950,10 @@ generic_make_request_checks(struct bio *bio)
 		if (!bdev_is_zoned(bio->bi_bdev))
 			goto not_supported;
 		break;
+	case REQ_OP_WRITE_ZEROES:
+		if (!bdev_write_zeroes_sectors(bio->bi_bdev))
+			goto not_supported;
+		break;
 	default:
 		break;
 	}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index bfb28b03765e..510a6fb15318 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -226,6 +226,55 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL(blkdev_issue_write_same);
 
+/**
+ * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
+ * @bdev:	blockdev to issue
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to write
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @biop:	pointer to anchor bio
+ *
+ * Description:
+ *  Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
+ */
+static int __blkdev_issue_write_zeroes(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop)
+{
+	struct bio *bio = *biop;
+	unsigned int max_write_zeroes_sectors;
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (!q)
+		return -ENXIO;
+
+	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
+	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
+
+	if (max_write_zeroes_sectors == 0)
+		return -EOPNOTSUPP;
+
+	while (nr_sects) {
+		bio = next_bio(bio, 0, gfp_mask);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_bdev = bdev;
+		bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+
+		if (nr_sects > max_write_zeroes_sectors) {
+			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
+			nr_sects -= max_write_zeroes_sectors;
+			sector += max_write_zeroes_sectors;
+		} else {
+			bio->bi_iter.bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+		cond_resched();
+	}
+
+	*biop = bio;
+	return 0;
+}
+
 /**
  * __blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -259,6 +308,11 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			goto out;
 	}
 
+	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
+			biop);
+	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+		goto out;
+
 	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
 			ZERO_PAGE(0), biop);
 	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
@@ -304,8 +358,8 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
  *  the discard request fail, if the discard flag is not set, or if
  *  discard_zeroes_data is not supported, this function will resort to
  *  zeroing the blocks manually, thus provisioning (allocating,
- *  anchoring) them. If the block device supports the WRITE SAME command
- *  blkdev_issue_zeroout() will use it to optimize the process of
+ *  anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
+ *  command(s), blkdev_issue_zeroout() will use it to optimize the process of
  *  clearing the block range. Otherwise the zeroing will be performed
  *  using regular WRITE calls.
  */
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fda6a12fc776..cf2848cb91d8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -199,6 +199,10 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 	case REQ_OP_SECURE_ERASE:
 		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
 		break;
+	case REQ_OP_WRITE_ZEROES:
+		split = NULL;
+		nsegs = (*bio)->bi_phys_segments;
+		break;
 	case REQ_OP_WRITE_SAME:
 		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
 		break;
@@ -241,11 +245,15 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	 * This should probably be returning 0, but blk_add_request_payload()
 	 * (Christoph!!!!)
 	 */
-	if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
-		return 1;
-
-	if (bio_op(bio) == REQ_OP_WRITE_SAME)
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE_ZEROES:
 		return 1;
+	default:
+		break;
+	}
 
 	fbio = bio;
 	cluster = blk_queue_cluster(q);
@@ -416,6 +424,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
 		/*
 		 * This is a hack - drivers should be neither modifying the
 		 * biovec, nor relying on bi_vcnt - but because of
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c7ccabc0ec3e..8a2bc124a684 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_dev_sectors = 0;
 	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
+	lim->max_write_zeroes_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->max_hw_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -132,6 +133,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_sectors = UINT_MAX;
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_same_sectors = UINT_MAX;
+	lim->max_write_zeroes_sectors = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -299,6 +301,19 @@ void blk_queue_max_write_same_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
 
+/**
+ * blk_queue_max_write_zeroes_sectors - set max sectors for a single
+ *                                      write zeroes
+ * @q:  the request queue for the device
+ * @max_write_zeroes_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+		unsigned int max_write_zeroes_sectors)
+{
+	q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+
 /**
  * blk_queue_max_segments - set max hw segments for a request for this queue
  * @q:  the request queue for the device
@@ -527,6 +542,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
 	t->max_write_same_sectors = min(t->max_write_same_sectors,
 					b->max_write_same_sectors);
+	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
+					b->max_write_zeroes_sectors);
 	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
 
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a97841491769..706b27bd73a1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -211,6 +211,11 @@ static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
 		(unsigned long long)q->limits.max_write_same_sectors << 9);
 }
 
+static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
+}
 
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
@@ -611,6 +616,11 @@ static struct queue_sysfs_entry queue_write_same_max_entry = {
 	.show = queue_write_same_max_show,
 };
 
+static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
+	.attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+	.show = queue_write_zeroes_max_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_show_nonrot,
@@ -700,6 +710,7 @@ static struct attribute *default_attrs[] = {
 	&queue_discard_max_hw_entry.attr,
 	&queue_discard_zeroes_data_entry.attr,
 	&queue_write_same_max_entry.attr,
+	&queue_write_zeroes_max_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
 	&queue_nomerges_entry.attr,
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b8647343141f..d500e43da5d9 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -575,9 +575,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
 	const int op = bio_op(bio);
 
 	/*
-	 * If not a WRITE (or a discard), do nothing
+	 * If not a WRITE (or a discard or write zeroes), do nothing
 	 */
-	if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
+	if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+				op == REQ_OP_WRITE_ZEROES))
 		return false;
 
 	/*
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 70a7244f08a7..b15323934a29 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -76,7 +76,8 @@ static inline bool bio_has_data(struct bio *bio)
 	if (bio &&
 	    bio->bi_iter.bi_size &&
 	    bio_op(bio) != REQ_OP_DISCARD &&
-	    bio_op(bio) != REQ_OP_SECURE_ERASE)
+	    bio_op(bio) != REQ_OP_SECURE_ERASE &&
+	    bio_op(bio) != REQ_OP_WRITE_ZEROES)
 		return true;
 
 	return false;
@@ -86,7 +87,8 @@ static inline bool bio_no_advance_iter(struct bio *bio)
 {
 	return bio_op(bio) == REQ_OP_DISCARD ||
 	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
-	       bio_op(bio) == REQ_OP_WRITE_SAME;
+	       bio_op(bio) == REQ_OP_WRITE_SAME ||
+	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
 }
 
 static inline bool bio_mergeable(struct bio *bio)
@@ -188,18 +190,19 @@ static inline unsigned bio_segments(struct bio *bio)
 	struct bvec_iter iter;
 
 	/*
-	 * We special case discard/write same, because they interpret bi_size
-	 * differently:
+	 * We special case discard/write same/write zeroes, because they
+	 * interpret bi_size differently:
 	 */
 
-	if (bio_op(bio) == REQ_OP_DISCARD)
-		return 1;
-
-	if (bio_op(bio) == REQ_OP_SECURE_ERASE)
-		return 1;
-
-	if (bio_op(bio) == REQ_OP_WRITE_SAME)
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE_ZEROES:
 		return 1;
+	default:
+		break;
+	}
 
 	bio_for_each_segment(bv, bio, iter)
 		segs++;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f57458a6a93b..519ea2c9df61 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -159,6 +159,8 @@ enum req_opf {
 	REQ_OP_ZONE_RESET	= 6,
 	/* write the same sector many times */
 	REQ_OP_WRITE_SAME	= 7,
+	/* write the zero filled sector many times */
+	REQ_OP_WRITE_ZEROES	= 8,
 
 	REQ_OP_LAST,
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7e9d8a0895be..ebeef2b79c5a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -323,6 +323,7 @@ struct queue_limits {
 	unsigned int		max_discard_sectors;
 	unsigned int		max_hw_discard_sectors;
 	unsigned int		max_write_same_sectors;
+	unsigned int		max_write_zeroes_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
 
@@ -774,6 +775,9 @@ static inline bool rq_mergeable(struct request *rq)
 	if (req_op(rq) == REQ_OP_FLUSH)
 		return false;
 
+	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
+		return false;
+
 	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
 		return false;
 	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
@@ -1004,6 +1008,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	if (unlikely(op == REQ_OP_WRITE_SAME))
 		return q->limits.max_write_same_sectors;
 
+	if (unlikely(op == REQ_OP_WRITE_ZEROES))
+		return q->limits.max_write_zeroes_sectors;
+
 	return q->limits.max_sectors;
 }
 
@@ -1107,6 +1114,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
 extern void blk_queue_max_write_same_sectors(struct request_queue *q,
 		unsigned int max_write_same_sectors);
+extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+		unsigned int max_write_same_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
 extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -1475,6 +1484,16 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
 	return 0;
 }
 
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return q->limits.max_write_zeroes_sectors;
+
+	return 0;
+}
+
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-- 
cgit v1.2.3


From 3b7c33b28a44d4621e365e090bf8bd332ab232a1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Date: Wed, 30 Nov 2016 12:29:00 -0800
Subject: nvme.h: add Write Zeroes definitions

Add the command structure, optional command set support (ONCS) bit and
a new error code for the Write Zeroes command.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@hgst.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 18ce9f7cc881..0df9466a7c38 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -237,6 +237,7 @@ enum {
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
@@ -543,6 +544,23 @@ struct nvme_dsm_range {
 	__le64			slba;
 };
 
+struct nvme_write_zeroes_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			slba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -839,6 +857,7 @@ struct nvme_command {
 		struct nvme_download_firmware dlfw;
 		struct nvme_format_cmd format;
 		struct nvme_dsm_cmd dsm;
+		struct nvme_write_zeroes_cmd write_zeroes;
 		struct nvme_abort_cmd abort;
 		struct nvme_get_log_page_command get_log_page;
 		struct nvmf_common_command fabrics;
@@ -918,6 +937,7 @@ enum {
 	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_INVALID_PI		= 0x181,
 	NVME_SC_READ_ONLY		= 0x182,
+	NVME_SC_ONCS_NOT_SUPPORTED	= 0x183,
 
 	/*
 	 * I/O Command Set Specific - Fabrics commands:
-- 
cgit v1.2.3


From f4ed2fe34fb793755ef8cfc3509e783c4709ffc1 Mon Sep 17 00:00:00 2001
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Date: Tue, 29 Nov 2016 15:16:46 +0530
Subject: net: phy: add mdix_ctrl to hold the user configuration.

Add new parameter mdix_ctrl to hold the user configuration.
Existing mdix maintain the current status of MDI(X) crossover performed or
not.
mdix_ctrl can configure either ETH_TP_MDI or ETH_TP_MDI_X orETH_TP_MDI_AUTO.

Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index b53177fd38af..feb8a98e8dd3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -450,6 +450,7 @@ struct phy_device {
 	struct net_device *attached_dev;
 
 	u8 mdix;
+	u8 mdix_ctrl;
 
 	void (*adjust_link)(struct net_device *dev);
 };
-- 
cgit v1.2.3


From 22195cbd3451a75abaf30651a61cf85c89061327 Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 1 Dec 2016 13:20:05 +0800
Subject: vfio: vfio_register_notifier: classify iommu notifier

Currently vfio_register_notifier assumes that there is only one
notifier chain, which is in vfio_iommu. However, the user might
also be interested in events other than vfio_iommu, for example,
vfio_group. Refactor vfio_{un}register_notifier implementation
to make it feasible.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Jike Song <jike.song@intel.com>
[aw: merge with commit 816ca69ea9c7 ("vfio: Fix handling of error returned by 'vfio_group_get_from_dev()'"), remove typedef]
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c             | 84 +++++++++++++++++++++++++++++------------
 drivers/vfio/vfio_iommu_type1.c |  8 ++++
 include/linux/vfio.h            | 13 ++++++-
 3 files changed, 78 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7b39313abf0d..6cc2a9ffac10 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2008,59 +2008,44 @@ err_unpin_pages:
 }
 EXPORT_SYMBOL(vfio_unpin_pages);
 
-int vfio_register_notifier(struct device *dev, struct notifier_block *nb)
+static int vfio_register_iommu_notifier(struct vfio_group *group,
+					unsigned long *events,
+					struct notifier_block *nb)
 {
 	struct vfio_container *container;
-	struct vfio_group *group;
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	if (!dev || !nb)
-		return -EINVAL;
-
-	group = vfio_group_get_from_dev(dev);
-	if (!group)
-		return -ENODEV;
-
 	ret = vfio_group_add_container_user(group);
 	if (ret)
-		goto err_register_nb;
+		return -EINVAL;
 
 	container = group->container;
 	down_read(&container->group_lock);
 
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->register_notifier))
-		ret = driver->ops->register_notifier(container->iommu_data, nb);
+		ret = driver->ops->register_notifier(container->iommu_data,
+						     events, nb);
 	else
 		ret = -ENOTTY;
 
 	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
-err_register_nb:
-	vfio_group_put(group);
 	return ret;
 }
-EXPORT_SYMBOL(vfio_register_notifier);
 
-int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
+static int vfio_unregister_iommu_notifier(struct vfio_group *group,
+					  struct notifier_block *nb)
 {
 	struct vfio_container *container;
-	struct vfio_group *group;
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	if (!dev || !nb)
-		return -EINVAL;
-
-	group = vfio_group_get_from_dev(dev);
-	if (!group)
-		return -ENODEV;
-
 	ret = vfio_group_add_container_user(group);
 	if (ret)
-		goto err_unregister_nb;
+		return -EINVAL;
 
 	container = group->container;
 	down_read(&container->group_lock);
@@ -2075,7 +2060,56 @@ int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
 	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
-err_unregister_nb:
+	return ret;
+}
+
+int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
+			   unsigned long *events, struct notifier_block *nb)
+{
+	struct vfio_group *group;
+	int ret;
+
+	if (!dev || !nb || !events || (*events == 0))
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (!group)
+		return -ENODEV;
+
+	switch (type) {
+	case VFIO_IOMMU_NOTIFY:
+		ret = vfio_register_iommu_notifier(group, events, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_register_notifier);
+
+int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
+			     struct notifier_block *nb)
+{
+	struct vfio_group *group;
+	int ret;
+
+	if (!dev || !nb)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (!group)
+		return -ENODEV;
+
+	switch (type) {
+	case VFIO_IOMMU_NOTIFY:
+		ret = vfio_unregister_iommu_notifier(group, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
 	vfio_group_put(group);
 	return ret;
 }
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 51810a95416e..b88ad1e5e7ff 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1584,10 +1584,18 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 }
 
 static int vfio_iommu_type1_register_notifier(void *iommu_data,
+					      unsigned long *events,
 					      struct notifier_block *nb)
 {
 	struct vfio_iommu *iommu = iommu_data;
 
+	/* clear known events */
+	*events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+	/* refuse to register if still events remaining */
+	if (*events)
+		return -EINVAL;
+
 	return blocking_notifier_chain_register(&iommu->notifier, nb);
 }
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 15ff0421b423..0e5201f207d3 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -81,6 +81,7 @@ struct vfio_iommu_driver_ops {
 	int		(*unpin_pages)(void *iommu_data,
 				       unsigned long *user_pfn, int npage);
 	int		(*register_notifier)(void *iommu_data,
+					     unsigned long *events,
 					     struct notifier_block *nb);
 	int		(*unregister_notifier)(void *iommu_data,
 					       struct notifier_block *nb);
@@ -107,12 +108,20 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
 extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
 			    int npage);
 
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP	(1)
+/* each type has independent events */
+enum vfio_notify_type {
+	VFIO_IOMMU_NOTIFY = 0,
+};
+
+/* events for VFIO_IOMMU_NOTIFY */
+#define VFIO_IOMMU_NOTIFY_DMA_UNMAP	BIT(0)
 
 extern int vfio_register_notifier(struct device *dev,
+				  enum vfio_notify_type type,
+				  unsigned long *required_events,
 				  struct notifier_block *nb);
-
 extern int vfio_unregister_notifier(struct device *dev,
+				    enum vfio_notify_type type,
 				    struct notifier_block *nb);
 
 /*
-- 
cgit v1.2.3


From ccd46dbae77dbf0d33f42e04b59536f108c395e8 Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 1 Dec 2016 13:20:06 +0800
Subject: vfio: support notifier chain in vfio_group

Beyond vfio_iommu events, users might also be interested in
vfio_group events. For example, if a vfio_group is used along
with Qemu/KVM, whenever kvm pointer is set to/cleared from the
vfio_group, users could be notified.

Currently only VFIO_GROUP_NOTIFY_SET_KVM supported.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Jike Song <jike.song@intel.com>
[aw: remove use of new typedef]
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c  | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h |  7 +++++
 2 files changed, 89 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 6cc2a9ffac10..9901c4671e2f 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -86,6 +86,8 @@ struct vfio_group {
 	struct mutex			unbound_lock;
 	atomic_t			opened;
 	bool				noiommu;
+	struct kvm			*kvm;
+	struct blocking_notifier_head	notifier;
 };
 
 struct vfio_device {
@@ -339,6 +341,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 #ifdef CONFIG_VFIO_NOIOMMU
 	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
 #endif
+	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -1581,6 +1584,9 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 
 	filep->private_data = NULL;
 
+	/* Any user didn't unregister? */
+	WARN_ON(group->notifier.head);
+
 	vfio_group_try_dissolve_container(group);
 
 	atomic_dec(&group->opened);
@@ -2063,6 +2069,76 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	return ret;
 }
 
+void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
+{
+	group->kvm = kvm;
+	blocking_notifier_call_chain(&group->notifier,
+				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
+}
+EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
+
+static int vfio_register_group_notifier(struct vfio_group *group,
+					unsigned long *events,
+					struct notifier_block *nb)
+{
+	struct vfio_container *container;
+	int ret;
+	bool set_kvm = false;
+
+	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
+		set_kvm = true;
+
+	/* clear known events */
+	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
+
+	/* refuse to continue if still events remaining */
+	if (*events)
+		return -EINVAL;
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		return -EINVAL;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	ret = blocking_notifier_chain_register(&group->notifier, nb);
+
+	/*
+	 * The attaching of kvm and vfio_group might already happen, so
+	 * here we replay once upon registration.
+	 */
+	if (!ret && set_kvm && group->kvm)
+		blocking_notifier_call_chain(&group->notifier,
+					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+	return ret;
+}
+
+static int vfio_unregister_group_notifier(struct vfio_group *group,
+					 struct notifier_block *nb)
+{
+	struct vfio_container *container;
+	int ret;
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		return -EINVAL;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	ret = blocking_notifier_chain_unregister(&group->notifier, nb);
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+	return ret;
+}
+
 int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
 			   unsigned long *events, struct notifier_block *nb)
 {
@@ -2080,6 +2156,9 @@ int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_register_iommu_notifier(group, events, nb);
 		break;
+	case VFIO_GROUP_NOTIFY:
+		ret = vfio_register_group_notifier(group, events, nb);
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -2106,6 +2185,9 @@ int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_unregister_iommu_notifier(group, nb);
 		break;
+	case VFIO_GROUP_NOTIFY:
+		ret = vfio_unregister_group_notifier(group, nb);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 0e5201f207d3..edf9b2cad277 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -111,11 +111,15 @@ extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
 /* each type has independent events */
 enum vfio_notify_type {
 	VFIO_IOMMU_NOTIFY = 0,
+	VFIO_GROUP_NOTIFY = 1,
 };
 
 /* events for VFIO_IOMMU_NOTIFY */
 #define VFIO_IOMMU_NOTIFY_DMA_UNMAP	BIT(0)
 
+/* events for VFIO_GROUP_NOTIFY */
+#define VFIO_GROUP_NOTIFY_SET_KVM	BIT(0)
+
 extern int vfio_register_notifier(struct device *dev,
 				  enum vfio_notify_type type,
 				  unsigned long *required_events,
@@ -124,6 +128,9 @@ extern int vfio_unregister_notifier(struct device *dev,
 				    enum vfio_notify_type type,
 				    struct notifier_block *nb);
 
+struct kvm;
+extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
+
 /*
  * Sub-module helpers
  */
-- 
cgit v1.2.3


From 00710984eac523ffed4e92850511d7610cfe908b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 30 Nov 2016 14:47:13 -0600
Subject: ACPI: Add acpi_resource_consumer() to find device that claims a
 resource

Add acpi_resource_consumer().  This takes a struct resource and searches
the ACPI namespace for a device whose current resource settings (_CRS)
includes the resource.  It returns the device if it exists, or NULL if no
device uses the resource.

If more than one device uses the resource (this may happen in the case of
bridges), acpi_resource_consumer() returns the first one found by
acpi_get_devices() in its modified depth-first walk of the namespace.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/resource.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h    |  7 ++++++
 2 files changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 56241eb341f4..cb57962ef7c4 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -664,3 +664,60 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares,
 	return (type & types) ? 0 : 1;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type);
+
+static int acpi_dev_consumes_res(struct acpi_device *adev, struct resource *res)
+{
+	struct list_head resource_list;
+	struct resource_entry *rentry;
+	int ret, found = 0;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (ret < 0)
+		return 0;
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		if (resource_contains(rentry->res, res)) {
+			found = 1;
+			break;
+		}
+
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+	return found;
+}
+
+static acpi_status acpi_res_consumer_cb(acpi_handle handle, u32 depth,
+					 void *context, void **ret)
+{
+	struct resource *res = context;
+	struct acpi_device **consumer = (struct acpi_device **) ret;
+	struct acpi_device *adev;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+
+	if (acpi_dev_consumes_res(adev, res)) {
+		*consumer = adev;
+		return AE_CTRL_TERMINATE;
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_resource_consumer - Find the ACPI device that consumes @res.
+ * @res: Resource to search for.
+ *
+ * Search the current resource settings (_CRS) of every ACPI device node
+ * for @res.  If we find an ACPI device whose _CRS includes @res, return
+ * it.  Otherwise, return NULL.
+ */
+struct acpi_device *acpi_resource_consumer(struct resource *res)
+{
+	struct acpi_device *consumer = NULL;
+
+	acpi_get_devices(NULL, acpi_res_consumer_cb, res, (void **) &consumer);
+	return consumer;
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ddbeda6dbdc8..b00ad73c946d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -419,6 +419,8 @@ static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares,
 	return acpi_dev_filter_resource_type(ares, (unsigned long)arg);
 }
 
+struct acpi_device *acpi_resource_consumer(struct resource *res);
+
 int acpi_check_resource_conflict(const struct resource *res);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
@@ -762,6 +764,11 @@ static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb)
 	return -EINVAL;
 }
 
+static inline struct acpi_device *acpi_resource_consumer(struct resource *res)
+{
+	return NULL;
+}
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
-- 
cgit v1.2.3


From cf80516579ceb87b91205e68fb31d5affd5aea8d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 15 Nov 2016 14:56:07 -0500
Subject: NFSv4: Add encode/decode of the layoutreturn op in CLOSE

Add XDR encoding for the layoutreturn op, and storage for the layoutreturn
arguments to the CLOSE compound.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 49 ++++++++++++++++++++++++++++++++++++++++---------
 fs/nfs/nfs4xdr.c        | 26 ++++++++++++++++++++++++++
 include/linux/nfs_xdr.h |  3 +++
 3 files changed, 69 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87972cfa62bc..9b9b0eabef9b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3035,10 +3035,14 @@ struct nfs4_closedata {
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
 	struct nfs_closeres res;
+	struct {
+		struct nfs4_layoutreturn_args arg;
+		struct nfs4_layoutreturn_res res;
+		u32 roc_barrier;
+		bool roc;
+	} lr;
 	struct nfs_fattr fattr;
 	unsigned long timestamp;
-	bool roc;
-	u32 roc_barrier;
 };
 
 static void nfs4_free_closedata(void *data)
@@ -3047,7 +3051,7 @@ static void nfs4_free_closedata(void *data)
 	struct nfs4_state_owner *sp = calldata->state->owner;
 	struct super_block *sb = calldata->state->inode->i_sb;
 
-	if (calldata->roc)
+	if (calldata->lr.roc)
 		pnfs_roc_release(calldata->state->inode);
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
@@ -3067,15 +3071,41 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
 		return;
 	trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
+
+	/* Handle Layoutreturn errors */
+	if (calldata->arg.lr_args && task->tk_status != 0) {
+		switch (calldata->res.lr_ret) {
+		default:
+			calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+			break;
+		case 0:
+			calldata->arg.lr_args = NULL;
+			calldata->res.lr_res = NULL;
+			break;
+		case -NFS4ERR_ADMIN_REVOKED:
+		case -NFS4ERR_DELEG_REVOKED:
+		case -NFS4ERR_EXPIRED:
+		case -NFS4ERR_BAD_STATEID:
+		case -NFS4ERR_OLD_STATEID:
+		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
+		case -NFS4ERR_WRONG_CRED:
+			calldata->arg.lr_args = NULL;
+			calldata->res.lr_res = NULL;
+			calldata->res.lr_ret = 0;
+			rpc_restart_call_prepare(task);
+			return;
+		}
+	}
+
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
 	switch (task->tk_status) {
 		case 0:
 			res_stateid = &calldata->res.stateid;
-			if (calldata->roc)
+			if (calldata->lr.roc)
 				pnfs_roc_set_barrier(state->inode,
-						     calldata->roc_barrier);
+						     calldata->lr.roc_barrier);
 			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_ADMIN_REVOKED:
@@ -3151,7 +3181,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		goto out_no_action;
 	}
 
-	if (nfs4_wait_on_layoutreturn(inode, task)) {
+	if (!calldata->arg.lr_args && nfs4_wait_on_layoutreturn(inode, task)) {
 		nfs_release_seqid(calldata->arg.seqid);
 		goto out_wait;
 	}
@@ -3165,8 +3195,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		else
 			calldata->arg.bitmask = NULL;
 	}
-	if (calldata->roc)
-		pnfs_roc_get_barrier(inode, &calldata->roc_barrier);
+	if (calldata->lr.roc)
+		pnfs_roc_get_barrier(inode, &calldata->lr.roc_barrier);
 
 	calldata->arg.share_access =
 		nfs4_map_atomic_open_share(NFS_SERVER(inode),
@@ -3250,7 +3280,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
-	calldata->roc = nfs4_roc(state->inode);
+	calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+	calldata->lr.roc = nfs4_roc(state->inode);
 	nfs_sb_active(calldata->inode->i_sb);
 
 	msg.rpc_argp = &calldata->arg;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a82b3fb2d27..73d2a68f0698 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -415,6 +415,8 @@ static int nfs4_stat_to_errno(int);
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
+#define encode_layoutreturn_maxsz 0
+#define decode_layoutreturn_maxsz 0
 #endif /* CONFIG_NFS_V4_1 */
 
 #define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
@@ -508,11 +510,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_close_sz	(compound_encode_hdr_maxsz + \
 				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
+				 encode_layoutreturn_maxsz + \
 				 encode_close_maxsz + \
 				 encode_getattr_maxsz)
 #define NFS4_dec_close_sz	(compound_decode_hdr_maxsz + \
 				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
+				 decode_layoutreturn_maxsz + \
 				 decode_close_maxsz + \
 				 decode_getattr_maxsz)
 #define NFS4_enc_setattr_sz	(compound_encode_hdr_maxsz + \
@@ -2061,6 +2065,13 @@ static void encode_free_stateid(struct xdr_stream *xdr,
 	encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
 	encode_nfs4_stateid(xdr, &args->stateid);
 }
+#else
+static inline void
+encode_layoutreturn(struct xdr_stream *xdr,
+		    const struct nfs4_layoutreturn_args *args,
+		    struct compound_hdr *hdr)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2248,6 +2259,8 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
+	if (args->lr_args)
+		encode_layoutreturn(xdr, args->lr_args, &hdr);
 	encode_close(xdr, args, &hdr);
 	if (args->bitmask != NULL)
 		encode_getfattr(xdr, args->bitmask, &hdr);
@@ -6088,6 +6101,13 @@ static int decode_free_stateid(struct xdr_stream *xdr,
 	res->status = decode_op_hdr(xdr, OP_FREE_STATEID);
 	return res->status;
 }
+#else
+static inline
+int decode_layoutreturn(struct xdr_stream *xdr,
+			       struct nfs4_layoutreturn_res *res)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -6440,6 +6460,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
+	if (res->lr_res) {
+		status = decode_layoutreturn(xdr, res->lr_res);
+		res->lr_ret = status;
+		if (status)
+			goto out;
+	}
 	status = decode_close(xdr, res);
 	if (status != 0)
 		goto out;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index beb1e10f446e..44ed64bb66ae 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -469,6 +469,7 @@ struct nfs_closeargs {
 	fmode_t			fmode;
 	u32			share_access;
 	const u32 *		bitmask;
+	struct nfs4_layoutreturn_args *lr_args;
 };
 
 struct nfs_closeres {
@@ -477,6 +478,8 @@ struct nfs_closeres {
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
+	struct nfs4_layoutreturn_res *lr_res;
+	int lr_ret;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
-- 
cgit v1.2.3


From 586f1c39daf5c840c742b9be1ec236429f26dc13 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 15 Nov 2016 15:03:33 -0500
Subject: NFSv4: Add encode/decode of the layoutreturn op in DELEGRETURN

Add XDR encoding for the layoutreturn op, and storage for the layoutreturn
arguments to the DELEGRETURN compound.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 50 ++++++++++++++++++++++++++++++++++++++++---------
 fs/nfs/nfs4xdr.c        | 10 ++++++++++
 include/linux/nfs_xdr.h |  3 +++
 3 files changed, 54 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9b9b0eabef9b..765af663257a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5608,11 +5608,15 @@ struct nfs4_delegreturndata {
 	struct nfs_fh fh;
 	nfs4_stateid stateid;
 	unsigned long timestamp;
+	struct {
+		struct nfs4_layoutreturn_args arg;
+		struct nfs4_layoutreturn_res res;
+		u32 roc_barrier;
+		bool roc;
+	} lr;
 	struct nfs_fattr fattr;
 	int rpc_status;
 	struct inode *inode;
-	bool roc;
-	u32 roc_barrier;
 };
 
 static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -5623,6 +5627,32 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		return;
 
 	trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
+
+	/* Handle Layoutreturn errors */
+	if (data->args.lr_args && task->tk_status != 0) {
+		switch(data->res.lr_ret) {
+		default:
+			data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+			break;
+		case 0:
+			data->args.lr_args = NULL;
+			data->res.lr_res = NULL;
+			break;
+		case -NFS4ERR_ADMIN_REVOKED:
+		case -NFS4ERR_DELEG_REVOKED:
+		case -NFS4ERR_EXPIRED:
+		case -NFS4ERR_BAD_STATEID:
+		case -NFS4ERR_OLD_STATEID:
+		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
+		case -NFS4ERR_WRONG_CRED:
+			data->args.lr_args = NULL;
+			data->res.lr_res = NULL;
+			data->res.lr_ret = 0;
+			rpc_restart_call_prepare(task);
+			return;
+		}
+	}
+
 	switch (task->tk_status) {
 	case 0:
 		renew_lease(data->res.server, data->timestamp);
@@ -5646,8 +5676,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		}
 	}
 	data->rpc_status = task->tk_status;
-	if (data->roc && data->rpc_status == 0)
-		pnfs_roc_set_barrier(data->inode, data->roc_barrier);
+	if (data->lr.roc && data->rpc_status == 0)
+		pnfs_roc_set_barrier(data->inode, data->lr.roc_barrier);
 }
 
 static void nfs4_delegreturn_release(void *calldata)
@@ -5656,7 +5686,7 @@ static void nfs4_delegreturn_release(void *calldata)
 	struct inode *inode = data->inode;
 
 	if (inode) {
-		if (data->roc)
+		if (data->lr.roc)
 			pnfs_roc_release(inode);
 		nfs_iput_and_deactive(inode);
 	}
@@ -5669,11 +5699,12 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 
 	d_data = (struct nfs4_delegreturndata *)data;
 
-	if (nfs4_wait_on_layoutreturn(d_data->inode, task))
+	if (!d_data->args.lr_args &&
+	    nfs4_wait_on_layoutreturn(d_data->inode, task))
 		return;
 
-	if (d_data->roc)
-		pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
+	if (d_data->lr.roc)
+		pnfs_roc_get_barrier(d_data->inode, &d_data->lr.roc_barrier);
 
 	nfs4_setup_sequence(d_data->res.server,
 			&d_data->args.seq_args,
@@ -5720,12 +5751,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	nfs4_stateid_copy(&data->stateid, stateid);
 	data->res.fattr = &data->fattr;
 	data->res.server = server;
+	data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
 	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
 	data->inode = nfs_igrab_and_active(inode);
 	if (data->inode)
-		data->roc = nfs4_roc(inode);
+		data->lr.roc = nfs4_roc(inode);
 
 	task_setup_data.callback_data = data;
 	msg.rpc_argp = &data->args;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73d2a68f0698..84fdcf27138c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -710,11 +710,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_delegreturn_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
+				encode_layoutreturn_maxsz + \
 				encode_delegreturn_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
+				decode_layoutreturn_maxsz + \
 				decode_delegreturn_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
@@ -2683,6 +2685,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fhandle, &hdr);
+	if (args->lr_args)
+		encode_layoutreturn(xdr, args->lr_args, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_delegreturn(xdr, args->stateid, &hdr);
 	encode_nops(&hdr);
@@ -6942,6 +6946,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
+	if (res->lr_res) {
+		status = decode_layoutreturn(xdr, res->lr_res);
+		res->lr_ret = status;
+		if (status)
+			goto out;
+	}
 	status = decode_getfattr(xdr, res->fattr, res->server);
 	if (status != 0)
 		goto out;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 44ed64bb66ae..bfbd0cace91b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -552,12 +552,15 @@ struct nfs4_delegreturnargs {
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
+	struct nfs4_layoutreturn_args *lr_args;
 };
 
 struct nfs4_delegreturnres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
 	struct nfs_server *server;
+	struct nfs4_layoutreturn_res *lr_res;
+	int lr_ret;
 };
 
 /*
-- 
cgit v1.2.3


From b184b5c38e4640585126e44ef84f2dbdd0d23d5a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 13 Oct 2016 15:26:47 +1100
Subject: NFS: remove l_pid field from nfs_lockowner

this field is not used in any important way and probably should
have been removed by

Commit: 8003d3c4aaa5 ("nfs4: treat lock owners as opaque values")

which removed the pid argument from nfs4_get_lock_state.

Except in unusual and uninteresting cases, two threads with the same
->tgid will have the same ->files pointer, so keeping them both
for comparison brings no benefit.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c         | 3 ---
 fs/nfs/nfs4proc.c      | 1 -
 fs/nfs/pagelist.c      | 3 +--
 fs/nfs/write.c         | 3 +--
 include/linux/nfs_fs.h | 1 -
 5 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3575e3408bd7..fd9913249713 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -703,7 +703,6 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 {
 	atomic_set(&l_ctx->count, 1);
 	l_ctx->lockowner.l_owner = current->files;
-	l_ctx->lockowner.l_pid = current->tgid;
 	INIT_LIST_HEAD(&l_ctx->list);
 	atomic_set(&l_ctx->io_count, 0);
 }
@@ -716,8 +715,6 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
 	do {
 		if (pos->lockowner.l_owner != current->files)
 			continue;
-		if (pos->lockowner.l_pid != current->tgid)
-			continue;
 		atomic_inc(&pos->count);
 		return pos;
 	} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5593b088c561..0d108f13ef16 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2947,7 +2947,6 @@ static int _nfs4_do_setattr(struct inode *inode,
 	} else if (truncate && state != NULL) {
 		struct nfs_lockowner lockowner = {
 			.l_owner = current->files,
-			.l_pid = current->tgid,
 		};
 		if (!nfs4_valid_open_stateid(state))
 			return -EBADF;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 965db474f4b0..161f8b13bbaa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -867,8 +867,7 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
 static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
 		const struct nfs_lock_context *l2)
 {
-	return l1->lockowner.l_owner == l2->lockowner.l_owner
-		&& l1->lockowner.l_pid == l2->lockowner.l_pid;
+	return l1->lockowner.l_owner == l2->lockowner.l_owner;
 }
 
 /**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53211838f72a..4d5897e6d6cb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1151,8 +1151,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 		if (l_ctx && flctx &&
 		    !(list_empty_careful(&flctx->flc_posix) &&
 		      list_empty_careful(&flctx->flc_flock))) {
-			do_flush |= l_ctx->lockowner.l_owner != current->files
-				|| l_ctx->lockowner.l_pid != current->tgid;
+			do_flush |= l_ctx->lockowner.l_owner != current->files;
 		}
 		nfs_release_request(req);
 		if (!do_flush)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 810124b33327..bf8a713c45b4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -57,7 +57,6 @@ struct nfs_access_entry {
 
 struct nfs_lockowner {
 	fl_owner_t l_owner;
-	pid_t l_pid;
 };
 
 struct nfs_lock_context {
-- 
cgit v1.2.3


From 532d4def2f95623a9b8b2cef7723e14521377911 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 13 Oct 2016 15:26:47 +1100
Subject: NFSv4: add flock_owner to open context

An open file description (struct file) in a given process can be
associated with two different lock owners.

It can have a Posix lock owner which will be different in each process
that has a fd on the file.
It can have a Flock owner which will be the same in all processes.

When searching for a lock stateid to use, we need to consider both of these
owners

So add a new "flock_owner" to the "nfs_open_context" (of which there
is one for each open file description).

This flock_owner does not need to be reference-counted as there is a
1-1 relation between 'struct file' and nfs open contexts,
and it will never be part of a list of contexts.  So there is no need
for a 'flock_context' - just the owner is enough.

The io_count included in the (Posix) lock_context provides no
guarantee that all read-aheads that could use the state have
completed, so not supporting it for flock locks in not a serious
problem.  Synchronization between flock and read-ahead can be added
later if needed.

When creating an open_context for a non-openning create call, we don't have
a 'struct file' to pass in, so the lock context gets initialized with
a NULL owner, but this will never be used.

The flock_owner is not used at all in this patch, that will come later.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c           | 6 +++---
 fs/nfs/inode.c         | 7 +++++--
 fs/nfs/nfs4file.c      | 2 +-
 fs/nfs/nfs4proc.c      | 2 +-
 include/linux/nfs_fs.h | 3 ++-
 5 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5f1af4cd1a33..f1ecfcc632eb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1467,9 +1467,9 @@ static fmode_t flags_to_mode(int flags)
 	return res;
 }
 
-static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
+static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
 {
-	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
+	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
 }
 
 static int do_open(struct inode *inode, struct file *filp)
@@ -1554,7 +1554,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 			return finish_no_open(file, dentry);
 	}
 
-	ctx = create_nfs_open_context(dentry, open_flags);
+	ctx = create_nfs_open_context(dentry, open_flags, file);
 	err = PTR_ERR(ctx);
 	if (IS_ERR(ctx))
 		goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fd9913249713..3a7601fe9dee 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -796,7 +796,9 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
 }
 EXPORT_SYMBOL_GPL(nfs_close_context);
 
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
+						fmode_t f_mode,
+						struct file *filp)
 {
 	struct nfs_open_context *ctx;
 	struct rpc_cred *cred = rpc_lookup_cred();
@@ -815,6 +817,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
 	ctx->mode = f_mode;
 	ctx->flags = 0;
 	ctx->error = 0;
+	ctx->flock_owner = (fl_owner_t)filp;
 	nfs_init_lock_context(&ctx->lock_context);
 	ctx->lock_context.open_context = ctx;
 	INIT_LIST_HEAD(&ctx->list);
@@ -939,7 +942,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 {
 	struct nfs_open_context *ctx;
 
-	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
+	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 	nfs_file_set_open_context(filp, ctx);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 89a77950e0b0..0efba77789b9 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 	parent = dget_parent(dentry);
 	dir = d_inode(parent);
 
-	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
+	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
 	err = PTR_ERR(ctx);
 	if (IS_ERR(ctx))
 		goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0d108f13ef16..68a75bf431f8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4008,7 +4008,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	struct nfs4_state *state;
 	int status = 0;
 
-	ctx = alloc_nfs_open_context(dentry, FMODE_READ);
+	ctx = alloc_nfs_open_context(dentry, FMODE_READ, NULL);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bf8a713c45b4..0adb02c4744d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -70,6 +70,7 @@ struct nfs_lock_context {
 struct nfs4_state;
 struct nfs_open_context {
 	struct nfs_lock_context lock_context;
+	fl_owner_t flock_owner;
 	struct dentry *dentry;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -357,7 +358,7 @@ extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
-extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode);
+extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode, struct file *filp);
 extern void nfs_inode_attach_open_context(struct nfs_open_context *ctx);
 extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
 extern void nfs_file_clear_open_context(struct file *flip);
-- 
cgit v1.2.3


From d51fdb87a611f8ef50518df7187173ae10469fd0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 13 Oct 2016 15:26:47 +1100
Subject: NFS: discard nfs_lockowner structure.

It now has only one field and is only used in one structure.
So replaced it in that structure by the field it contains.

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c         | 4 ++--
 fs/nfs/nfs4state.c     | 2 +-
 fs/nfs/pagelist.c      | 2 +-
 fs/nfs/write.c         | 2 +-
 include/linux/nfs_fs.h | 6 +-----
 5 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3a7601fe9dee..d79e0bac836e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr);
 static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 {
 	atomic_set(&l_ctx->count, 1);
-	l_ctx->lockowner.l_owner = current->files;
+	l_ctx->lockowner = current->files;
 	INIT_LIST_HEAD(&l_ctx->list);
 	atomic_set(&l_ctx->io_count, 0);
 }
@@ -713,7 +713,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
 	struct nfs_lock_context *pos = head;
 
 	do {
-		if (pos->lockowner.l_owner != current->files)
+		if (pos->lockowner != current->files)
 			continue;
 		atomic_inc(&pos->count);
 		return pos;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 4f1de9be10c6..26b6b8b0cae3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -953,7 +953,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
 	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 		goto out;
 
-	fl_owner = l_ctx->lockowner.l_owner;
+	fl_owner = l_ctx->lockowner;
 	fl_flock_owner = l_ctx->open_context->flock_owner;
 
 	spin_lock(&state->state_lock);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 161f8b13bbaa..6e629b856a00 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -867,7 +867,7 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
 static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
 		const struct nfs_lock_context *l2)
 {
-	return l1->lockowner.l_owner == l2->lockowner.l_owner;
+	return l1->lockowner == l2->lockowner;
 }
 
 /**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4d5897e6d6cb..6e761f3f4cbf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1151,7 +1151,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 		if (l_ctx && flctx &&
 		    !(list_empty_careful(&flctx->flc_posix) &&
 		      list_empty_careful(&flctx->flc_flock))) {
-			do_flush |= l_ctx->lockowner.l_owner != current->files;
+			do_flush |= l_ctx->lockowner != current->files;
 		}
 		nfs_release_request(req);
 		if (!do_flush)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0adb02c4744d..db1002abc95e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -55,15 +55,11 @@ struct nfs_access_entry {
 	struct rcu_head		rcu_head;
 };
 
-struct nfs_lockowner {
-	fl_owner_t l_owner;
-};
-
 struct nfs_lock_context {
 	atomic_t count;
 	struct list_head list;
 	struct nfs_open_context *open_context;
-	struct nfs_lockowner lockowner;
+	fl_owner_t lockowner;
 	atomic_t io_count;
 };
 
-- 
cgit v1.2.3


From b32614c03413f8a6025d8677c2b7c0ee976e63d4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:34 +0100
Subject: tracing/rb: Convert to hotplug state machine

Install the callbacks via the state machine. The notifier in struct
ring_buffer is replaced by the multi instance interface.  Upon
__ring_buffer_alloc() invocation, cpuhp_state_add_instance() will invoke
the trace_rb_cpu_prepare() on each CPU.

This callback may now fail. This means __ring_buffer_alloc() will fail and
cleanup (like previously) and during a CPU up event this failure will not
allow the CPU to come up.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161126231350.10321-7-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h  |   1 +
 include/linux/ring_buffer.h |   6 ++
 kernel/trace/ring_buffer.c  | 135 +++++++++++++++-----------------------------
 kernel/trace/trace.c        |  15 ++++-
 4 files changed, 66 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index e3771fb959c0..18bcfeb2463e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -62,6 +62,7 @@ enum cpuhp_state {
 	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,
 	CPUHP_ARM_BL_PREPARE,
+	CPUHP_TRACE_RB_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 4acc552e9279..b6d4568795a7 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -198,4 +198,10 @@ enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
 
+#ifdef CONFIG_RING_BUFFER
+int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
+#else
+#define trace_rb_cpu_prepare	NULL
+#endif
+
 #endif /* _LINUX_RING_BUFFER_H */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9c143739b8d7..a7a055f167c7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -479,9 +479,7 @@ struct ring_buffer {
 
 	struct ring_buffer_per_cpu	**buffers;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	struct notifier_block		cpu_notify;
-#endif
+	struct hlist_node		node;
 	u64				(*clock)(void);
 
 	struct rb_irq_work		irq_work;
@@ -1274,11 +1272,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int rb_cpu_notify(struct notifier_block *self,
-			 unsigned long action, void *hcpu);
-#endif
-
 /**
  * __ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes per cpu that is needed.
@@ -1296,6 +1289,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	long nr_pages;
 	int bsize;
 	int cpu;
+	int ret;
 
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1318,17 +1312,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (nr_pages < 2)
 		nr_pages = 2;
 
-	/*
-	 * In case of non-hotplug cpu, if the ring-buffer is allocated
-	 * in early initcall, it will not be notified of secondary cpus.
-	 * In that off case, we need to allocate for all possible cpus.
-	 */
-#ifdef CONFIG_HOTPLUG_CPU
-	cpu_notifier_register_begin();
-	cpumask_copy(buffer->cpumask, cpu_online_mask);
-#else
-	cpumask_copy(buffer->cpumask, cpu_possible_mask);
-#endif
 	buffer->cpus = nr_cpu_ids;
 
 	bsize = sizeof(void *) * nr_cpu_ids;
@@ -1337,19 +1320,15 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (!buffer->buffers)
 		goto fail_free_cpumask;
 
-	for_each_buffer_cpu(buffer, cpu) {
-		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
-		if (!buffer->buffers[cpu])
-			goto fail_free_buffers;
-	}
+	cpu = raw_smp_processor_id();
+	cpumask_set_cpu(cpu, buffer->cpumask);
+	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
+	if (!buffer->buffers[cpu])
+		goto fail_free_buffers;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	buffer->cpu_notify.notifier_call = rb_cpu_notify;
-	buffer->cpu_notify.priority = 0;
-	__register_cpu_notifier(&buffer->cpu_notify);
-	cpu_notifier_register_done();
-#endif
+	ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
+	if (ret < 0)
+		goto fail_free_buffers;
 
 	mutex_init(&buffer->mutex);
 
@@ -1364,9 +1343,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 
  fail_free_cpumask:
 	free_cpumask_var(buffer->cpumask);
-#ifdef CONFIG_HOTPLUG_CPU
-	cpu_notifier_register_done();
-#endif
 
  fail_free_buffer:
 	kfree(buffer);
@@ -1383,18 +1359,11 @@ ring_buffer_free(struct ring_buffer *buffer)
 {
 	int cpu;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	cpu_notifier_register_begin();
-	__unregister_cpu_notifier(&buffer->cpu_notify);
-#endif
+	cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
 
 	for_each_buffer_cpu(buffer, cpu)
 		rb_free_cpu_buffer(buffer->buffers[cpu]);
 
-#ifdef CONFIG_HOTPLUG_CPU
-	cpu_notifier_register_done();
-#endif
-
 	kfree(buffer->buffers);
 	free_cpumask_var(buffer->cpumask);
 
@@ -4633,62 +4602,48 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int rb_cpu_notify(struct notifier_block *self,
-			 unsigned long action, void *hcpu)
+/*
+ * We only allocate new buffers, never free them if the CPU goes down.
+ * If we were to free the buffer, then the user would lose any trace that was in
+ * the buffer.
+ */
+int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
 {
-	struct ring_buffer *buffer =
-		container_of(self, struct ring_buffer, cpu_notify);
-	long cpu = (long)hcpu;
+	struct ring_buffer *buffer;
 	long nr_pages_same;
 	int cpu_i;
 	unsigned long nr_pages;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (cpumask_test_cpu(cpu, buffer->cpumask))
-			return NOTIFY_OK;
-
-		nr_pages = 0;
-		nr_pages_same = 1;
-		/* check if all cpu sizes are same */
-		for_each_buffer_cpu(buffer, cpu_i) {
-			/* fill in the size from first enabled cpu */
-			if (nr_pages == 0)
-				nr_pages = buffer->buffers[cpu_i]->nr_pages;
-			if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
-				nr_pages_same = 0;
-				break;
-			}
-		}
-		/* allocate minimum pages, user can later expand it */
-		if (!nr_pages_same)
-			nr_pages = 2;
-		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
-		if (!buffer->buffers[cpu]) {
-			WARN(1, "failed to allocate ring buffer on CPU %ld\n",
-			     cpu);
-			return NOTIFY_OK;
+	buffer = container_of(node, struct ring_buffer, node);
+	if (cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	nr_pages = 0;
+	nr_pages_same = 1;
+	/* check if all cpu sizes are same */
+	for_each_buffer_cpu(buffer, cpu_i) {
+		/* fill in the size from first enabled cpu */
+		if (nr_pages == 0)
+			nr_pages = buffer->buffers[cpu_i]->nr_pages;
+		if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+			nr_pages_same = 0;
+			break;
 		}
-		smp_wmb();
-		cpumask_set_cpu(cpu, buffer->cpumask);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		/*
-		 * Do nothing.
-		 *  If we were to free the buffer, then the user would
-		 *  lose any trace that was in the buffer.
-		 */
-		break;
-	default:
-		break;
 	}
-	return NOTIFY_OK;
+	/* allocate minimum pages, user can later expand it */
+	if (!nr_pages_same)
+		nr_pages = 2;
+	buffer->buffers[cpu] =
+		rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
+	if (!buffer->buffers[cpu]) {
+		WARN(1, "failed to allocate ring buffer on CPU %u\n",
+		     cpu);
+		return -ENOMEM;
+	}
+	smp_wmb();
+	cpumask_set_cpu(cpu, buffer->cpumask);
+	return 0;
 }
-#endif
 
 #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8696ce6bf2f6..465d56febc5b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7659,10 +7659,21 @@ __init static int tracer_alloc_buffers(void)
 
 	raw_spin_lock_init(&global_trace.start_lock);
 
+	/*
+	 * The prepare callbacks allocates some memory for the ring buffer. We
+	 * don't free the buffer if the if the CPU goes down. If we were to free
+	 * the buffer, then the user would lose any trace that was in the
+	 * buffer. The memory will be removed once the "instance" is removed.
+	 */
+	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
+				      "trace/RB:preapre", trace_rb_cpu_prepare,
+				      NULL);
+	if (ret < 0)
+		goto out_free_cpumask;
 	/* Used for event triggers */
 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
 	if (!temp_buffer)
-		goto out_free_cpumask;
+		goto out_rm_hp_state;
 
 	if (trace_create_savedcmd() < 0)
 		goto out_free_temp_buffer;
@@ -7723,6 +7734,8 @@ out_free_savedcmd:
 	free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
 	ring_buffer_free(temp_buffer);
+out_rm_hp_state:
+	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
 out_free_cpumask:
 	free_cpumask_var(global_trace.tracing_cpumask);
 out_free_buffer_mask:
-- 
cgit v1.2.3


From 5438da977f83c945d4e72ee4f9c4508c0eb64e15 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 29 Nov 2016 15:52:21 +0100
Subject: mm/vmstat: Convert to hotplug state machine

Install the callbacks via the state machine, but do not invoke them as we
can initialize the node state without calling the callbacks on all online
CPUs.

start_shepherd_timer() is now called outside the get_online_cpus() block
which is safe as it only operates on cpu possible mask.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: rt@linutronix.de
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20161129145221.ffc3kg3hd7lxiwj6@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/vmstat.c                | 76 +++++++++++++++++++++-------------------------
 2 files changed, 36 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 18bcfeb2463e..4ebd1bc27f8d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -20,6 +20,7 @@ enum cpuhp_state {
 	CPUHP_VIRT_NET_DEAD,
 	CPUHP_SLUB_DEAD,
 	CPUHP_MM_WRITEBACK_DEAD,
+	CPUHP_MM_VMSTAT_DEAD,
 	CPUHP_SOFTIRQ_DEAD,
 	CPUHP_NET_MVNETA_DEAD,
 	CPUHP_CPUIDLE_DEAD,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5152cd1c490f..7c28df36f50f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1728,64 +1728,58 @@ static void __init init_cpu_node_state(void)
 	}
 }
 
-static void vmstat_cpu_dead(int node)
+static int vmstat_cpu_online(unsigned int cpu)
+{
+	refresh_zone_stat_thresholds();
+	node_set_state(cpu_to_node(cpu), N_CPU);
+	return 0;
+}
+
+static int vmstat_cpu_down_prep(unsigned int cpu)
+{
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+	return 0;
+}
+
+static int vmstat_cpu_dead(unsigned int cpu)
 {
 	const struct cpumask *node_cpus;
+	int node;
 
+	node = cpu_to_node(cpu);
+
+	refresh_zone_stat_thresholds();
 	node_cpus = cpumask_of_node(node);
 	if (cpumask_weight(node_cpus) > 0)
-		return;
+		return 0;
 
 	node_clear_state(node, N_CPU);
+	return 0;
 }
 
-/*
- * Use the cpu notifier to insure that the thresholds are recalculated
- * when necessary.
- */
-static int vmstat_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	long cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		refresh_zone_stat_thresholds();
-		node_set_state(cpu_to_node(cpu), N_CPU);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-		break;
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		refresh_zone_stat_thresholds();
-		vmstat_cpu_dead(cpu_to_node(cpu));
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vmstat_notifier =
-	{ &vmstat_cpuup_callback, NULL, 0 };
 #endif
 
 static int __init setup_vmstat(void)
 {
 #ifdef CONFIG_SMP
-	cpu_notifier_register_begin();
-	__register_cpu_notifier(&vmstat_notifier);
+	int ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
+					NULL, vmstat_cpu_dead);
+	if (ret < 0)
+		pr_err("vmstat: failed to register 'dead' hotplug state\n");
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
+					vmstat_cpu_online,
+					vmstat_cpu_down_prep);
+	if (ret < 0)
+		pr_err("vmstat: failed to register 'online' hotplug state\n");
+
+	get_online_cpus();
 	init_cpu_node_state();
+	put_online_cpus();
 
 	start_shepherd_timer();
-	cpu_notifier_register_done();
 #endif
 #ifdef CONFIG_PROC_FS
 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
-- 
cgit v1.2.3


From 215c89d055e085c42cea4fd571a1d0db0b6d5648 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:38 +0100
Subject: mm/zsmalloc: Convert to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: linux-mm@kvack.org
Cc: Minchan Kim <minchan@kernel.org>
Cc: rt@linutronix.de
Cc: Nitin Gupta <ngupta@vflare.org>
Link: http://lkml.kernel.org/r/20161126231350.10321-11-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/zsmalloc.c              | 67 +++++++++-------------------------------------
 2 files changed, 14 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4ebd1bc27f8d..9f29dd996088 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -64,6 +64,7 @@ enum cpuhp_state {
 	CPUHP_NET_IUCV_PREPARE,
 	CPUHP_ARM_BL_PREPARE,
 	CPUHP_TRACE_RB_PREPARE,
+	CPUHP_MM_ZS_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b0bc023d25c5..9cc3c0b2c2c1 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1284,61 +1284,21 @@ out:
 
 #endif /* CONFIG_PGTABLE_MAPPING */
 
-static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
-				void *pcpu)
+static int zs_cpu_prepare(unsigned int cpu)
 {
-	int ret, cpu = (long)pcpu;
 	struct mapping_area *area;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-		area = &per_cpu(zs_map_area, cpu);
-		ret = __zs_cpu_up(area);
-		if (ret)
-			return notifier_from_errno(ret);
-		break;
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		area = &per_cpu(zs_map_area, cpu);
-		__zs_cpu_down(area);
-		break;
-	}
-
-	return NOTIFY_OK;
+	area = &per_cpu(zs_map_area, cpu);
+	return __zs_cpu_up(area);
 }
 
-static struct notifier_block zs_cpu_nb = {
-	.notifier_call = zs_cpu_notifier
-};
-
-static int zs_register_cpu_notifier(void)
+static int zs_cpu_dead(unsigned int cpu)
 {
-	int cpu, uninitialized_var(ret);
-
-	cpu_notifier_register_begin();
-
-	__register_cpu_notifier(&zs_cpu_nb);
-	for_each_online_cpu(cpu) {
-		ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-		if (notifier_to_errno(ret))
-			break;
-	}
-
-	cpu_notifier_register_done();
-	return notifier_to_errno(ret);
-}
-
-static void zs_unregister_cpu_notifier(void)
-{
-	int cpu;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
-	__unregister_cpu_notifier(&zs_cpu_nb);
+	struct mapping_area *area;
 
-	cpu_notifier_register_done();
+	area = &per_cpu(zs_map_area, cpu);
+	__zs_cpu_down(area);
+	return 0;
 }
 
 static void __init init_zs_size_classes(void)
@@ -2534,10 +2494,10 @@ static int __init zs_init(void)
 	if (ret)
 		goto out;
 
-	ret = zs_register_cpu_notifier();
-
+	ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
+				zs_cpu_prepare, zs_cpu_dead);
 	if (ret)
-		goto notifier_fail;
+		goto hp_setup_fail;
 
 	init_zs_size_classes();
 
@@ -2549,8 +2509,7 @@ static int __init zs_init(void)
 
 	return 0;
 
-notifier_fail:
-	zs_unregister_cpu_notifier();
+hp_setup_fail:
 	zsmalloc_unmount();
 out:
 	return ret;
@@ -2562,7 +2521,7 @@ static void __exit zs_exit(void)
 	zpool_unregister_driver(&zs_zpool_driver);
 #endif
 	zsmalloc_unmount();
-	zs_unregister_cpu_notifier();
+	cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
 
 	zs_stat_exit();
 }
-- 
cgit v1.2.3


From ad7ed7708db9ff388450935645816d44bf08a56d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:39 +0100
Subject: mm/zswap: Convert dst-mem to hotplug state machine

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-mm@kvack.org
Cc: Seth Jennings <sjenning@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161126231350.10321-12-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/zswap.c                 | 73 +++++++++++-----------------------------------
 2 files changed, 18 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 9f29dd996088..62f51a4e8676 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -65,6 +65,7 @@ enum cpuhp_state {
 	CPUHP_ARM_BL_PREPARE,
 	CPUHP_TRACE_RB_PREPARE,
 	CPUHP_MM_ZS_PREPARE,
+	CPUHP_MM_ZSWP_MEM_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/mm/zswap.c b/mm/zswap.c
index 275b22cc8df4..b13aa5706348 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -352,70 +352,28 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
 **********************************/
 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
 
-static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
+static int zswap_dstmem_prepare(unsigned int cpu)
 {
 	u8 *dst;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-		dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
-		if (!dst) {
-			pr_err("can't allocate compressor buffer\n");
-			return NOTIFY_BAD;
-		}
-		per_cpu(zswap_dstmem, cpu) = dst;
-		break;
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		dst = per_cpu(zswap_dstmem, cpu);
-		kfree(dst);
-		per_cpu(zswap_dstmem, cpu) = NULL;
-		break;
-	default:
-		break;
+	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+	if (!dst) {
+		pr_err("can't allocate compressor buffer\n");
+		return -ENOMEM;
 	}
-	return NOTIFY_OK;
+	per_cpu(zswap_dstmem, cpu) = dst;
+	return 0;
 }
 
-static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
-				     unsigned long action, void *pcpu)
+static int zswap_dstmem_dead(unsigned int cpu)
 {
-	return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
-}
-
-static struct notifier_block zswap_dstmem_notifier = {
-	.notifier_call =	zswap_cpu_dstmem_notifier,
-};
+	u8 *dst;
 
-static int __init zswap_cpu_dstmem_init(void)
-{
-	unsigned long cpu;
+	dst = per_cpu(zswap_dstmem, cpu);
+	kfree(dst);
+	per_cpu(zswap_dstmem, cpu) = NULL;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
-		    NOTIFY_BAD)
-			goto cleanup;
-	__register_cpu_notifier(&zswap_dstmem_notifier);
-	cpu_notifier_register_done();
 	return 0;
-
-cleanup:
-	for_each_online_cpu(cpu)
-		__zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
-	cpu_notifier_register_done();
-	return -ENOMEM;
-}
-
-static void zswap_cpu_dstmem_destroy(void)
-{
-	unsigned long cpu;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		__zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
-	__unregister_cpu_notifier(&zswap_dstmem_notifier);
-	cpu_notifier_register_done();
 }
 
 static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
@@ -1238,6 +1196,7 @@ static void __exit zswap_debugfs_exit(void) { }
 static int __init init_zswap(void)
 {
 	struct zswap_pool *pool;
+	int ret;
 
 	zswap_init_started = true;
 
@@ -1246,7 +1205,9 @@ static int __init init_zswap(void)
 		goto cache_fail;
 	}
 
-	if (zswap_cpu_dstmem_init()) {
+	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
+				zswap_dstmem_prepare, zswap_dstmem_dead);
+	if (ret) {
 		pr_err("dstmem alloc failed\n");
 		goto dstmem_fail;
 	}
@@ -1267,7 +1228,7 @@ static int __init init_zswap(void)
 	return 0;
 
 pool_fail:
-	zswap_cpu_dstmem_destroy();
+	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
 dstmem_fail:
 	zswap_entry_cache_destroy();
 cache_fail:
-- 
cgit v1.2.3


From cab7a7e5b6c5c2638b00c72559ff9fb715583c98 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:40 +0100
Subject: mm/zswap: Convert pool to hotplug state machine

Install the callbacks via the state machine. Multi state is used to address the
per-pool notifier. Uppon adding of the intance the callback is invoked for all
online CPUs so the manual init can go.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-mm@kvack.org
Cc: Seth Jennings <sjenning@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161126231350.10321-13-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h |  1 +
 mm/zswap.c                 | 99 ++++++++++++++++------------------------------
 2 files changed, 35 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 62f51a4e8676..c7d0d76ef0ee 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -66,6 +66,7 @@ enum cpuhp_state {
 	CPUHP_TRACE_RB_PREPARE,
 	CPUHP_MM_ZS_PREPARE,
 	CPUHP_MM_ZSWP_MEM_PREPARE,
+	CPUHP_MM_ZSWP_POOL_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
diff --git a/mm/zswap.c b/mm/zswap.c
index b13aa5706348..067a0d62f318 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -118,7 +118,7 @@ struct zswap_pool {
 	struct kref kref;
 	struct list_head list;
 	struct work_struct work;
-	struct notifier_block notifier;
+	struct hlist_node node;
 	char tfm_name[CRYPTO_MAX_ALG_NAME];
 };
 
@@ -376,77 +376,34 @@ static int zswap_dstmem_dead(unsigned int cpu)
 	return 0;
 }
 
-static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
-				     unsigned long action, unsigned long cpu)
+static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
 {
+	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_comp *tfm;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-		if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
-			break;
-		tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
-		if (IS_ERR_OR_NULL(tfm)) {
-			pr_err("could not alloc crypto comp %s : %ld\n",
-			       pool->tfm_name, PTR_ERR(tfm));
-			return NOTIFY_BAD;
-		}
-		*per_cpu_ptr(pool->tfm, cpu) = tfm;
-		break;
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		tfm = *per_cpu_ptr(pool->tfm, cpu);
-		if (!IS_ERR_OR_NULL(tfm))
-			crypto_free_comp(tfm);
-		*per_cpu_ptr(pool->tfm, cpu) = NULL;
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static int zswap_cpu_comp_notifier(struct notifier_block *nb,
-				   unsigned long action, void *pcpu)
-{
-	unsigned long cpu = (unsigned long)pcpu;
-	struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
-
-	return __zswap_cpu_comp_notifier(pool, action, cpu);
-}
+	if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
+		return 0;
 
-static int zswap_cpu_comp_init(struct zswap_pool *pool)
-{
-	unsigned long cpu;
-
-	memset(&pool->notifier, 0, sizeof(pool->notifier));
-	pool->notifier.notifier_call = zswap_cpu_comp_notifier;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
-		    NOTIFY_BAD)
-			goto cleanup;
-	__register_cpu_notifier(&pool->notifier);
-	cpu_notifier_register_done();
+	tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
+	if (IS_ERR_OR_NULL(tfm)) {
+		pr_err("could not alloc crypto comp %s : %ld\n",
+		       pool->tfm_name, PTR_ERR(tfm));
+		return -ENOMEM;
+	}
+	*per_cpu_ptr(pool->tfm, cpu) = tfm;
 	return 0;
-
-cleanup:
-	for_each_online_cpu(cpu)
-		__zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
-	cpu_notifier_register_done();
-	return -ENOMEM;
 }
 
-static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
+static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
-	unsigned long cpu;
+	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+	struct crypto_comp *tfm;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		__zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
-	__unregister_cpu_notifier(&pool->notifier);
-	cpu_notifier_register_done();
+	tfm = *per_cpu_ptr(pool->tfm, cpu);
+	if (!IS_ERR_OR_NULL(tfm))
+		crypto_free_comp(tfm);
+	*per_cpu_ptr(pool->tfm, cpu) = NULL;
+	return 0;
 }
 
 /*********************************
@@ -527,6 +484,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	struct zswap_pool *pool;
 	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+	int ret;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	if (!pool) {
@@ -551,7 +509,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 		goto error;
 	}
 
-	if (zswap_cpu_comp_init(pool))
+	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
+				       &pool->node);
+	if (ret)
 		goto error;
 	pr_debug("using %s compressor\n", pool->tfm_name);
 
@@ -605,7 +565,7 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
 {
 	zswap_pool_debug("destroying", pool);
 
-	zswap_cpu_comp_destroy(pool);
+	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
 	free_percpu(pool->tfm);
 	zpool_destroy_pool(pool->zpool);
 	kfree(pool);
@@ -1212,6 +1172,13 @@ static int __init init_zswap(void)
 		goto dstmem_fail;
 	}
 
+	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
+				      "mm/zswap_pool:prepare",
+				      zswap_cpu_comp_prepare,
+				      zswap_cpu_comp_dead);
+	if (ret)
+		goto hp_fail;
+
 	pool = __zswap_pool_create_fallback();
 	if (!pool) {
 		pr_err("pool creation failed\n");
@@ -1228,6 +1195,8 @@ static int __init init_zswap(void)
 	return 0;
 
 pool_fail:
+	cpuhp_remove_state_nocalls(CPUHP_MM_ZSWP_POOL_PREPARE);
+hp_fail:
 	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
 dstmem_fail:
 	zswap_entry_cache_destroy();
-- 
cgit v1.2.3


From 21647615db288d9dacad0de6a5df846b39d51bea Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:41 +0100
Subject: iommu/vt-d: Convert to hotplug state machine

Install the callbacks via the state machine.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: iommu@lists.linux-foundation.org
Cc: rt@linutronix.de
Cc: David Woodhouse <dwmw2@infradead.org>
Link: http://lkml.kernel.org/r/20161126231350.10321-14-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/iommu/intel-iommu.c | 24 ++++++------------------
 include/linux/cpuhotplug.h  |  1 +
 2 files changed, 7 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a4407eabf0e6..fd7962560e56 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4665,25 +4665,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 	}
 }
 
-static int intel_iommu_cpu_notifier(struct notifier_block *nfb,
-				    unsigned long action, void *v)
+static int intel_iommu_cpu_dead(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)v;
-
-	switch (action) {
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_all_cpu_cached_iovas(cpu);
-		flush_unmaps_timeout(cpu);
-		break;
-	}
-	return NOTIFY_OK;
+	free_all_cpu_cached_iovas(cpu);
+	flush_unmaps_timeout(cpu);
+	return 0;
 }
 
-static struct notifier_block intel_iommu_cpu_nb = {
-	.notifier_call = intel_iommu_cpu_notifier,
-};
-
 static ssize_t intel_iommu_show_version(struct device *dev,
 					struct device_attribute *attr,
 					char *buf)
@@ -4832,8 +4820,8 @@ int __init intel_iommu_init(void)
 	bus_register_notifier(&pci_bus_type, &device_nb);
 	if (si_domain && !hw_pass_through)
 		register_memory_notifier(&intel_iommu_memory_nb);
-	register_hotcpu_notifier(&intel_iommu_cpu_nb);
-
+	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
+			  intel_iommu_cpu_dead);
 	intel_iommu_enabled = 1;
 
 	return 0;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c7d0d76ef0ee..853f8176594d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -40,6 +40,7 @@ enum cpuhp_state {
 	CPUHP_PAGE_ALLOC_DEAD,
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_PCI_XGENE_DEAD,
+	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From 3f7cd919f3df05918535de39273174710409eb40 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:45 +0100
Subject: KVM/PPC/Book3S HV: Convert to hotplug state machine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Install the callbacks via the state machine.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: kvm@vger.kernel.org
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: kvm-ppc@vger.kernel.org
Cc: Paul Mackerras <paulus@samba.org>
Cc: rt@linutronix.de
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.com>
Link: http://lkml.kernel.org/r/20161126231350.10321-18-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/kvm/book3s_hv.c | 48 ++++++++++----------------------------------
 include/linux/cpuhotplug.h   |  1 +
 2 files changed, 12 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3686471be32b..39ef1f4a7b02 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2254,12 +2254,12 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
  * enter the guest. Only do this if it is the primary thread of the
  * core (not if a subcore) that is entering the guest.
  */
-static inline void kvmppc_clear_host_core(int cpu)
+static inline int kvmppc_clear_host_core(unsigned int cpu)
 {
 	int core;
 
 	if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
-		return;
+		return 0;
 	/*
 	 * Memory barrier can be omitted here as we will do a smp_wmb()
 	 * later in kvmppc_start_thread and we need ensure that state is
@@ -2267,6 +2267,7 @@ static inline void kvmppc_clear_host_core(int cpu)
 	 */
 	core = cpu >> threads_shift;
 	kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+	return 0;
 }
 
 /*
@@ -2274,12 +2275,12 @@ static inline void kvmppc_clear_host_core(int cpu)
  * Only need to do this if it is the primary thread of the core that is
  * exiting.
  */
-static inline void kvmppc_set_host_core(int cpu)
+static inline int kvmppc_set_host_core(unsigned int cpu)
 {
 	int core;
 
 	if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
-		return;
+		return 0;
 
 	/*
 	 * Memory barrier can be omitted here because we do a spin_unlock
@@ -2287,6 +2288,7 @@ static inline void kvmppc_set_host_core(int cpu)
 	 */
 	core = cpu >> threads_shift;
 	kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+	return 0;
 }
 
 /*
@@ -3094,36 +3096,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_KVM_XICS
-static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
-			void *hcpu)
-{
-	unsigned long cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		kvmppc_set_host_core(cpu);
-		break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		kvmppc_clear_host_core(cpu);
-		break;
-#endif
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block kvmppc_cpu_notifier = {
-	    .notifier_call = kvmppc_cpu_notify,
-};
-
 /*
  * Allocate a per-core structure for managing state about which cores are
  * running in the host versus the guest and for exchanging data between
@@ -3185,15 +3157,17 @@ void kvmppc_alloc_host_rm_ops(void)
 		return;
 	}
 
-	register_cpu_notifier(&kvmppc_cpu_notifier);
-
+	cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+				  "ppc/kvm_book3s:prepare",
+				  kvmppc_set_host_core,
+				  kvmppc_clear_host_core);
 	put_online_cpus();
 }
 
 void kvmppc_free_host_rm_ops(void)
 {
 	if (kvmppc_host_rm_ops_hv) {
-		unregister_cpu_notifier(&kvmppc_cpu_notifier);
+		cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
 		kfree(kvmppc_host_rm_ops_hv->rm_core);
 		kfree(kvmppc_host_rm_ops_hv);
 		kvmppc_host_rm_ops_hv = NULL;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 853f8176594d..71c6822dd5be 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -68,6 +68,7 @@ enum cpuhp_state {
 	CPUHP_MM_ZS_PREPARE,
 	CPUHP_MM_ZSWP_MEM_PREPARE,
 	CPUHP_MM_ZSWP_POOL_PREPARE,
+	CPUHP_KVM_PPC_BOOK3S_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 1dd6c834fa4a75a86fecefb6d1f1525f1cb755c7 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Sun, 27 Nov 2016 00:13:46 +0100
Subject: zram: Convert to hotplug state machine

Install the callbacks via the state machine with multi instance support and let
the core invoke the callbacks on the already online CPUs.

[bigeasy: wire up the multi instance stuff]
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: rt@linutronix.de
Cc: Nitin Gupta <ngupta@vflare.org>
Link: http://lkml.kernel.org/r/20161126231350.10321-19-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/block/zram/zcomp.c    | 76 ++++++++++++++-----------------------------
 drivers/block/zram/zcomp.h    |  5 +--
 drivers/block/zram/zram_drv.c |  9 +++++
 include/linux/cpuhotplug.h    |  1 +
 4 files changed, 38 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 4b5cd3a7b2b6..12046f4f00e4 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -160,82 +160,56 @@ int zcomp_decompress(struct zcomp_strm *zstrm,
 			dst, &dst_len);
 }
 
-static int __zcomp_cpu_notifier(struct zcomp *comp,
-		unsigned long action, unsigned long cpu)
+int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
 {
+	struct zcomp *comp = hlist_entry(node, struct zcomp, node);
 	struct zcomp_strm *zstrm;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-		if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
-			break;
-		zstrm = zcomp_strm_alloc(comp);
-		if (IS_ERR_OR_NULL(zstrm)) {
-			pr_err("Can't allocate a compression stream\n");
-			return NOTIFY_BAD;
-		}
-		*per_cpu_ptr(comp->stream, cpu) = zstrm;
-		break;
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		zstrm = *per_cpu_ptr(comp->stream, cpu);
-		if (!IS_ERR_OR_NULL(zstrm))
-			zcomp_strm_free(zstrm);
-		*per_cpu_ptr(comp->stream, cpu) = NULL;
-		break;
-	default:
-		break;
+	if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
+		return 0;
+
+	zstrm = zcomp_strm_alloc(comp);
+	if (IS_ERR_OR_NULL(zstrm)) {
+		pr_err("Can't allocate a compression stream\n");
+		return -ENOMEM;
 	}
-	return NOTIFY_OK;
+	*per_cpu_ptr(comp->stream, cpu) = zstrm;
+	return 0;
 }
 
-static int zcomp_cpu_notifier(struct notifier_block *nb,
-		unsigned long action, void *pcpu)
+int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
 {
-	unsigned long cpu = (unsigned long)pcpu;
-	struct zcomp *comp = container_of(nb, typeof(*comp), notifier);
+	struct zcomp *comp = hlist_entry(node, struct zcomp, node);
+	struct zcomp_strm *zstrm;
 
-	return __zcomp_cpu_notifier(comp, action, cpu);
+	zstrm = *per_cpu_ptr(comp->stream, cpu);
+	if (!IS_ERR_OR_NULL(zstrm))
+		zcomp_strm_free(zstrm);
+	*per_cpu_ptr(comp->stream, cpu) = NULL;
+	return 0;
 }
 
 static int zcomp_init(struct zcomp *comp)
 {
-	unsigned long cpu;
 	int ret;
 
-	comp->notifier.notifier_call = zcomp_cpu_notifier;
-
 	comp->stream = alloc_percpu(struct zcomp_strm *);
 	if (!comp->stream)
 		return -ENOMEM;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu) {
-		ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu);
-		if (ret == NOTIFY_BAD)
-			goto cleanup;
-	}
-	__register_cpu_notifier(&comp->notifier);
-	cpu_notifier_register_done();
+	ret = cpuhp_state_add_instance(CPUHP_ZCOMP_PREPARE, &comp->node);
+	if (ret < 0)
+		goto cleanup;
 	return 0;
 
 cleanup:
-	for_each_online_cpu(cpu)
-		__zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
-	cpu_notifier_register_done();
-	return -ENOMEM;
+	free_percpu(comp->stream);
+	return ret;
 }
 
 void zcomp_destroy(struct zcomp *comp)
 {
-	unsigned long cpu;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		__zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
-	__unregister_cpu_notifier(&comp->notifier);
-	cpu_notifier_register_done();
-
+	cpuhp_state_remove_instance(CPUHP_ZCOMP_PREPARE, &comp->node);
 	free_percpu(comp->stream);
 	kfree(comp);
 }
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 478cac2ed465..41c1002a7d7d 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -19,11 +19,12 @@ struct zcomp_strm {
 /* dynamic per-device compression frontend */
 struct zcomp {
 	struct zcomp_strm * __percpu *stream;
-	struct notifier_block notifier;
-
 	const char *name;
+	struct hlist_node node;
 };
 
+int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node);
+int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node);
 ssize_t zcomp_available_show(const char *comp, char *buf);
 bool zcomp_available_algorithm(const char *comp);
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 04365b17ee67..511c35fd5c6f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -30,6 +30,7 @@
 #include <linux/err.h>
 #include <linux/idr.h>
 #include <linux/sysfs.h>
+#include <linux/cpuhotplug.h>
 
 #include "zram_drv.h"
 
@@ -1436,15 +1437,22 @@ static void destroy_devices(void)
 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
 	idr_destroy(&zram_index_idr);
 	unregister_blkdev(zram_major, "zram");
+	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
 }
 
 static int __init zram_init(void)
 {
 	int ret;
 
+	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
+				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
+	if (ret < 0)
+		return ret;
+
 	ret = class_register(&zram_control_class);
 	if (ret) {
 		pr_err("Unable to register zram-control class\n");
+		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
 		return ret;
 	}
 
@@ -1452,6 +1460,7 @@ static int __init zram_init(void)
 	if (zram_major <= 0) {
 		pr_err("Unable to get major number\n");
 		class_unregister(&zram_control_class);
+		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
 		return -EBUSY;
 	}
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 71c6822dd5be..22acee76cf4c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -69,6 +69,7 @@ enum cpuhp_state {
 	CPUHP_MM_ZSWP_MEM_PREPARE,
 	CPUHP_MM_ZSWP_POOL_PREPARE,
 	CPUHP_KVM_PPC_BOOK3S_PREPARE,
+	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
cgit v1.2.3


From 1c1b522808a18402f043c1418b4e48c7355480cc Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 30 Nov 2016 17:59:37 +0200
Subject: net/mlx5e: Implement Fragmented Work Queue (WQ)

Add new type of struct mlx5_frag_buf which is used to allocate fragmented
buffers rather than contiguous, and make the Completion Queues (CQs) use
it as they are big (default of 2MB per CQ in Striding RQ).

This fixes the failures of type:
"mlx5e_open_locked: mlx5e_open_channels failed, -12"
due to dma_zalloc_coherent insufficient contiguous coherent memory to
satisfy the driver's request when the user tries to setup more or larger
rings.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c   | 66 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++--
 drivers/net/ethernet/mellanox/mlx5/core/wq.c      | 26 ++++++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.h      | 18 +++++--
 include/linux/mlx5/driver.h                       | 11 ++++
 6 files changed, 116 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 2c6e3c7b7417..44791de5afe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -106,6 +106,63 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+			     struct mlx5_frag_buf *buf, int node)
+{
+	int i;
+
+	buf->size = size;
+	buf->npages = 1 << get_order(size);
+	buf->page_shift = PAGE_SHIFT;
+	buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+			     GFP_KERNEL);
+	if (!buf->frags)
+		goto err_out;
+
+	for (i = 0; i < buf->npages; i++) {
+		struct mlx5_buf_list *frag = &buf->frags[i];
+		int frag_sz = min_t(int, size, PAGE_SIZE);
+
+		frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+							  &frag->map, node);
+		if (!frag->buf)
+			goto err_free_buf;
+		if (frag->map & ((1 << buf->page_shift) - 1)) {
+			dma_free_coherent(&dev->pdev->dev, frag_sz,
+					  buf->frags[i].buf, buf->frags[i].map);
+			mlx5_core_warn(dev, "unexpected map alignment: 0x%p, page_shift=%d\n",
+				       (void *)frag->map, buf->page_shift);
+			goto err_free_buf;
+		}
+		size -= frag_sz;
+	}
+
+	return 0;
+
+err_free_buf:
+	while (i--)
+		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+				  buf->frags[i].map);
+	kfree(buf->frags);
+err_out:
+	return -ENOMEM;
+}
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+	int size = buf->size;
+	int i;
+
+	for (i = 0; i < buf->npages; i++) {
+		int frag_sz = min_t(int, size, PAGE_SIZE);
+
+		dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+				  buf->frags[i].map);
+		size -= frag_sz;
+	}
+	kfree(buf->frags);
+}
+
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
 						 int node)
 {
@@ -230,3 +287,12 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
 	}
 }
 EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+	int i;
+
+	for (i = 0; i < buf->npages; i++)
+		pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 442dbc3e6be4..f16f7fbd2044 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -286,7 +286,7 @@ struct mlx5e_cq {
 	u16                        decmprs_wqe_counter;
 
 	/* control */
-	struct mlx5_wq_ctrl        wq_ctrl;
+	struct mlx5_frag_wq_ctrl   wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6b492ca17d7e..ba25cd361bb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1201,7 +1201,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
 
 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
-	mlx5_wq_destroy(&cq->wq_ctrl);
+	mlx5_cqwq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1218,7 +1218,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * cq->wq_ctrl.buf.npages;
+		sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (!in)
 		return -ENOMEM;
@@ -1227,15 +1227,15 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
 	memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-	mlx5_fill_page_array(&cq->wq_ctrl.buf,
-			     (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
 	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
 	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mcq->uar->index);
-	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
 					    MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 821a087c7ae2..921673c42bc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -101,13 +101,15 @@ err_db_free:
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_wq_ctrl *wq_ctrl)
+		     struct mlx5_frag_wq_ctrl *wq_ctrl)
 {
 	int err;
 
-	wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
-	wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
-	wq->sz_m1 = (1 << wq->log_sz) - 1;
+	wq->log_stride	= 6 + MLX5_GET(cqc, cqc, cqe_sz);
+	wq->log_sz	= MLX5_GET(cqc, cqc, log_cq_size);
+	wq->sz_m1	= (1 << wq->log_sz) - 1;
+	wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
+	wq->frag_sz_m1	= (1 << wq->log_frag_strides) - 1;
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -115,14 +117,16 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
-				  &wq_ctrl->buf, param->buf_numa_node);
+	err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+				       &wq_ctrl->frag_buf,
+				       param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+			       err);
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.direct.buf;
+	wq->frag_buf = wq_ctrl->frag_buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -184,3 +188,9 @@ void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
 	mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
 	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
 }
+
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl)
+{
+	mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->frag_buf);
+	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 6c2a8f95093c..d8afed898c31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -47,6 +47,12 @@ struct mlx5_wq_ctrl {
 	struct mlx5_db		db;
 };
 
+struct mlx5_frag_wq_ctrl {
+	struct mlx5_core_dev	*mdev;
+	struct mlx5_frag_buf	frag_buf;
+	struct mlx5_db		db;
+};
+
 struct mlx5_wq_cyc {
 	void			*buf;
 	__be32			*db;
@@ -55,12 +61,14 @@ struct mlx5_wq_cyc {
 };
 
 struct mlx5_cqwq {
-	void			*buf;
+	struct mlx5_frag_buf	frag_buf;
 	__be32			*db;
 	u32			sz_m1;
+	u32			frag_sz_m1;
 	u32			cc; /* consumer counter */
 	u8			log_sz;
 	u8			log_stride;
+	u8			log_frag_strides;
 };
 
 struct mlx5_wq_ll {
@@ -81,7 +89,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_wq_ctrl *wq_ctrl);
+		     struct mlx5_frag_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
@@ -90,6 +98,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl);
 
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
@@ -116,7 +125,10 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-	return wq->buf + (ix << wq->log_stride);
+	unsigned int frag = (ix >> wq->log_frag_strides);
+
+	return wq->frag_buf.frags[frag].buf +
+		((wq->frag_sz_m1 & ix) << wq->log_stride);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 68b85efc3908..0ae55361e674 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -318,6 +318,13 @@ struct mlx5_buf {
 	u8			page_shift;
 };
 
+struct mlx5_frag_buf {
+	struct mlx5_buf_list	*frags;
+	int			npages;
+	int			size;
+	u8			page_shift;
+};
+
 struct mlx5_eq_tasklet {
 	struct list_head list;
 	struct list_head process_list;
@@ -822,6 +829,9 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+			     struct mlx5_frag_buf *buf, int node);
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
 struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 						      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
@@ -866,6 +876,7 @@ void mlx5_unregister_debugfs(void);
 int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-- 
cgit v1.2.3


From 3a0af8fd61f90920f6fa04e4f1e9a6a73c1b4fd2 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 30 Nov 2016 17:10:10 +0100
Subject: bpf: BPF for lightweight tunnel infrastructure

Registers new BPF program types which correspond to the LWT hooks:
  - BPF_PROG_TYPE_LWT_IN   => dst_input()
  - BPF_PROG_TYPE_LWT_OUT  => dst_output()
  - BPF_PROG_TYPE_LWT_XMIT => lwtunnel_xmit()

The separate program types are required to differentiate between the
capabilities each LWT hook allows:

 * Programs attached to dst_input() or dst_output() are restricted and
   may only read the data of an skb. This prevent modification and
   possible invalidation of already validated packet headers on receive
   and the construction of illegal headers while the IP headers are
   still being assembled.

 * Programs attached to lwtunnel_xmit() are allowed to modify packet
   content as well as prepending an L2 header via a newly introduced
   helper bpf_skb_change_head(). This is safe as lwtunnel_xmit() is
   invoked after the IP header has been assembled completely.

All BPF programs receive an skb with L3 headers attached and may return
one of the following error codes:

 BPF_OK - Continue routing as per nexthop
 BPF_DROP - Drop skb and return EPERM
 BPF_REDIRECT - Redirect skb to device as per redirect() helper.
                (Only valid in lwtunnel_xmit() context)

The return codes are binary compatible with their TC_ACT_
relatives to ease compatibility.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h        |   2 +-
 include/uapi/linux/bpf.h      |  32 +++-
 include/uapi/linux/lwtunnel.h |  23 +++
 kernel/bpf/verifier.c         |  14 +-
 net/Kconfig                   |   8 +
 net/core/Makefile             |   1 +
 net/core/filter.c             | 173 ++++++++++++++++++
 net/core/lwt_bpf.c            | 396 ++++++++++++++++++++++++++++++++++++++++++
 net/core/lwtunnel.c           |   2 +
 9 files changed, 646 insertions(+), 5 deletions(-)
 create mode 100644 net/core/lwt_bpf.c

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7f246a281435..7ba644626553 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -438,7 +438,7 @@ struct xdp_buff {
 };
 
 /* compute the linear packet data range [data, data_end) which
- * will be accessed by cls_bpf and act_bpf programs
+ * will be accessed by cls_bpf, act_bpf and lwt programs
  */
 static inline void bpf_compute_data_end(struct sk_buff *skb)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1370a9d1456f..22ac82792687 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,6 +101,9 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_XDP,
 	BPF_PROG_TYPE_PERF_EVENT,
 	BPF_PROG_TYPE_CGROUP_SKB,
+	BPF_PROG_TYPE_LWT_IN,
+	BPF_PROG_TYPE_LWT_OUT,
+	BPF_PROG_TYPE_LWT_XMIT,
 };
 
 enum bpf_attach_type {
@@ -409,6 +412,16 @@ union bpf_attr {
  *
  * int bpf_get_numa_node_id()
  *     Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head()
+ *     Grows headroom of skb and adjusts MAC header offset accordingly.
+ *     Will extends/reallocae as required automatically.
+ *     May change skb data pointer and will thus invalidate any check
+ *     performed for direct packet access.
+ *     @skb: pointer to skb
+ *     @len: length of header to be pushed in front
+ *     @flags: Flags (unused for now)
+ *     Return: 0 on success or negative error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -453,7 +466,8 @@ union bpf_attr {
 	FN(skb_pull_data),		\
 	FN(csum_update),		\
 	FN(set_hash_invalid),		\
-	FN(get_numa_node_id),
+	FN(get_numa_node_id),		\
+	FN(skb_change_head),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -537,6 +551,22 @@ struct bpf_tunnel_key {
 	__u32 tunnel_label;
 };
 
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled seprately, see XDP_*.
+ */
+enum bpf_ret_code {
+	BPF_OK = 0,
+	/* 1 reserved */
+	BPF_DROP = 2,
+	/* 3-6 reserved */
+	BPF_REDIRECT = 7,
+	/* >127 are reserved for prog type specific return codes */
+};
+
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
  * return codes are reserved for future use. Unknown return codes will result
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 453cc6215bfd..92724cba1eba 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -10,6 +10,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_ILA,
 	LWTUNNEL_ENCAP_IP6,
 	LWTUNNEL_ENCAP_SEG6,
+	LWTUNNEL_ENCAP_BPF,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
@@ -43,4 +44,26 @@ enum lwtunnel_ip6_t {
 
 #define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1)
 
+enum {
+	LWT_BPF_PROG_UNSPEC,
+	LWT_BPF_PROG_FD,
+	LWT_BPF_PROG_NAME,
+	__LWT_BPF_PROG_MAX,
+};
+
+#define LWT_BPF_PROG_MAX (__LWT_BPF_PROG_MAX - 1)
+
+enum {
+	LWT_BPF_UNSPEC,
+	LWT_BPF_IN,
+	LWT_BPF_OUT,
+	LWT_BPF_XMIT,
+	LWT_BPF_XMIT_HEADROOM,
+	__LWT_BPF_MAX,
+};
+
+#define LWT_BPF_MAX (__LWT_BPF_MAX - 1)
+
+#define LWT_BPF_MAX_HEADROOM 256
+
 #endif /* _UAPI_LWTUNNEL_H_ */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8740c5fa02fc..8135cb1077ee 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -633,12 +633,19 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 #define MAX_PACKET_OFF 0xffff
 
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
-				       const struct bpf_call_arg_meta *meta)
+				       const struct bpf_call_arg_meta *meta,
+				       enum bpf_access_type t)
 {
 	switch (env->prog->type) {
+	case BPF_PROG_TYPE_LWT_IN:
+	case BPF_PROG_TYPE_LWT_OUT:
+		/* dst_input() and dst_output() can't write for now */
+		if (t == BPF_WRITE)
+			return false;
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
 	case BPF_PROG_TYPE_XDP:
+	case BPF_PROG_TYPE_LWT_XMIT:
 		if (meta)
 			return meta->pkt_access;
 
@@ -837,7 +844,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			err = check_stack_read(state, off, size, value_regno);
 		}
 	} else if (state->regs[regno].type == PTR_TO_PACKET) {
-		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
+		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 			verbose("cannot write into packet\n");
 			return -EACCES;
 		}
@@ -970,7 +977,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		return 0;
 	}
 
-	if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+	if (type == PTR_TO_PACKET &&
+	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
 		verbose("helper access to the packet is not allowed\n");
 		return -EACCES;
 	}
diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd340b72b..a1005007224c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -402,6 +402,14 @@ config LWTUNNEL
 	  weight tunnel endpoint. Tunnel encapsulation parameters are stored
 	  with light weight tunnel state associated with fib routes.
 
+config LWTUNNEL_BPF
+	bool "Execute BPF program as route nexthop action"
+	depends on LWTUNNEL
+	default y if LWTUNNEL=y
+	---help---
+	  Allows to run BPF programs as a nexthop action following a route
+	  lookup for incoming and outgoing packets.
+
 config DST_CACHE
 	bool
 	default n
diff --git a/net/core/Makefile b/net/core/Makefile
index d6508c2ddca5..f6761b6e3b29 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/filter.c b/net/core/filter.c
index 698a262b8ebb..1c4d0faf22c8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1689,6 +1689,12 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
 static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
 				 u32 flags)
 {
+	/* Verify that a link layer header is carried */
+	if (unlikely(skb->mac_header >= skb->network_header)) {
+		kfree_skb(skb);
+		return -ERANGE;
+	}
+
 	bpf_push_mac_rcsum(skb);
 	return flags & BPF_F_INGRESS ?
 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
@@ -2188,12 +2194,53 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	u32 max_len = __bpf_skb_max_len(skb);
+	u32 new_len = skb->len + head_room;
+	int ret;
+
+	if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
+		     new_len < skb->len))
+		return -EINVAL;
+
+	ret = skb_cow(skb, head_room);
+	if (likely(!ret)) {
+		/* Idea for this helper is that we currently only
+		 * allow to expand on mac header. This means that
+		 * skb->protocol network header, etc, stay as is.
+		 * Compared to bpf_skb_change_tail(), we're more
+		 * flexible due to not needing to linearize or
+		 * reset GSO. Intention for this helper is to be
+		 * used by an L3 skb that needs to push mac header
+		 * for redirection into L2 device.
+		 */
+		__skb_push(skb, head_room);
+		memset(skb->data, 0, head_room);
+		skb_reset_mac_header(skb);
+	}
+
+	bpf_compute_data_end(skb);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_change_head_proto = {
+	.func		= bpf_skb_change_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
 	    func == bpf_skb_vlan_pop ||
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
+	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
 	    func == bpf_l3_csum_replace ||
@@ -2639,6 +2686,68 @@ cg_skb_func_proto(enum bpf_func_id func_id)
 	}
 }
 
+static const struct bpf_func_proto *
+lwt_inout_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_skb_pull_data:
+		return &bpf_skb_pull_data_proto;
+	case BPF_FUNC_csum_diff:
+		return &bpf_csum_diff_proto;
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_proto;
+	case BPF_FUNC_get_route_realm:
+		return &bpf_get_route_realm_proto;
+	case BPF_FUNC_get_hash_recalc:
+		return &bpf_get_hash_recalc_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_skb_event_output_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_skb_under_cgroup:
+		return &bpf_skb_under_cgroup_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
+lwt_xmit_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_get_tunnel_key:
+		return &bpf_skb_get_tunnel_key_proto;
+	case BPF_FUNC_skb_set_tunnel_key:
+		return bpf_get_skb_set_tunnel_proto(func_id);
+	case BPF_FUNC_skb_get_tunnel_opt:
+		return &bpf_skb_get_tunnel_opt_proto;
+	case BPF_FUNC_skb_set_tunnel_opt:
+		return bpf_get_skb_set_tunnel_proto(func_id);
+	case BPF_FUNC_redirect:
+		return &bpf_redirect_proto;
+	case BPF_FUNC_clone_redirect:
+		return &bpf_clone_redirect_proto;
+	case BPF_FUNC_skb_change_tail:
+		return &bpf_skb_change_tail_proto;
+	case BPF_FUNC_skb_change_head:
+		return &bpf_skb_change_head_proto;
+	case BPF_FUNC_skb_store_bytes:
+		return &bpf_skb_store_bytes_proto;
+	case BPF_FUNC_csum_update:
+		return &bpf_csum_update_proto;
+	case BPF_FUNC_l3_csum_replace:
+		return &bpf_l3_csum_replace_proto;
+	case BPF_FUNC_l4_csum_replace:
+		return &bpf_l4_csum_replace_proto;
+	case BPF_FUNC_set_hash_invalid:
+		return &bpf_set_hash_invalid_proto;
+	default:
+		return lwt_inout_func_proto(func_id);
+	}
+}
+
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
 	if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2676,6 +2785,39 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return __is_valid_access(off, size, type);
 }
 
+static bool lwt_is_valid_access(int off, int size,
+				enum bpf_access_type type,
+				enum bpf_reg_type *reg_type)
+{
+	switch (off) {
+	case offsetof(struct __sk_buff, tc_classid):
+		return false;
+	}
+
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct __sk_buff, mark):
+		case offsetof(struct __sk_buff, priority):
+		case offsetof(struct __sk_buff, cb[0]) ...
+		     offsetof(struct __sk_buff, cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case offsetof(struct __sk_buff, data):
+		*reg_type = PTR_TO_PACKET;
+		break;
+	case offsetof(struct __sk_buff, data_end):
+		*reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return __is_valid_access(off, size, type);
+}
+
 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 			       const struct bpf_prog *prog)
 {
@@ -3007,6 +3149,19 @@ static const struct bpf_verifier_ops cg_skb_ops = {
 	.convert_ctx_access	= sk_filter_convert_ctx_access,
 };
 
+static const struct bpf_verifier_ops lwt_inout_ops = {
+	.get_func_proto		= lwt_inout_func_proto,
+	.is_valid_access	= lwt_is_valid_access,
+	.convert_ctx_access	= sk_filter_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops lwt_xmit_ops = {
+	.get_func_proto		= lwt_xmit_func_proto,
+	.is_valid_access	= lwt_is_valid_access,
+	.convert_ctx_access	= sk_filter_convert_ctx_access,
+	.gen_prologue		= tc_cls_act_prologue,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.ops	= &sk_filter_ops,
 	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3032,6 +3187,21 @@ static struct bpf_prog_type_list cg_skb_type __read_mostly = {
 	.type	= BPF_PROG_TYPE_CGROUP_SKB,
 };
 
+static struct bpf_prog_type_list lwt_in_type __read_mostly = {
+	.ops	= &lwt_inout_ops,
+	.type	= BPF_PROG_TYPE_LWT_IN,
+};
+
+static struct bpf_prog_type_list lwt_out_type __read_mostly = {
+	.ops	= &lwt_inout_ops,
+	.type	= BPF_PROG_TYPE_LWT_OUT,
+};
+
+static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
+	.ops	= &lwt_xmit_ops,
+	.type	= BPF_PROG_TYPE_LWT_XMIT,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
@@ -3039,6 +3209,9 @@ static int __init register_sk_filter_ops(void)
 	bpf_register_prog_type(&sched_act_type);
 	bpf_register_prog_type(&xdp_type);
 	bpf_register_prog_type(&cg_skb_type);
+	bpf_register_prog_type(&lwt_in_type);
+	bpf_register_prog_type(&lwt_out_type);
+	bpf_register_prog_type(&lwt_xmit_type);
 
 	return 0;
 }
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
new file mode 100644
index 000000000000..71bb3e2eca08
--- /dev/null
+++ b/net/core/lwt_bpf.c
@@ -0,0 +1,396 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <net/lwtunnel.h>
+
+struct bpf_lwt_prog {
+	struct bpf_prog *prog;
+	char *name;
+};
+
+struct bpf_lwt {
+	struct bpf_lwt_prog in;
+	struct bpf_lwt_prog out;
+	struct bpf_lwt_prog xmit;
+	int family;
+};
+
+#define MAX_PROG_NAME 256
+
+static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+	return (struct bpf_lwt *)lwt->data;
+}
+
+#define NO_REDIRECT false
+#define CAN_REDIRECT true
+
+static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
+		       struct dst_entry *dst, bool can_redirect)
+{
+	int ret;
+
+	/* Preempt disable is needed to protect per-cpu redirect_info between
+	 * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
+	 * access to maps strictly require a rcu_read_lock() for protection,
+	 * mixing with BH RCU lock doesn't work.
+	 */
+	preempt_disable();
+	rcu_read_lock();
+	bpf_compute_data_end(skb);
+	ret = bpf_prog_run_save_cb(lwt->prog, skb);
+	rcu_read_unlock();
+
+	switch (ret) {
+	case BPF_OK:
+		break;
+
+	case BPF_REDIRECT:
+		if (unlikely(!can_redirect)) {
+			pr_warn_once("Illegal redirect return code in prog %s\n",
+				     lwt->name ? : "<unknown>");
+			ret = BPF_OK;
+		} else {
+			ret = skb_do_redirect(skb);
+			if (ret == 0)
+				ret = BPF_REDIRECT;
+		}
+		break;
+
+	case BPF_DROP:
+		kfree_skb(skb);
+		ret = -EPERM;
+		break;
+
+	default:
+		pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
+		kfree_skb(skb);
+		ret = -EINVAL;
+		break;
+	}
+
+	preempt_enable();
+
+	return ret;
+}
+
+static int bpf_input(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct bpf_lwt *bpf;
+	int ret;
+
+	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+	if (bpf->in.prog) {
+		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (unlikely(!dst->lwtstate->orig_input)) {
+		pr_warn_once("orig_input not set on dst for prog %s\n",
+			     bpf->out.name);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return dst->lwtstate->orig_input(skb);
+}
+
+static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct bpf_lwt *bpf;
+	int ret;
+
+	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+	if (bpf->out.prog) {
+		ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (unlikely(!dst->lwtstate->orig_output)) {
+		pr_warn_once("orig_output not set on dst for prog %s\n",
+			     bpf->out.name);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return dst->lwtstate->orig_output(net, sk, skb);
+}
+
+static int xmit_check_hhlen(struct sk_buff *skb)
+{
+	int hh_len = skb_dst(skb)->dev->hard_header_len;
+
+	if (skb_headroom(skb) < hh_len) {
+		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+		if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int bpf_xmit(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct bpf_lwt *bpf;
+
+	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+	if (bpf->xmit.prog) {
+		int ret;
+
+		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
+		switch (ret) {
+		case BPF_OK:
+			/* If the header was expanded, headroom might be too
+			 * small for L2 header to come, expand as needed.
+			 */
+			ret = xmit_check_hhlen(skb);
+			if (unlikely(ret))
+				return ret;
+
+			return LWTUNNEL_XMIT_CONTINUE;
+		case BPF_REDIRECT:
+			return LWTUNNEL_XMIT_DONE;
+		default:
+			return ret;
+		}
+	}
+
+	return LWTUNNEL_XMIT_CONTINUE;
+}
+
+static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
+{
+	if (prog->prog)
+		bpf_prog_put(prog->prog);
+
+	kfree(prog->name);
+}
+
+static void bpf_destroy_state(struct lwtunnel_state *lwt)
+{
+	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+
+	bpf_lwt_prog_destroy(&bpf->in);
+	bpf_lwt_prog_destroy(&bpf->out);
+	bpf_lwt_prog_destroy(&bpf->xmit);
+}
+
+static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
+	[LWT_BPF_PROG_FD]   = { .type = NLA_U32, },
+	[LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
+				.len = MAX_PROG_NAME },
+};
+
+static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
+			  enum bpf_prog_type type)
+{
+	struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
+	struct bpf_prog *p;
+	int ret;
+	u32 fd;
+
+	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
+		return -EINVAL;
+
+	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
+	if (!prog->name)
+		return -ENOMEM;
+
+	fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
+	p = bpf_prog_get_type(fd, type);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	prog->prog = p;
+
+	return 0;
+}
+
+static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
+	[LWT_BPF_IN]		= { .type = NLA_NESTED, },
+	[LWT_BPF_OUT]		= { .type = NLA_NESTED, },
+	[LWT_BPF_XMIT]		= { .type = NLA_NESTED, },
+	[LWT_BPF_XMIT_HEADROOM]	= { .type = NLA_U32 },
+};
+
+static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
+			   unsigned int family, const void *cfg,
+			   struct lwtunnel_state **ts)
+{
+	struct nlattr *tb[LWT_BPF_MAX + 1];
+	struct lwtunnel_state *newts;
+	struct bpf_lwt *bpf;
+	int ret;
+
+	if (family != AF_INET && family != AF_INET6)
+		return -EAFNOSUPPORT;
+
+	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
+		return -EINVAL;
+
+	newts = lwtunnel_state_alloc(sizeof(*bpf));
+	if (!newts)
+		return -ENOMEM;
+
+	newts->type = LWTUNNEL_ENCAP_BPF;
+	bpf = bpf_lwt_lwtunnel(newts);
+
+	if (tb[LWT_BPF_IN]) {
+		newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+		ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
+				     BPF_PROG_TYPE_LWT_IN);
+		if (ret  < 0)
+			goto errout;
+	}
+
+	if (tb[LWT_BPF_OUT]) {
+		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+		ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
+				     BPF_PROG_TYPE_LWT_OUT);
+		if (ret < 0)
+			goto errout;
+	}
+
+	if (tb[LWT_BPF_XMIT]) {
+		newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+		ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
+				     BPF_PROG_TYPE_LWT_XMIT);
+		if (ret < 0)
+			goto errout;
+	}
+
+	if (tb[LWT_BPF_XMIT_HEADROOM]) {
+		u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);
+
+		if (headroom > LWT_BPF_MAX_HEADROOM) {
+			ret = -ERANGE;
+			goto errout;
+		}
+
+		newts->headroom = headroom;
+	}
+
+	bpf->family = family;
+	*ts = newts;
+
+	return 0;
+
+errout:
+	bpf_destroy_state(newts);
+	kfree(newts);
+	return ret;
+}
+
+static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
+			     struct bpf_lwt_prog *prog)
+{
+	struct nlattr *nest;
+
+	if (!prog->prog)
+		return 0;
+
+	nest = nla_nest_start(skb, attr);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (prog->name &&
+	    nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
+		return -EMSGSIZE;
+
+	return nla_nest_end(skb, nest);
+}
+
+static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
+{
+	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+
+	if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
+	    bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
+	    bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	int nest_len = nla_total_size(sizeof(struct nlattr)) +
+		       nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
+		       0;
+
+	return nest_len + /* LWT_BPF_IN */
+	       nest_len + /* LWT_BPF_OUT */
+	       nest_len + /* LWT_BPF_XMIT */
+	       0;
+}
+
+int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
+{
+	/* FIXME:
+	 * The LWT state is currently rebuilt for delete requests which
+	 * results in a new bpf_prog instance. Comparing names for now.
+	 */
+	if (!a->name && !b->name)
+		return 0;
+
+	if (!a->name || !b->name)
+		return 1;
+
+	return strcmp(a->name, b->name);
+}
+
+static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
+	struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);
+
+	return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
+	       bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
+	       bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
+}
+
+static const struct lwtunnel_encap_ops bpf_encap_ops = {
+	.build_state	= bpf_build_state,
+	.destroy_state	= bpf_destroy_state,
+	.input		= bpf_input,
+	.output		= bpf_output,
+	.xmit		= bpf_xmit,
+	.fill_encap	= bpf_fill_encap_info,
+	.get_encap_size = bpf_encap_nlsize,
+	.cmp_encap	= bpf_encap_cmp,
+};
+
+static int __init bpf_lwt_init(void)
+{
+	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
+}
+
+subsys_initcall(bpf_lwt_init)
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 03976e939818..a5d4e866ce88 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -41,6 +41,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
 		return "ILA";
 	case LWTUNNEL_ENCAP_SEG6:
 		return "SEG6";
+	case LWTUNNEL_ENCAP_BPF:
+		return "BPF";
 	case LWTUNNEL_ENCAP_IP6:
 	case LWTUNNEL_ENCAP_IP:
 	case LWTUNNEL_ENCAP_NONE:
-- 
cgit v1.2.3


From 366cbf2f46048d70005c6c33dc289330f24b54b0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Nov 2016 22:16:06 +0100
Subject: bpf, xdp: drop rcu_read_lock from bpf_prog_run_xdp and move to caller

After 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"),
the rcu_read_lock() in bpf_prog_run_xdp() is superfluous, since callers
need to hold rcu_read_lock() already to make sure BPF program doesn't
get released in the background.

Thus, drop it from bpf_prog_run_xdp(), as it can otherwise be misleading.
Still keeping the bpf_prog_run_xdp() is useful as it allows for grepping
in XDP supported drivers and to keep the typecheck on the context intact.
For mlx4, this means we don't have a double rcu_read_lock() anymore. nfp can
just make use of bpf_prog_run_xdp(), too. For qede, just move rcu_read_lock()
out of the helper. When the driver gets atomic replace support, this will
move to call-sites eventually.

mlx5 needs actual fixing as it has the same issue as described already in
326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"),
that is, we're under RCU bh at this time, BPF programs are released via
call_rcu(), and call_rcu() != call_rcu_bh(), so we need to properly mark
read side as programs can get xchg()'ed in mlx5e_xdp_set() without queue
reset.

Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c     |  8 ++++++--
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c |  2 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c        |  7 +++++++
 include/linux/filter.h                              | 18 +++++++++---------
 4 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b036710ba52c..42cd687e6608 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -737,10 +737,10 @@ static inline
 struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 			     u16 wqe_counter, u32 cqe_bcnt)
 {
-	struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
 	struct mlx5e_dma_info *di;
 	struct sk_buff *skb;
 	void *va, *data;
+	bool consumed;
 
 	di             = &rq->dma_info[wqe_counter];
 	va             = page_address(di->page);
@@ -759,7 +759,11 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 		return NULL;
 	}
 
-	if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data,
+				    cqe_bcnt);
+	rcu_read_unlock();
+	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
 
 	skb = build_skb(va, RQ_PAGE_SIZE(rq));
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 876ab3a92ad5..00d9a03be31d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1518,7 +1518,7 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
 	xdp.data = data;
 	xdp.data_end = data + len;
 
-	return BPF_PROG_RUN(prog, &xdp);
+	return bpf_prog_run_xdp(prog, &xdp);
 }
 
 /**
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 172ff6da92ad..faeaa9f3b197 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1497,7 +1497,14 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 
 	xdp.data = page_address(bd->data) + cqe->placement_offset;
 	xdp.data_end = xdp.data + len;
+
+	/* Queues always have a full reset currently, so for the time
+	 * being until there's atomic program replace just mark read
+	 * side for map helpers.
+	 */
+	rcu_read_lock();
 	act = bpf_prog_run_xdp(prog, &xdp);
+	rcu_read_unlock();
 
 	if (act == XDP_PASS)
 		return true;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7ba644626553..97338134398f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -498,16 +498,16 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 	return BPF_PROG_RUN(prog, skb);
 }
 
-static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
-				   struct xdp_buff *xdp)
+static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
+					    struct xdp_buff *xdp)
 {
-	u32 ret;
-
-	rcu_read_lock();
-	ret = BPF_PROG_RUN(prog, xdp);
-	rcu_read_unlock();
-
-	return ret;
+	/* Caller needs to hold rcu_read_lock() (!), otherwise program
+	 * can be released while still running, or map elements could be
+	 * freed early while still having concurrent users. XDP fastpath
+	 * already takes rcu_read_lock() when fetching the program, so
+	 * it's not necessary here anymore.
+	 */
+	return BPF_PROG_RUN(prog, xdp);
 }
 
 static inline unsigned int bpf_prog_size(unsigned int proglen)
-- 
cgit v1.2.3


From f8c3cf9d7d7f04718e0d51c28f8430afa6058b3b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Oct 2016 10:12:45 -0400
Subject: NFSv4: Add a generic structure for managing layout-private
 information

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs_xdr.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bfbd0cace91b..331a3200eb01 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -216,6 +216,20 @@ struct nfs4_get_lease_time_res {
 	struct nfs_fsinfo	       *lr_fsinfo;
 };
 
+struct xdr_stream;
+struct nfs4_xdr_opaque_data;
+
+struct nfs4_xdr_opaque_ops {
+	void (*encode)(struct xdr_stream *, const void *args,
+			const struct nfs4_xdr_opaque_data *);
+	void (*free)(struct nfs4_xdr_opaque_data *);
+};
+
+struct nfs4_xdr_opaque_data {
+	const struct nfs4_xdr_opaque_ops *ops;
+	void *data;
+};
+
 #define PNFS_LAYOUT_MAXSIZE 4096
 
 struct nfs4_layoutdriver_data {
-- 
cgit v1.2.3


From fc831825f99eb3a2f1bf3fe7307b392513b642a5 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuval.mintz@cavium.com>
Date: Thu, 1 Dec 2016 00:21:06 -0800
Subject: qed: Add support for hardware offloaded iSCSI.

This adds the backbone required for the various HW initalizations
which are necessary for the iSCSI driver (qedi) for QLogic FastLinQ
4xxxx line of adapters - FW notification, resource initializations, etc.

Signed-off-by: Arun Easi <arun.easi@cavium.com>
Signed-off-by: Yuval Mintz <yuval.mintz@cavium.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/Kconfig            |    3 +
 drivers/net/ethernet/qlogic/qed/Makefile       |    1 +
 drivers/net/ethernet/qlogic/qed/qed.h          |    7 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c      |   12 +
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c    | 1277 ++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_iscsi.h    |   52 +
 drivers/net/ethernet/qlogic/qed/qed_ll2.c      |    4 +-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |    2 +
 drivers/net/ethernet/qlogic/qed/qed_spq.c      |   15 +
 include/linux/qed/qed_if.h                     |    2 +
 include/linux/qed/qed_iscsi_if.h               |  229 +++++
 11 files changed, 1602 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_iscsi.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_iscsi.h
 create mode 100644 include/linux/qed/qed_iscsi_if.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 32f2a45f4ab2..3cfd10503446 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -110,4 +110,7 @@ config QEDE
 config QED_RDMA
 	bool
 
+config QED_ISCSI
+	bool
+
 endif # NET_VENDOR_QLOGIC
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 967acf322c09..597e15c54a11 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -6,3 +6,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_RDMA) += qed_roce.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 244dd40ccac3..1f423b38faab 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -35,6 +35,7 @@ extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_WFQ_UNIT	100
 
+#define ISCSI_BDQ_ID(_port_id) (_port_id)
 #define QED_WID_SIZE            (1024)
 #define QED_PF_DEMS_SIZE        (4)
 
@@ -383,6 +384,7 @@ struct qed_hwfn {
 	bool				using_ll2;
 	struct qed_ll2_info		*p_ll2_info;
 	struct qed_rdma_info		*p_rdma_info;
+	struct qed_iscsi_info		*p_iscsi_info;
 	struct qed_pf_params		pf_params;
 
 	bool b_rdma_enabled_in_prs;
@@ -581,6 +583,8 @@ struct qed_dev {
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
+	u32 flags;
+#define QED_FLAG_STORAGE_STARTED	(BIT(0))
 	int				msg_enable;
 
 	struct pci_params		pci_params;
@@ -594,6 +598,7 @@ struct qed_dev {
 	union {
 		struct qed_common_cb_ops	*common;
 		struct qed_eth_cb_ops		*eth;
+		struct qed_iscsi_cb_ops		*iscsi;
 	} protocol_ops;
 	void				*ops_cookie;
 
@@ -603,7 +608,7 @@ struct qed_dev {
 	struct qed_cb_ll2_info		*ll2;
 	u8				ll2_mac_address[ETH_ALEN];
 #endif
-
+	DECLARE_HASHTABLE(connections, 10);
 	const struct firmware		*firmware;
 
 	u32 rdma_max_sge;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 00b9a67ba359..cd9810296630 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -29,6 +29,7 @@
 #include "qed_hw.h"
 #include "qed_init_ops.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_ll2.h"
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
@@ -146,6 +147,8 @@ void qed_resc_free(struct qed_dev *cdev)
 #ifdef CONFIG_QED_LL2
 		qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+			qed_iscsi_free(p_hwfn, p_hwfn->p_iscsi_info);
 		qed_iov_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
@@ -402,6 +405,7 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 int qed_resc_alloc(struct qed_dev *cdev)
 {
+	struct qed_iscsi_info *p_iscsi_info;
 #ifdef CONFIG_QED_LL2
 	struct qed_ll2_info *p_ll2_info;
 #endif
@@ -507,6 +511,12 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			p_hwfn->p_ll2_info = p_ll2_info;
 		}
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+			p_iscsi_info = qed_iscsi_alloc(p_hwfn);
+			if (!p_iscsi_info)
+				goto alloc_no_mem;
+			p_hwfn->p_iscsi_info = p_iscsi_info;
+		}
 
 		/* DMA info initialization */
 		rc = qed_dmae_info_alloc(p_hwfn);
@@ -560,6 +570,8 @@ void qed_resc_setup(struct qed_dev *cdev)
 		if (p_hwfn->using_ll2)
 			qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+			qed_iscsi_setup(p_hwfn, p_hwfn->p_iscsi_info);
 	}
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
new file mode 100644
index 000000000000..00efb1c4c57e
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -0,0 +1,1277 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/param.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_int.h"
+#include "qed_iscsi.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+#include "qed_sriov.h"
+#include "qed_reg_addr.h"
+
+struct qed_iscsi_conn {
+	struct list_head list_entry;
+	bool free_on_delete;
+
+	u16 conn_id;
+	u32 icid;
+	u32 fw_cid;
+
+	u8 layer_code;
+	u8 offl_flags;
+	u8 connect_mode;
+	u32 initial_ack;
+	dma_addr_t sq_pbl_addr;
+	struct qed_chain r2tq;
+	struct qed_chain xhq;
+	struct qed_chain uhq;
+
+	struct tcp_upload_params *tcp_upload_params_virt_addr;
+	dma_addr_t tcp_upload_params_phys_addr;
+	struct scsi_terminate_extra_params *queue_cnts_virt_addr;
+	dma_addr_t queue_cnts_phys_addr;
+	dma_addr_t syn_phy_addr;
+
+	u16 syn_ip_payload_length;
+	u8 local_mac[6];
+	u8 remote_mac[6];
+	u16 vlan_id;
+	u8 tcp_flags;
+	u8 ip_version;
+	u32 remote_ip[4];
+	u32 local_ip[4];
+	u8 ka_max_probe_cnt;
+	u8 dup_ack_theshold;
+	u32 rcv_next;
+	u32 snd_una;
+	u32 snd_next;
+	u32 snd_max;
+	u32 snd_wnd;
+	u32 rcv_wnd;
+	u32 snd_wl1;
+	u32 cwnd;
+	u32 ss_thresh;
+	u16 srtt;
+	u16 rtt_var;
+	u32 ts_time;
+	u32 ts_recent;
+	u32 ts_recent_age;
+	u32 total_rt;
+	u32 ka_timeout_delta;
+	u32 rt_timeout_delta;
+	u8 dup_ack_cnt;
+	u8 snd_wnd_probe_cnt;
+	u8 ka_probe_cnt;
+	u8 rt_cnt;
+	u32 flow_label;
+	u32 ka_timeout;
+	u32 ka_interval;
+	u32 max_rt_time;
+	u32 initial_rcv_wnd;
+	u8 ttl;
+	u8 tos_or_tc;
+	u16 remote_port;
+	u16 local_port;
+	u16 mss;
+	u8 snd_wnd_scale;
+	u8 rcv_wnd_scale;
+	u32 ts_ticks_per_second;
+	u16 da_timeout_value;
+	u8 ack_frequency;
+
+	u8 update_flag;
+	u8 default_cq;
+	u32 max_seq_size;
+	u32 max_recv_pdu_length;
+	u32 max_send_pdu_length;
+	u32 first_seq_length;
+	u32 exp_stat_sn;
+	u32 stat_sn;
+	u16 physical_q0;
+	u16 physical_q1;
+	u8 abortive_dsconnect;
+};
+
+static int
+qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
+			enum spq_mode comp_mode,
+			struct qed_spq_comp_cb *p_comp_addr,
+			void *event_context, iscsi_event_cb_t async_event_cb)
+{
+	struct iscsi_init_ramrod_params *p_ramrod = NULL;
+	struct scsi_init_func_queues *p_queue = NULL;
+	struct qed_iscsi_pf_params *p_params = NULL;
+	struct iscsi_spe_func_init *p_init = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = 0;
+	u32 dval;
+	u16 val;
+	u8 i;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_INIT_FUNC,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_init;
+	p_init = &p_ramrod->iscsi_init_spe;
+	p_params = &p_hwfn->pf_params.iscsi_pf_params;
+	p_queue = &p_init->q_params;
+
+	SET_FIELD(p_init->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, ISCSI_SLOW_PATH_LAYER_CODE);
+	p_init->hdr.op_code = ISCSI_RAMROD_CMD_ID_INIT_FUNC;
+
+	val = p_params->half_way_close_timeout;
+	p_init->half_way_close_timeout = cpu_to_le16(val);
+	p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
+	p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
+	p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+	p_init->func_params.log_page_size = p_params->log_page_size;
+	val = p_params->num_tasks;
+	p_init->func_params.num_tasks = cpu_to_le16(val);
+	p_init->debug_mode.flags = p_params->debug_mode;
+
+	DMA_REGPAIR_LE(p_queue->glbl_q_params_addr,
+		       p_params->glbl_q_params_addr);
+
+	val = p_params->cq_num_entries;
+	p_queue->cq_num_entries = cpu_to_le16(val);
+	val = p_params->cmdq_num_entries;
+	p_queue->cmdq_num_entries = cpu_to_le16(val);
+	p_queue->num_queues = p_params->num_queues;
+	dval = (u8)p_hwfn->hw_info.resc_start[QED_CMDQS_CQS];
+	p_queue->queue_relative_offset = (u8)dval;
+	p_queue->cq_sb_pi = p_params->gl_rq_pi;
+	p_queue->cmdq_sb_pi = p_params->gl_cmd_pi;
+
+	for (i = 0; i < p_params->num_queues; i++) {
+		val = p_hwfn->sbs_info[i]->igu_sb_id;
+		p_queue->cq_cmdq_sb_num_arr[i] = cpu_to_le16(val);
+	}
+
+	p_queue->bdq_resource_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+	DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_RQ],
+		       p_params->bdq_pbl_base_addr[BDQ_ID_RQ]);
+	p_queue->bdq_pbl_num_entries[BDQ_ID_RQ] =
+	    p_params->bdq_pbl_num_entries[BDQ_ID_RQ];
+	val = p_params->bdq_xoff_threshold[BDQ_ID_RQ];
+	p_queue->bdq_xoff_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+	val = p_params->bdq_xon_threshold[BDQ_ID_RQ];
+	p_queue->bdq_xon_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+
+	DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_IMM_DATA],
+		       p_params->bdq_pbl_base_addr[BDQ_ID_IMM_DATA]);
+	p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA] =
+	    p_params->bdq_pbl_num_entries[BDQ_ID_IMM_DATA];
+	val = p_params->bdq_xoff_threshold[BDQ_ID_IMM_DATA];
+	p_queue->bdq_xoff_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+	val = p_params->bdq_xon_threshold[BDQ_ID_IMM_DATA];
+	p_queue->bdq_xon_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+	val = p_params->rq_buffer_size;
+	p_queue->rq_buffer_size = cpu_to_le16(val);
+	if (p_params->is_target) {
+		SET_FIELD(p_queue->q_validity,
+			  SCSI_INIT_FUNC_QUEUES_RQ_VALID, 1);
+		if (p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA])
+			SET_FIELD(p_queue->q_validity,
+				  SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID, 1);
+		SET_FIELD(p_queue->q_validity,
+			  SCSI_INIT_FUNC_QUEUES_CMD_VALID, 1);
+	} else {
+		SET_FIELD(p_queue->q_validity,
+			  SCSI_INIT_FUNC_QUEUES_RQ_VALID, 1);
+	}
+	p_ramrod->tcp_init.two_msl_timer = cpu_to_le32(p_params->two_msl_timer);
+	val = p_params->tx_sws_timer;
+	p_ramrod->tcp_init.tx_sws_timer = cpu_to_le16(val);
+	p_ramrod->tcp_init.maxfinrt = p_params->max_fin_rt;
+
+	p_hwfn->p_iscsi_info->event_context = event_context;
+	p_hwfn->p_iscsi_info->event_cb = async_event_cb;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
+				     struct qed_iscsi_conn *p_conn,
+				     enum spq_mode comp_mode,
+				     struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_conn_offload *p_ramrod = NULL;
+	struct tcp_offload_params_opt2 *p_tcp2 = NULL;
+	struct tcp_offload_params *p_tcp = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	union qed_qm_pq_params pq_params;
+	u16 pq0_id = 0, pq1_id = 0;
+	dma_addr_t r2tq_pbl_addr;
+	dma_addr_t xhq_pbl_addr;
+	dma_addr_t uhq_pbl_addr;
+	int rc = 0;
+	u32 dval;
+	u16 wval;
+	u8 i;
+	u16 *p;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_offload;
+
+	/* Transmission PQ is the first of the PF */
+	memset(&pq_params, 0, sizeof(pq_params));
+	pq0_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+	p_conn->physical_q0 = cpu_to_le16(pq0_id);
+	p_ramrod->iscsi.physical_q0 = cpu_to_le16(pq0_id);
+
+	/* iSCSI Pure-ACK PQ */
+	pq_params.iscsi.q_idx = 1;
+	pq1_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+	p_conn->physical_q1 = cpu_to_le16(pq1_id);
+	p_ramrod->iscsi.physical_q1 = cpu_to_le16(pq1_id);
+
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN;
+	SET_FIELD(p_ramrod->hdr.flags, ISCSI_SLOW_PATH_HDR_LAYER_CODE,
+		  p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+
+	DMA_REGPAIR_LE(p_ramrod->iscsi.sq_pbl_addr, p_conn->sq_pbl_addr);
+
+	r2tq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->r2tq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.r2tq_pbl_addr, r2tq_pbl_addr);
+
+	xhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->xhq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.xhq_pbl_addr, xhq_pbl_addr);
+
+	uhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->uhq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.uhq_pbl_addr, uhq_pbl_addr);
+
+	p_ramrod->iscsi.initial_ack = cpu_to_le32(p_conn->initial_ack);
+	p_ramrod->iscsi.flags = p_conn->offl_flags;
+	p_ramrod->iscsi.default_cq = p_conn->default_cq;
+	p_ramrod->iscsi.stat_sn = cpu_to_le32(p_conn->stat_sn);
+
+	if (!GET_FIELD(p_ramrod->iscsi.flags,
+		       ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B)) {
+		p_tcp = &p_ramrod->tcp;
+
+		p = (u16 *)p_conn->local_mac;
+		p_tcp->local_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p = (u16 *)p_conn->remote_mac;
+		p_tcp->remote_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p_tcp->vlan_id = cpu_to_le16(p_conn->vlan_id);
+
+		p_tcp->flags = p_conn->tcp_flags;
+		p_tcp->ip_version = p_conn->ip_version;
+		for (i = 0; i < 4; i++) {
+			dval = p_conn->remote_ip[i];
+			p_tcp->remote_ip[i] = cpu_to_le32(dval);
+			dval = p_conn->local_ip[i];
+			p_tcp->local_ip[i] = cpu_to_le32(dval);
+		}
+		p_tcp->ka_max_probe_cnt = p_conn->ka_max_probe_cnt;
+		p_tcp->dup_ack_theshold = p_conn->dup_ack_theshold;
+
+		p_tcp->rcv_next = cpu_to_le32(p_conn->rcv_next);
+		p_tcp->snd_una = cpu_to_le32(p_conn->snd_una);
+		p_tcp->snd_next = cpu_to_le32(p_conn->snd_next);
+		p_tcp->snd_max = cpu_to_le32(p_conn->snd_max);
+		p_tcp->snd_wnd = cpu_to_le32(p_conn->snd_wnd);
+		p_tcp->rcv_wnd = cpu_to_le32(p_conn->rcv_wnd);
+		p_tcp->snd_wl1 = cpu_to_le32(p_conn->snd_wl1);
+		p_tcp->cwnd = cpu_to_le32(p_conn->cwnd);
+		p_tcp->ss_thresh = cpu_to_le32(p_conn->ss_thresh);
+		p_tcp->srtt = cpu_to_le16(p_conn->srtt);
+		p_tcp->rtt_var = cpu_to_le16(p_conn->rtt_var);
+		p_tcp->ts_time = cpu_to_le32(p_conn->ts_time);
+		p_tcp->ts_recent = cpu_to_le32(p_conn->ts_recent);
+		p_tcp->ts_recent_age = cpu_to_le32(p_conn->ts_recent_age);
+		p_tcp->total_rt = cpu_to_le32(p_conn->total_rt);
+		dval = p_conn->ka_timeout_delta;
+		p_tcp->ka_timeout_delta = cpu_to_le32(dval);
+		dval = p_conn->rt_timeout_delta;
+		p_tcp->rt_timeout_delta = cpu_to_le32(dval);
+		p_tcp->dup_ack_cnt = p_conn->dup_ack_cnt;
+		p_tcp->snd_wnd_probe_cnt = p_conn->snd_wnd_probe_cnt;
+		p_tcp->ka_probe_cnt = p_conn->ka_probe_cnt;
+		p_tcp->rt_cnt = p_conn->rt_cnt;
+		p_tcp->flow_label = cpu_to_le32(p_conn->flow_label);
+		p_tcp->ka_timeout = cpu_to_le32(p_conn->ka_timeout);
+		p_tcp->ka_interval = cpu_to_le32(p_conn->ka_interval);
+		p_tcp->max_rt_time = cpu_to_le32(p_conn->max_rt_time);
+		dval = p_conn->initial_rcv_wnd;
+		p_tcp->initial_rcv_wnd = cpu_to_le32(dval);
+		p_tcp->ttl = p_conn->ttl;
+		p_tcp->tos_or_tc = p_conn->tos_or_tc;
+		p_tcp->remote_port = cpu_to_le16(p_conn->remote_port);
+		p_tcp->local_port = cpu_to_le16(p_conn->local_port);
+		p_tcp->mss = cpu_to_le16(p_conn->mss);
+		p_tcp->snd_wnd_scale = p_conn->snd_wnd_scale;
+		p_tcp->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+		dval = p_conn->ts_ticks_per_second;
+		p_tcp->ts_ticks_per_second = cpu_to_le32(dval);
+		wval = p_conn->da_timeout_value;
+		p_tcp->da_timeout_value = cpu_to_le16(wval);
+		p_tcp->ack_frequency = p_conn->ack_frequency;
+		p_tcp->connect_mode = p_conn->connect_mode;
+	} else {
+		p_tcp2 =
+		    &((struct iscsi_spe_conn_offload_option2 *)p_ramrod)->tcp;
+
+		p = (u16 *)p_conn->local_mac;
+		p_tcp2->local_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp2->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp2->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p = (u16 *)p_conn->remote_mac;
+		p_tcp2->remote_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp2->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp2->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p_tcp2->vlan_id = cpu_to_le16(p_conn->vlan_id);
+		p_tcp2->flags = p_conn->tcp_flags;
+
+		p_tcp2->ip_version = p_conn->ip_version;
+		for (i = 0; i < 4; i++) {
+			dval = p_conn->remote_ip[i];
+			p_tcp2->remote_ip[i] = cpu_to_le32(dval);
+			dval = p_conn->local_ip[i];
+			p_tcp2->local_ip[i] = cpu_to_le32(dval);
+		}
+
+		p_tcp2->flow_label = cpu_to_le32(p_conn->flow_label);
+		p_tcp2->ttl = p_conn->ttl;
+		p_tcp2->tos_or_tc = p_conn->tos_or_tc;
+		p_tcp2->remote_port = cpu_to_le16(p_conn->remote_port);
+		p_tcp2->local_port = cpu_to_le16(p_conn->local_port);
+		p_tcp2->mss = cpu_to_le16(p_conn->mss);
+		p_tcp2->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+		p_tcp2->connect_mode = p_conn->connect_mode;
+		wval = p_conn->syn_ip_payload_length;
+		p_tcp2->syn_ip_payload_length = cpu_to_le16(wval);
+		p_tcp2->syn_phy_addr_lo = DMA_LO_LE(p_conn->syn_phy_addr);
+		p_tcp2->syn_phy_addr_hi = DMA_HI_LE(p_conn->syn_phy_addr);
+	}
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
+				    struct qed_iscsi_conn *p_conn,
+				    enum spq_mode comp_mode,
+				    struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_conn_update_ramrod_params *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+	u32 dval;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_UPDATE_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_update;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_UPDATE_CONN;
+	SET_FIELD(p_ramrod->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+	p_ramrod->flags = p_conn->update_flag;
+	p_ramrod->max_seq_size = cpu_to_le32(p_conn->max_seq_size);
+	dval = p_conn->max_recv_pdu_length;
+	p_ramrod->max_recv_pdu_length = cpu_to_le32(dval);
+	dval = p_conn->max_send_pdu_length;
+	p_ramrod->max_send_pdu_length = cpu_to_le32(dval);
+	dval = p_conn->first_seq_length;
+	p_ramrod->first_seq_length = cpu_to_le32(dval);
+	p_ramrod->exp_stat_sn = cpu_to_le32(p_conn->exp_stat_sn);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn,
+				       struct qed_iscsi_conn *p_conn,
+				       enum spq_mode comp_mode,
+				       struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_conn_termination *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_TERMINATION_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_terminate;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_TERMINATION_CONN;
+	SET_FIELD(p_ramrod->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+	p_ramrod->abortive = p_conn->abortive_dsconnect;
+
+	DMA_REGPAIR_LE(p_ramrod->query_params_addr,
+		       p_conn->tcp_upload_params_phys_addr);
+	DMA_REGPAIR_LE(p_ramrod->queue_cnts_addr, p_conn->queue_cnts_phys_addr);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn,
+				      struct qed_iscsi_conn *p_conn,
+				      enum spq_mode comp_mode,
+				      struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_slow_path_hdr *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_CLEAR_SQ,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_empty;
+	p_ramrod->op_code = ISCSI_RAMROD_CMD_ID_CLEAR_SQ;
+	SET_FIELD(p_ramrod->flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn,
+				  enum spq_mode comp_mode,
+				  struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_func_dstry *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_DESTROY_FUNC,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_destroy;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_DESTROY_FUNC;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static void __iomem *qed_iscsi_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	return (u8 __iomem *)p_hwfn->doorbells +
+			     qed_db_addr(cid, DQ_DEMS_LEGACY);
+}
+
+static void __iomem *qed_iscsi_get_primary_bdq_prod(struct qed_hwfn *p_hwfn,
+						    u8 bdq_id)
+{
+	u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+	return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_MSDM_RAM +
+			     MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
+							     bdq_id);
+}
+
+static void __iomem *qed_iscsi_get_secondary_bdq_prod(struct qed_hwfn *p_hwfn,
+						      u8 bdq_id)
+{
+	u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+	return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_TSDM_RAM +
+			     TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
+							     bdq_id);
+}
+
+static int qed_iscsi_setup_connection(struct qed_hwfn *p_hwfn,
+				      struct qed_iscsi_conn *p_conn)
+{
+	if (!p_conn->queue_cnts_virt_addr)
+		goto nomem;
+	memset(p_conn->queue_cnts_virt_addr, 0,
+	       sizeof(*p_conn->queue_cnts_virt_addr));
+
+	if (!p_conn->tcp_upload_params_virt_addr)
+		goto nomem;
+	memset(p_conn->tcp_upload_params_virt_addr, 0,
+	       sizeof(*p_conn->tcp_upload_params_virt_addr));
+
+	if (!p_conn->r2tq.p_virt_addr)
+		goto nomem;
+	qed_chain_pbl_zero_mem(&p_conn->r2tq);
+
+	if (!p_conn->uhq.p_virt_addr)
+		goto nomem;
+	qed_chain_pbl_zero_mem(&p_conn->uhq);
+
+	if (!p_conn->xhq.p_virt_addr)
+		goto nomem;
+	qed_chain_pbl_zero_mem(&p_conn->xhq);
+
+	return 0;
+nomem:
+	return -ENOMEM;
+}
+
+static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
+					 struct qed_iscsi_conn **p_out_conn)
+{
+	u16 uhq_num_elements = 0, xhq_num_elements = 0, r2tq_num_elements = 0;
+	struct scsi_terminate_extra_params *p_q_cnts = NULL;
+	struct qed_iscsi_pf_params *p_params = NULL;
+	struct tcp_upload_params *p_tcp = NULL;
+	struct qed_iscsi_conn *p_conn = NULL;
+	int rc = 0;
+
+	/* Try finding a free connection that can be used */
+	spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+	if (!list_empty(&p_hwfn->p_iscsi_info->free_list))
+		p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+					  struct qed_iscsi_conn, list_entry);
+	if (p_conn) {
+		list_del(&p_conn->list_entry);
+		spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+		*p_out_conn = p_conn;
+		return 0;
+	}
+	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+
+	/* Need to allocate a new connection */
+	p_params = &p_hwfn->pf_params.iscsi_pf_params;
+
+	p_conn = kzalloc(sizeof(*p_conn), GFP_KERNEL);
+	if (!p_conn)
+		return -ENOMEM;
+
+	p_q_cnts = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				      sizeof(*p_q_cnts),
+				      &p_conn->queue_cnts_phys_addr,
+				      GFP_KERNEL);
+	if (!p_q_cnts)
+		goto nomem_queue_cnts_param;
+	p_conn->queue_cnts_virt_addr = p_q_cnts;
+
+	p_tcp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				   sizeof(*p_tcp),
+				   &p_conn->tcp_upload_params_phys_addr,
+				   GFP_KERNEL);
+	if (!p_tcp)
+		goto nomem_upload_param;
+	p_conn->tcp_upload_params_virt_addr = p_tcp;
+
+	r2tq_num_elements = p_params->num_r2tq_pages_in_ring *
+			    QED_CHAIN_PAGE_SIZE / 0x80;
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     r2tq_num_elements, 0x80, &p_conn->r2tq);
+	if (rc)
+		goto nomem_r2tq;
+
+	uhq_num_elements = p_params->num_uhq_pages_in_ring *
+			   QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_uhqe);
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     uhq_num_elements,
+			     sizeof(struct iscsi_uhqe), &p_conn->uhq);
+	if (rc)
+		goto nomem_uhq;
+
+	xhq_num_elements = uhq_num_elements;
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     xhq_num_elements,
+			     sizeof(struct iscsi_xhqe), &p_conn->xhq);
+	if (rc)
+		goto nomem;
+
+	p_conn->free_on_delete = true;
+	*p_out_conn = p_conn;
+	return 0;
+
+nomem:
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+nomem_uhq:
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+nomem_r2tq:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct tcp_upload_params),
+			  p_conn->tcp_upload_params_virt_addr,
+			  p_conn->tcp_upload_params_phys_addr);
+nomem_upload_param:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct scsi_terminate_extra_params),
+			  p_conn->queue_cnts_virt_addr,
+			  p_conn->queue_cnts_phys_addr);
+nomem_queue_cnts_param:
+	kfree(p_conn);
+
+	return -ENOMEM;
+}
+
+static int qed_iscsi_acquire_connection(struct qed_hwfn *p_hwfn,
+					struct qed_iscsi_conn *p_in_conn,
+					struct qed_iscsi_conn **p_out_conn)
+{
+	struct qed_iscsi_conn *p_conn = NULL;
+	int rc = 0;
+	u32 icid;
+
+	spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ISCSI, &icid);
+	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+	if (rc)
+		return rc;
+
+	/* Use input connection or allocate a new one */
+	if (p_in_conn)
+		p_conn = p_in_conn;
+	else
+		rc = qed_iscsi_allocate_connection(p_hwfn, &p_conn);
+
+	if (!rc)
+		rc = qed_iscsi_setup_connection(p_hwfn, p_conn);
+
+	if (rc) {
+		spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+		qed_cxt_release_cid(p_hwfn, icid);
+		spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+		return rc;
+	}
+
+	p_conn->icid = icid;
+	p_conn->conn_id = (u16)icid;
+	p_conn->fw_cid = (p_hwfn->hw_info.opaque_fid << 16) | icid;
+
+	*p_out_conn = p_conn;
+
+	return rc;
+}
+
+static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
+					 struct qed_iscsi_conn *p_conn)
+{
+	spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+	list_add_tail(&p_conn->list_entry, &p_hwfn->p_iscsi_info->free_list);
+	qed_cxt_release_cid(p_hwfn, p_conn->icid);
+	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+}
+
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iscsi_info *p_iscsi_info;
+
+	p_iscsi_info = kzalloc(sizeof(*p_iscsi_info), GFP_KERNEL);
+	if (!p_iscsi_info)
+		return NULL;
+
+	INIT_LIST_HEAD(&p_iscsi_info->free_list);
+	return p_iscsi_info;
+}
+
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+		     struct qed_iscsi_info *p_iscsi_info)
+{
+	spin_lock_init(&p_iscsi_info->lock);
+}
+
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+		    struct qed_iscsi_info *p_iscsi_info)
+{
+	kfree(p_iscsi_info);
+}
+
+static void _qed_iscsi_get_tstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct tstorm_iscsi_stats_drv tstats;
+	u32 tstats_addr;
+
+	memset(&tstats, 0, sizeof(tstats));
+	tstats_addr = BAR0_MAP_REG_TSDM_RAM +
+		      TSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats));
+
+	p_stats->iscsi_rx_bytes_cnt =
+	    HILO_64_REGPAIR(tstats.iscsi_rx_bytes_cnt);
+	p_stats->iscsi_rx_packet_cnt =
+	    HILO_64_REGPAIR(tstats.iscsi_rx_packet_cnt);
+	p_stats->iscsi_cmdq_threshold_cnt =
+	    le32_to_cpu(tstats.iscsi_cmdq_threshold_cnt);
+	p_stats->iscsi_rq_threshold_cnt =
+	    le32_to_cpu(tstats.iscsi_rq_threshold_cnt);
+	p_stats->iscsi_immq_threshold_cnt =
+	    le32_to_cpu(tstats.iscsi_immq_threshold_cnt);
+}
+
+static void _qed_iscsi_get_mstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct mstorm_iscsi_stats_drv mstats;
+	u32 mstats_addr;
+
+	memset(&mstats, 0, sizeof(mstats));
+	mstats_addr = BAR0_MAP_REG_MSDM_RAM +
+		      MSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &mstats, mstats_addr, sizeof(mstats));
+
+	p_stats->iscsi_rx_dropped_pdus_task_not_valid =
+	    HILO_64_REGPAIR(mstats.iscsi_rx_dropped_pdus_task_not_valid);
+}
+
+static void _qed_iscsi_get_ustats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct ustorm_iscsi_stats_drv ustats;
+	u32 ustats_addr;
+
+	memset(&ustats, 0, sizeof(ustats));
+	ustats_addr = BAR0_MAP_REG_USDM_RAM +
+		      USTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats));
+
+	p_stats->iscsi_rx_data_pdu_cnt =
+	    HILO_64_REGPAIR(ustats.iscsi_rx_data_pdu_cnt);
+	p_stats->iscsi_rx_r2t_pdu_cnt =
+	    HILO_64_REGPAIR(ustats.iscsi_rx_r2t_pdu_cnt);
+	p_stats->iscsi_rx_total_pdu_cnt =
+	    HILO_64_REGPAIR(ustats.iscsi_rx_total_pdu_cnt);
+}
+
+static void _qed_iscsi_get_xstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct xstorm_iscsi_stats_drv xstats;
+	u32 xstats_addr;
+
+	memset(&xstats, 0, sizeof(xstats));
+	xstats_addr = BAR0_MAP_REG_XSDM_RAM +
+		      XSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &xstats, xstats_addr, sizeof(xstats));
+
+	p_stats->iscsi_tx_go_to_slow_start_event_cnt =
+	    HILO_64_REGPAIR(xstats.iscsi_tx_go_to_slow_start_event_cnt);
+	p_stats->iscsi_tx_fast_retransmit_event_cnt =
+	    HILO_64_REGPAIR(xstats.iscsi_tx_fast_retransmit_event_cnt);
+}
+
+static void _qed_iscsi_get_ystats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct ystorm_iscsi_stats_drv ystats;
+	u32 ystats_addr;
+
+	memset(&ystats, 0, sizeof(ystats));
+	ystats_addr = BAR0_MAP_REG_YSDM_RAM +
+		      YSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &ystats, ystats_addr, sizeof(ystats));
+
+	p_stats->iscsi_tx_data_pdu_cnt =
+	    HILO_64_REGPAIR(ystats.iscsi_tx_data_pdu_cnt);
+	p_stats->iscsi_tx_r2t_pdu_cnt =
+	    HILO_64_REGPAIR(ystats.iscsi_tx_r2t_pdu_cnt);
+	p_stats->iscsi_tx_total_pdu_cnt =
+	    HILO_64_REGPAIR(ystats.iscsi_tx_total_pdu_cnt);
+}
+
+static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	struct pstorm_iscsi_stats_drv pstats;
+	u32 pstats_addr;
+
+	memset(&pstats, 0, sizeof(pstats));
+	pstats_addr = BAR0_MAP_REG_PSDM_RAM +
+		      PSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats));
+
+	p_stats->iscsi_tx_bytes_cnt =
+	    HILO_64_REGPAIR(pstats.iscsi_tx_bytes_cnt);
+	p_stats->iscsi_tx_packet_cnt =
+	    HILO_64_REGPAIR(pstats.iscsi_tx_packet_cnt);
+}
+
+static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
+			       struct qed_iscsi_stats *stats)
+{
+	struct qed_ptt *p_ptt;
+
+	memset(stats, 0, sizeof(*stats));
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+		return -EAGAIN;
+	}
+
+	_qed_iscsi_get_tstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_mstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_ustats(p_hwfn, p_ptt, stats);
+
+	_qed_iscsi_get_xstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_ystats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_pstats(p_hwfn, p_ptt, stats);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return 0;
+}
+
+struct qed_hash_iscsi_con {
+	struct hlist_node node;
+	struct qed_iscsi_conn *con;
+};
+
+static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
+				   struct qed_dev_iscsi_info *info)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+
+	int rc;
+
+	memset(info, 0, sizeof(*info));
+	rc = qed_fill_dev_info(cdev, &info->common);
+
+	info->primary_dbq_rq_addr =
+	    qed_iscsi_get_primary_bdq_prod(hwfn, BDQ_ID_RQ);
+	info->secondary_bdq_rq_addr =
+	    qed_iscsi_get_secondary_bdq_prod(hwfn, BDQ_ID_RQ);
+
+	return rc;
+}
+
+static void qed_register_iscsi_ops(struct qed_dev *cdev,
+				   struct qed_iscsi_cb_ops *ops, void *cookie)
+{
+	cdev->protocol_ops.iscsi = ops;
+	cdev->ops_cookie = cookie;
+}
+
+static struct qed_hash_iscsi_con *qed_iscsi_get_hash(struct qed_dev *cdev,
+						     u32 handle)
+{
+	struct qed_hash_iscsi_con *hash_con = NULL;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED))
+		return NULL;
+
+	hash_for_each_possible(cdev->connections, hash_con, node, handle) {
+		if (hash_con->con->icid == handle)
+			break;
+	}
+
+	if (!hash_con || (hash_con->con->icid != handle))
+		return NULL;
+
+	return hash_con;
+}
+
+static int qed_iscsi_stop(struct qed_dev *cdev)
+{
+	int rc;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED)) {
+		DP_NOTICE(cdev, "iscsi already stopped\n");
+		return 0;
+	}
+
+	if (!hash_empty(cdev->connections)) {
+		DP_NOTICE(cdev,
+			  "Can't stop iscsi - not all connections were returned\n");
+		return -EINVAL;
+	}
+
+	/* Stop the iscsi */
+	rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
+				    QED_SPQ_MODE_EBLOCK, NULL);
+	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
+
+	return rc;
+}
+
+static int qed_iscsi_start(struct qed_dev *cdev,
+			   struct qed_iscsi_tid *tasks,
+			   void *event_context,
+			   iscsi_event_cb_t async_event_cb)
+{
+	int rc;
+	struct qed_tid_mem *tid_info;
+
+	if (cdev->flags & QED_FLAG_STORAGE_STARTED) {
+		DP_NOTICE(cdev, "iscsi already started;\n");
+		return 0;
+	}
+
+	rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev),
+				     QED_SPQ_MODE_EBLOCK, NULL, event_context,
+				     async_event_cb);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to start iscsi\n");
+		return rc;
+	}
+
+	cdev->flags |= QED_FLAG_STORAGE_STARTED;
+	hash_init(cdev->connections);
+
+	if (!tasks)
+		return 0;
+
+	tid_info = kzalloc(sizeof(*tid_info), GFP_KERNEL);
+
+	if (!tid_info) {
+		qed_iscsi_stop(cdev);
+		return -ENOMEM;
+	}
+
+	rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev),
+				      tid_info);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to gather task information\n");
+		qed_iscsi_stop(cdev);
+		kfree(tid_info);
+		return rc;
+	}
+
+	/* Fill task information */
+	tasks->size = tid_info->tid_size;
+	tasks->num_tids_per_block = tid_info->num_tids_per_block;
+	memcpy(tasks->blocks, tid_info->blocks,
+	       MAX_TID_BLOCKS_ISCSI * sizeof(u8 *));
+
+	kfree(tid_info);
+
+	return 0;
+}
+
+static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
+				  u32 *handle,
+				  u32 *fw_cid, void __iomem **p_doorbell)
+{
+	struct qed_hash_iscsi_con *hash_con;
+	int rc;
+
+	/* Allocate a hashed connection */
+	hash_con = kzalloc(sizeof(*hash_con), GFP_ATOMIC);
+	if (!hash_con)
+		return -ENOMEM;
+
+	/* Acquire the connection */
+	rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+					  &hash_con->con);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to acquire Connection\n");
+		kfree(hash_con);
+		return rc;
+	}
+
+	/* Added the connection to hash table */
+	*handle = hash_con->con->icid;
+	*fw_cid = hash_con->con->fw_cid;
+	hash_add(cdev->connections, &hash_con->node, *handle);
+
+	if (p_doorbell)
+		*p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev),
+						    *handle);
+
+	return 0;
+}
+
+static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_iscsi_con *hash_con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	hlist_del(&hash_con->node);
+	qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+	kfree(hash_con);
+
+	return 0;
+}
+
+static int qed_iscsi_offload_conn(struct qed_dev *cdev,
+				  u32 handle,
+				  struct qed_iscsi_params_offload *conn_info)
+{
+	struct qed_hash_iscsi_con *hash_con;
+	struct qed_iscsi_conn *con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	/* Update the connection with information from the params */
+	con = hash_con->con;
+
+	ether_addr_copy(con->local_mac, conn_info->src.mac);
+	ether_addr_copy(con->remote_mac, conn_info->dst.mac);
+	memcpy(con->local_ip, conn_info->src.ip, sizeof(con->local_ip));
+	memcpy(con->remote_ip, conn_info->dst.ip, sizeof(con->remote_ip));
+	con->local_port = conn_info->src.port;
+	con->remote_port = conn_info->dst.port;
+
+	con->layer_code = conn_info->layer_code;
+	con->sq_pbl_addr = conn_info->sq_pbl_addr;
+	con->initial_ack = conn_info->initial_ack;
+	con->vlan_id = conn_info->vlan_id;
+	con->tcp_flags = conn_info->tcp_flags;
+	con->ip_version = conn_info->ip_version;
+	con->default_cq = conn_info->default_cq;
+	con->ka_max_probe_cnt = conn_info->ka_max_probe_cnt;
+	con->dup_ack_theshold = conn_info->dup_ack_theshold;
+	con->rcv_next = conn_info->rcv_next;
+	con->snd_una = conn_info->snd_una;
+	con->snd_next = conn_info->snd_next;
+	con->snd_max = conn_info->snd_max;
+	con->snd_wnd = conn_info->snd_wnd;
+	con->rcv_wnd = conn_info->rcv_wnd;
+	con->snd_wl1 = conn_info->snd_wl1;
+	con->cwnd = conn_info->cwnd;
+	con->ss_thresh = conn_info->ss_thresh;
+	con->srtt = conn_info->srtt;
+	con->rtt_var = conn_info->rtt_var;
+	con->ts_time = conn_info->ts_time;
+	con->ts_recent = conn_info->ts_recent;
+	con->ts_recent_age = conn_info->ts_recent_age;
+	con->total_rt = conn_info->total_rt;
+	con->ka_timeout_delta = conn_info->ka_timeout_delta;
+	con->rt_timeout_delta = conn_info->rt_timeout_delta;
+	con->dup_ack_cnt = conn_info->dup_ack_cnt;
+	con->snd_wnd_probe_cnt = conn_info->snd_wnd_probe_cnt;
+	con->ka_probe_cnt = conn_info->ka_probe_cnt;
+	con->rt_cnt = conn_info->rt_cnt;
+	con->flow_label = conn_info->flow_label;
+	con->ka_timeout = conn_info->ka_timeout;
+	con->ka_interval = conn_info->ka_interval;
+	con->max_rt_time = conn_info->max_rt_time;
+	con->initial_rcv_wnd = conn_info->initial_rcv_wnd;
+	con->ttl = conn_info->ttl;
+	con->tos_or_tc = conn_info->tos_or_tc;
+	con->remote_port = conn_info->remote_port;
+	con->local_port = conn_info->local_port;
+	con->mss = conn_info->mss;
+	con->snd_wnd_scale = conn_info->snd_wnd_scale;
+	con->rcv_wnd_scale = conn_info->rcv_wnd_scale;
+	con->ts_ticks_per_second = conn_info->ts_ticks_per_second;
+	con->da_timeout_value = conn_info->da_timeout_value;
+	con->ack_frequency = conn_info->ack_frequency;
+
+	/* Set default values on other connection fields */
+	con->offl_flags = 0x1;
+
+	return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+					 QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_iscsi_update_conn(struct qed_dev *cdev,
+				 u32 handle,
+				 struct qed_iscsi_params_update *conn_info)
+{
+	struct qed_hash_iscsi_con *hash_con;
+	struct qed_iscsi_conn *con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	/* Update the connection with information from the params */
+	con = hash_con->con;
+	con->update_flag = conn_info->update_flag;
+	con->max_seq_size = conn_info->max_seq_size;
+	con->max_recv_pdu_length = conn_info->max_recv_pdu_length;
+	con->max_send_pdu_length = conn_info->max_send_pdu_length;
+	con->first_seq_length = conn_info->first_seq_length;
+	con->exp_stat_sn = conn_info->exp_stat_sn;
+
+	return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con,
+					QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_iscsi_con *hash_con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev),
+					  hash_con->con,
+					  QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
+				  u32 handle, u8 abrt_conn)
+{
+	struct qed_hash_iscsi_con *hash_con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	hash_con->con->abortive_dsconnect = abrt_conn;
+
+	return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev),
+					   hash_con->con,
+					   QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
+{
+	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+}
+
+static const struct qed_iscsi_ops qed_iscsi_ops_pass = {
+	.common = &qed_common_ops_pass,
+	.ll2 = &qed_ll2_ops_pass,
+	.fill_dev_info = &qed_fill_iscsi_dev_info,
+	.register_ops = &qed_register_iscsi_ops,
+	.start = &qed_iscsi_start,
+	.stop = &qed_iscsi_stop,
+	.acquire_conn = &qed_iscsi_acquire_conn,
+	.release_conn = &qed_iscsi_release_conn,
+	.offload_conn = &qed_iscsi_offload_conn,
+	.update_conn = &qed_iscsi_update_conn,
+	.destroy_conn = &qed_iscsi_destroy_conn,
+	.clear_sq = &qed_iscsi_clear_conn_sq,
+	.get_stats = &qed_iscsi_stats,
+};
+
+const struct qed_iscsi_ops *qed_get_iscsi_ops()
+{
+	return &qed_iscsi_ops_pass;
+}
+EXPORT_SYMBOL(qed_get_iscsi_ops);
+
+void qed_put_iscsi_ops(void)
+{
+}
+EXPORT_SYMBOL(qed_put_iscsi_ops);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
new file mode 100644
index 000000000000..67c25f3db4d5
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -0,0 +1,52 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_ISCSI_H
+#define _QED_ISCSI_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/tcp_common.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include <linux/qed/qed_chain.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+
+struct qed_iscsi_info {
+	spinlock_t lock; /* Connection resources. */
+	struct list_head free_list;
+	u16 max_num_outstanding_tasks;
+	void *event_context;
+	iscsi_event_cb_t event_cb;
+};
+
+#ifdef CONFIG_QED_LL2
+extern const struct qed_ll2_ops qed_ll2_ops_pass;
+#endif
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn);
+
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+		     struct qed_iscsi_info *p_iscsi_info);
+
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+		    struct qed_iscsi_info *p_iscsi_info);
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+static inline struct qed_iscsi_info *qed_iscsi_alloc(
+		struct qed_hwfn *p_hwfn) { return NULL; }
+static inline void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+				   struct qed_iscsi_info *p_iscsi_info) {}
+static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+				  struct qed_iscsi_info *p_iscsi_info) {}
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index f95385cbbd40..84d7e9146b00 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1517,6 +1517,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	enum qed_ll2_conn_type conn_type;
 	struct qed_ptt *p_ptt;
 	int rc, i;
+	u8 gsi_enable = 1;
 
 	/* Initialize LL2 locks & lists */
 	INIT_LIST_HEAD(&cdev->ll2->list);
@@ -1548,6 +1549,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
 	case QED_PCI_ISCSI:
 		conn_type = QED_LL2_TYPE_ISCSI;
+		gsi_enable = 0;
 		break;
 	case QED_PCI_ETH_ROCE:
 		conn_type = QED_LL2_TYPE_ROCE;
@@ -1564,7 +1566,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
 	ll2_info.tx_tc = 0;
 	ll2_info.tx_dest = CORE_TX_DEST_NW;
-	ll2_info.gsi_enable = 1;
+	ll2_info.gsi_enable = gsi_enable;
 
 	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info,
 					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index b414a0542177..97544205a8c1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -82,6 +82,8 @@
 	0x1c80000UL
 #define BAR0_MAP_REG_XSDM_RAM \
 	0x1e00000UL
+#define BAR0_MAP_REG_YSDM_RAM \
+	0x1e80000UL
 #define  NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF \
 	0x5011f4UL
 #define  PRS_REG_SEARCH_TCP \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 019960b7855a..56d2f64a3655 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -24,6 +24,7 @@
 #include "qed_hsi.h"
 #include "qed_hw.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
@@ -277,6 +278,20 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
 		return qed_sriov_eqe_event(p_hwfn,
 					   p_eqe->opcode,
 					   p_eqe->echo, &p_eqe->data);
+	case PROTOCOLID_ISCSI:
+		if (!IS_ENABLED(CONFIG_QED_ISCSI))
+			return -EINVAL;
+
+		if (p_hwfn->p_iscsi_info->event_cb) {
+			struct qed_iscsi_info *p_iscsi = p_hwfn->p_iscsi_info;
+
+			return p_iscsi->event_cb(p_iscsi->event_context,
+						 p_eqe->opcode, &p_eqe->data);
+		} else {
+			DP_NOTICE(p_hwfn,
+				  "iSCSI async completion is not set\n");
+			return -EINVAL;
+		}
 	default:
 		DP_NOTICE(p_hwfn,
 			  "Unknown Async completion for protocol: %d\n",
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ea095b4893aa..4b454f4f5b25 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -166,6 +166,7 @@ struct qed_iscsi_pf_params {
 	u32 max_cwnd;
 	u16 cq_num_entries;
 	u16 cmdq_num_entries;
+	u32 two_msl_timer;
 	u16 dup_ack_threshold;
 	u16 tx_sws_timer;
 	u16 min_rto;
@@ -275,6 +276,7 @@ struct qed_dev_info {
 enum qed_sb_type {
 	QED_SB_TYPE_L2_QUEUE,
 	QED_SB_TYPE_CNQ,
+	QED_SB_TYPE_STORAGE,
 };
 
 enum qed_protocol {
diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
new file mode 100644
index 000000000000..d27912480cb3
--- /dev/null
+++ b/include/linux/qed/qed_iscsi_if.h
@@ -0,0 +1,229 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_ISCSI_IF_H
+#define _QED_ISCSI_IF_H
+#include <linux/types.h>
+#include <linux/qed/qed_if.h>
+
+typedef int (*iscsi_event_cb_t) (void *context,
+				 u8 fw_event_code, void *fw_handle);
+struct qed_iscsi_stats {
+	u64 iscsi_rx_bytes_cnt;
+	u64 iscsi_rx_packet_cnt;
+	u64 iscsi_rx_new_ooo_isle_events_cnt;
+	u32 iscsi_cmdq_threshold_cnt;
+	u32 iscsi_rq_threshold_cnt;
+	u32 iscsi_immq_threshold_cnt;
+
+	u64 iscsi_rx_dropped_pdus_task_not_valid;
+
+	u64 iscsi_rx_data_pdu_cnt;
+	u64 iscsi_rx_r2t_pdu_cnt;
+	u64 iscsi_rx_total_pdu_cnt;
+
+	u64 iscsi_tx_go_to_slow_start_event_cnt;
+	u64 iscsi_tx_fast_retransmit_event_cnt;
+
+	u64 iscsi_tx_data_pdu_cnt;
+	u64 iscsi_tx_r2t_pdu_cnt;
+	u64 iscsi_tx_total_pdu_cnt;
+
+	u64 iscsi_tx_bytes_cnt;
+	u64 iscsi_tx_packet_cnt;
+};
+
+struct qed_dev_iscsi_info {
+	struct qed_dev_info common;
+
+	void __iomem *primary_dbq_rq_addr;
+	void __iomem *secondary_bdq_rq_addr;
+};
+
+struct qed_iscsi_id_params {
+	u8 mac[ETH_ALEN];
+	u32 ip[4];
+	u16 port;
+};
+
+struct qed_iscsi_params_offload {
+	u8 layer_code;
+	dma_addr_t sq_pbl_addr;
+	u32 initial_ack;
+
+	struct qed_iscsi_id_params src;
+	struct qed_iscsi_id_params dst;
+	u16 vlan_id;
+	u8 tcp_flags;
+	u8 ip_version;
+	u8 default_cq;
+
+	u8 ka_max_probe_cnt;
+	u8 dup_ack_theshold;
+	u32 rcv_next;
+	u32 snd_una;
+	u32 snd_next;
+	u32 snd_max;
+	u32 snd_wnd;
+	u32 rcv_wnd;
+	u32 snd_wl1;
+	u32 cwnd;
+	u32 ss_thresh;
+	u16 srtt;
+	u16 rtt_var;
+	u32 ts_time;
+	u32 ts_recent;
+	u32 ts_recent_age;
+	u32 total_rt;
+	u32 ka_timeout_delta;
+	u32 rt_timeout_delta;
+	u8 dup_ack_cnt;
+	u8 snd_wnd_probe_cnt;
+	u8 ka_probe_cnt;
+	u8 rt_cnt;
+	u32 flow_label;
+	u32 ka_timeout;
+	u32 ka_interval;
+	u32 max_rt_time;
+	u32 initial_rcv_wnd;
+	u8 ttl;
+	u8 tos_or_tc;
+	u16 remote_port;
+	u16 local_port;
+	u16 mss;
+	u8 snd_wnd_scale;
+	u8 rcv_wnd_scale;
+	u32 ts_ticks_per_second;
+	u16 da_timeout_value;
+	u8 ack_frequency;
+};
+
+struct qed_iscsi_params_update {
+	u8 update_flag;
+#define QED_ISCSI_CONN_HD_EN            BIT(0)
+#define QED_ISCSI_CONN_DD_EN            BIT(1)
+#define QED_ISCSI_CONN_INITIAL_R2T      BIT(2)
+#define QED_ISCSI_CONN_IMMEDIATE_DATA   BIT(3)
+
+	u32 max_seq_size;
+	u32 max_recv_pdu_length;
+	u32 max_send_pdu_length;
+	u32 first_seq_length;
+	u32 exp_stat_sn;
+};
+
+#define MAX_TID_BLOCKS_ISCSI (512)
+struct qed_iscsi_tid {
+	u32 size;		/* In bytes per task */
+	u32 num_tids_per_block;
+	u8 *blocks[MAX_TID_BLOCKS_ISCSI];
+};
+
+struct qed_iscsi_cb_ops {
+	struct qed_common_cb_ops common;
+};
+
+/**
+ * struct qed_iscsi_ops - qed iSCSI operations.
+ * @common:		common operations pointer
+ * @ll2:		light L2 operations pointer
+ * @fill_dev_info:	fills iSCSI specific information
+ *			@param cdev
+ *			@param info
+ *			@return 0 on sucesss, otherwise error value.
+ * @register_ops:	register iscsi operations
+ *			@param cdev
+ *			@param ops - specified using qed_iscsi_cb_ops
+ *			@param cookie - driver private
+ * @start:		iscsi in FW
+ *			@param cdev
+ *			@param tasks - qed will fill information about tasks
+ *			return 0 on success, otherwise error value.
+ * @stop:		iscsi in FW
+ *			@param cdev
+ *			return 0 on success, otherwise error value.
+ * @acquire_conn:	acquire a new iscsi connection
+ *			@param cdev
+ *			@param handle - qed will fill handle that should be
+ *				used henceforth as identifier of the
+ *				connection.
+ *			@param p_doorbell - qed will fill the address of the
+ *				doorbell.
+ *			@return 0 on sucesss, otherwise error value.
+ * @release_conn:	release a previously acquired iscsi connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
+ * @offload_conn:	configures an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param conn_info - the configuration to use for the
+ *				offload.
+ *			@return 0 on success, otherwise error value.
+ * @update_conn:	updates an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param conn_info - the configuration to use for the
+ *				offload.
+ *			@return 0 on success, otherwise error value.
+ * @destroy_conn:	stops an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
+ * @clear_sq:		clear all task in sq
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
+ * @get_stats:		iSCSI related statistics
+ *			@param cdev
+ *			@param stats - pointer to struck that would be filled
+ *				we stats
+ *			@return 0 on success, error otherwise.
+ */
+struct qed_iscsi_ops {
+	const struct qed_common_ops *common;
+
+	const struct qed_ll2_ops *ll2;
+
+	int (*fill_dev_info)(struct qed_dev *cdev,
+			     struct qed_dev_iscsi_info *info);
+
+	void (*register_ops)(struct qed_dev *cdev,
+			     struct qed_iscsi_cb_ops *ops, void *cookie);
+
+	int (*start)(struct qed_dev *cdev,
+		     struct qed_iscsi_tid *tasks,
+		     void *event_context, iscsi_event_cb_t async_event_cb);
+
+	int (*stop)(struct qed_dev *cdev);
+
+	int (*acquire_conn)(struct qed_dev *cdev,
+			    u32 *handle,
+			    u32 *fw_cid, void __iomem **p_doorbell);
+
+	int (*release_conn)(struct qed_dev *cdev, u32 handle);
+
+	int (*offload_conn)(struct qed_dev *cdev,
+			    u32 handle,
+			    struct qed_iscsi_params_offload *conn_info);
+
+	int (*update_conn)(struct qed_dev *cdev,
+			   u32 handle,
+			   struct qed_iscsi_params_update *conn_info);
+
+	int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn);
+
+	int (*clear_sq)(struct qed_dev *cdev, u32 handle);
+
+	int (*get_stats)(struct qed_dev *cdev,
+			 struct qed_iscsi_stats *stats);
+};
+
+const struct qed_iscsi_ops *qed_get_iscsi_ops(void);
+void qed_put_iscsi_ops(void);
+#endif
-- 
cgit v1.2.3


From 95a22caee396cef0bb2ca8fafdd82966a49367bb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Dec 2016 11:32:06 +0100
Subject: tcp: randomize tcp timestamp offsets for each connection

jiffies based timestamps allow for easy inference of number of devices
behind NAT translators and also makes tracking of hosts simpler.

commit ceaa1fef65a7c2e ("tcp: adding a per-socket timestamp offset")
added the main infrastructure that is needed for per-connection ts
randomization, in particular writing/reading the on-wire tcp header
format takes the offset into account so rest of stack can use normal
tcp_time_stamp (jiffies).

So only two items are left:
 - add a tsoffset for request sockets
 - extend the tcp isn generator to also return another 32bit number
   in addition to the ISN.

Re-use of ISN generator also means timestamps are still monotonically
increasing for same connection quadruple, i.e. PAWS will still work.

Includes fixes from Eric Dumazet.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  1 +
 include/net/secure_seq.h |  8 ++++----
 include/net/tcp.h        |  2 +-
 net/core/secure_seq.c    | 10 ++++++----
 net/ipv4/syncookies.c    |  1 +
 net/ipv4/tcp_input.c     |  7 ++++++-
 net/ipv4/tcp_ipv4.c      |  9 +++++----
 net/ipv4/tcp_minisocks.c |  4 +++-
 net/ipv4/tcp_output.c    |  2 +-
 net/ipv6/syncookies.c    |  1 +
 net/ipv6/tcp_ipv6.c      | 10 ++++++----
 11 files changed, 35 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 00e0ee8f001f..734bab4c3bef 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -123,6 +123,7 @@ struct tcp_request_sock {
 	u32				txhash;
 	u32				rcv_isn;
 	u32				snt_isn;
+	u32				ts_off;
 	u32				last_oow_ack_time; /* last SYNACK */
 	u32				rcv_nxt; /* the ack # by SYNACK. For
 						  * FastOpen it's the seq#
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 3f36d45b714a..0caee631a836 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -6,10 +6,10 @@
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
-				 __be16 sport, __be16 dport);
-__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-				   __be16 sport, __be16 dport);
+u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+			       __be16 sport, __be16 dport, u32 *tsoff);
+u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+				 __be16 sport, __be16 dport, u32 *tsoff);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3e097e39d4d2..207147b4c6b2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1827,7 +1827,7 @@ struct tcp_request_sock_ops {
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
 				       const struct request_sock *req,
 				       bool *strict);
-	__u32 (*init_seq)(const struct sk_buff *skb);
+	__u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index fd3ce461fbe6..a8d6062cbb4a 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -40,8 +40,8 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-				   __be16 sport, __be16 dport)
+u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+				 __be16 sport, __be16 dport, u32 *tsoff)
 {
 	u32 secret[MD5_MESSAGE_BYTES / 4];
 	u32 hash[MD5_DIGEST_WORDS];
@@ -58,6 +58,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 
 	md5_transform(hash, secret);
 
+	*tsoff = hash[1];
 	return seq_scale(hash[0]);
 }
 EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -86,8 +87,8 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 
 #ifdef CONFIG_INET
 
-__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
-				 __be16 sport, __be16 dport)
+u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+			       __be16 sport, __be16 dport, u32 *tsoff)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
@@ -99,6 +100,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 
 	md5_transform(hash, net_secret);
 
+	*tsoff = hash[1];
 	return seq_scale(hash[0]);
 }
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0dc6286272aa..3e88467d70ee 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	treq = tcp_rsk(req);
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
+	treq->ts_off		= 0;
 	req->mss		= mss;
 	ireq->ir_num		= ntohs(th->dest);
 	ireq->ir_rmt_port	= th->source;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 56fe736fd64d..2257de244622 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6307,6 +6307,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 
 	tcp_rsk(req)->af_specific = af_ops;
+	tcp_rsk(req)->ts_off = 0;
 
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = af_ops->mss_clamp;
@@ -6328,6 +6329,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
+	if (isn && tmp_opt.tstamp_ok)
+		af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+
 	if (!want_cookie && !isn) {
 		/* VJ's idea. We save last timestamp seen
 		 * from the destination in peer table, when entering
@@ -6368,7 +6372,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			goto drop_and_release;
 		}
 
-		isn = af_ops->init_seq(skb);
+		isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
 	}
 	if (!dst) {
 		dst = af_ops->route_req(sk, &fl, req, NULL);
@@ -6380,6 +6384,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	if (want_cookie) {
 		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+		tcp_rsk(req)->ts_off = 0;
 		req->cookie_ts = tmp_opt.tstamp_ok;
 		if (!tmp_opt.tstamp_ok)
 			inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5555eb86e549..b50f05905ced 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -95,12 +95,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
 {
 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 					  ip_hdr(skb)->saddr,
 					  tcp_hdr(skb)->dest,
-					  tcp_hdr(skb)->source);
+					  tcp_hdr(skb)->source, tsoff);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -237,7 +237,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 							   inet->inet_daddr,
 							   inet->inet_sport,
-							   usin->sin_port);
+							   usin->sin_port,
+							   &tp->tsoffset);
 
 	inet->inet_id = tp->write_seq ^ jiffies;
 
@@ -824,7 +825,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	tcp_v4_send_ack(sk, skb, seq,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp,
+			tcp_time_stamp + tcp_rsk(req)->ts_off,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6234ebaa7db1..28ce5ee831f5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -532,7 +532,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
-		newtp->tsoffset = 0;
+		newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
 		newtp->md5sig_info = NULL;	/*XXX*/
 		if (newtp->af_specific->md5_lookup(sk, newsk))
@@ -581,6 +581,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
+			if (tmp_opt.rcv_tsecr)
+				tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
 			/* We do not store true stamp, but it is not required,
 			 * it can be estimated (approximately)
 			 * from another data.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d3545d0cff75..c7adcb57654e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req,
 	}
 	if (likely(ireq->tstamp_ok)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = tcp_skb_timestamp(skb);
+		opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
 		opts->tsecr = req->ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 97830a6a9cbb..a4d49760bf43 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	treq->snt_synack.v64	= 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
+	treq->ts_off = 0;
 
 	/*
 	 * We need to lookup the dst_entry to get the correct window size.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 28ec0a2e7b72..a2185a214abc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					    ipv6_hdr(skb)->saddr.s6_addr32,
 					    tcp_hdr(skb)->dest,
-					    tcp_hdr(skb)->source);
+					    tcp_hdr(skb)->source, tsoff);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -283,7 +283,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 							     sk->sk_v6_daddr.s6_addr32,
 							     inet->inet_sport,
-							     inet->inet_dport);
+							     inet->inet_dport,
+							     &tp->tsoffset);
 
 	err = tcp_connect(sk);
 	if (err)
@@ -956,7 +957,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+			tcp_time_stamp + tcp_rsk(req)->ts_off,
+			req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
 }
-- 
cgit v1.2.3


From 7091d8c7055d7310339435ae3af2fb490a92524d Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Thu, 1 Dec 2016 14:06:37 +0200
Subject: net/sched: cls_flower: Add offload support using egress Hardware
 device

In order to support hardware offloading when the device given by the tc
rule is different from the Hardware underline device, extract the mirred
(egress) device from the tc action when a filter is added, using the new
tc_action_ops, get_dev().

Flower caches the information about the mirred device and use it for
calling ndo_setup_tc in filter change, update stats and delete.

Calling ndo_setup_tc of the mirred (egress) device instead of the
ingress device will allow a resolution between the software ingress
device and the underline hardware device.

The resolution will take place inside the offloading driver using
'egress_device' flag added to tc_to_netdev struct which is provided to
the offloading driver.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/pkt_cls.h     |  2 ++
 net/sched/cls_api.c       | 24 ++++++++++++++++++++++++
 net/sched/cls_flower.c    | 41 ++++++++++++++++++++++++-----------------
 4 files changed, 51 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3755317cc6a9..1ff5ea6e1221 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -802,6 +802,7 @@ struct tc_to_netdev {
 		struct tc_cls_matchall_offload *cls_mall;
 		struct tc_cls_bpf_offload *cls_bpf;
 	};
+	bool egress_dev;
 };
 
 /* These structures hold the attributes of xdp state that are being passed
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 45ad9aab9bba..f0a051480c6c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -171,6 +171,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+		     struct net_device **hw_dev);
 
 /**
  * struct tcf_pkt_info - packet information
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b05d4a2155b0..3fbba79a4ef0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -682,6 +682,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+		     struct net_device **hw_dev)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	if (tc_no_actions(exts))
+		return -EINVAL;
+
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (a->ops->get_dev) {
+			a->ops->get_dev(a, dev_net(dev), hw_dev);
+			break;
+		}
+	}
+	if (*hw_dev)
+		return 0;
+#endif
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tcf_exts_get_dev);
+
 static int __init tc_filter_init(void)
 {
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 13b349f426a7..1cacfa5c95f3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -78,6 +78,8 @@ struct cls_fl_filter {
 	u32 handle;
 	u32 flags;
 	struct rcu_head	rcu;
+	struct tc_to_netdev tc;
+	struct net_device *hw_dev;
 };
 
 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -203,9 +205,9 @@ static void fl_destroy_filter(struct rcu_head *head)
 
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_flower_offload offload = {0};
-	struct tc_to_netdev tc;
+	struct net_device *dev = f->hw_dev;
+	struct tc_to_netdev *tc = &f->tc;
 
 	if (!tc_can_offload(dev, tp))
 		return;
@@ -213,10 +215,10 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 	offload.command = TC_CLSFLOWER_DESTROY;
 	offload.cookie = (unsigned long)f;
 
-	tc.type = TC_SETUP_CLSFLOWER;
-	tc.cls_flower = &offload;
+	tc->type = TC_SETUP_CLSFLOWER;
+	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -226,11 +228,17 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_flower_offload offload = {0};
-	struct tc_to_netdev tc;
+	struct tc_to_netdev *tc = &f->tc;
 	int err;
 
-	if (!tc_can_offload(dev, tp))
-		return tc_skip_sw(f->flags) ? -EINVAL : 0;
+	if (!tc_can_offload(dev, tp)) {
+		if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev))
+			return tc_skip_sw(f->flags) ? -EINVAL : 0;
+		dev = f->hw_dev;
+		tc->egress_dev = true;
+	} else {
+		f->hw_dev = dev;
+	}
 
 	offload.command = TC_CLSFLOWER_REPLACE;
 	offload.cookie = (unsigned long)f;
@@ -239,23 +247,22 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	offload.key = &f->key;
 	offload.exts = &f->exts;
 
-	tc.type = TC_SETUP_CLSFLOWER;
-	tc.cls_flower = &offload;
+	tc->type = TC_SETUP_CLSFLOWER;
+	tc->cls_flower = &offload;
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
-					    &tc);
+					    tc);
 
 	if (tc_skip_sw(f->flags))
 		return err;
-
 	return 0;
 }
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_flower_offload offload = {0};
-	struct tc_to_netdev tc;
+	struct net_device *dev = f->hw_dev;
+	struct tc_to_netdev *tc = &f->tc;
 
 	if (!tc_can_offload(dev, tp))
 		return;
@@ -264,10 +271,10 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	offload.cookie = (unsigned long)f;
 	offload.exts = &f->exts;
 
-	tc.type = TC_SETUP_CLSFLOWER;
-	tc.cls_flower = &offload;
+	tc->type = TC_SETUP_CLSFLOWER;
+	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
-- 
cgit v1.2.3


From b2cd12574aa3e1625f471ff57cde7f628a18a46b Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 1 Dec 2016 08:48:03 -0800
Subject: bpf: Refactor cgroups code in prep for new type

Code move and rename only; no functional change intended.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 46 +++++++++++++++++++++++-----------------------
 kernel/bpf/cgroup.c        | 10 +++++-----
 kernel/bpf/syscall.c       | 28 +++++++++++++++-------------
 3 files changed, 43 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0cf1adfadd2d..af2ca8b432c0 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -36,31 +36,31 @@ void cgroup_bpf_update(struct cgroup *cgrp,
 		       struct bpf_prog *prog,
 		       enum bpf_attach_type type);
 
-int __cgroup_bpf_run_filter(struct sock *sk,
-			    struct sk_buff *skb,
-			    enum bpf_attach_type type);
-
-/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
-#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb)			\
-({									\
-	int __ret = 0;							\
-	if (cgroup_bpf_enabled)						\
-		__ret = __cgroup_bpf_run_filter(sk, skb,		\
-						BPF_CGROUP_INET_INGRESS); \
-									\
-	__ret;								\
+int __cgroup_bpf_run_filter_skb(struct sock *sk,
+				struct sk_buff *skb,
+				enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
+({									      \
+	int __ret = 0;							      \
+	if (cgroup_bpf_enabled)						      \
+		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
+						    BPF_CGROUP_INET_INGRESS); \
+									      \
+	__ret;								      \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb)				\
-({									\
-	int __ret = 0;							\
-	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		\
-		typeof(sk) __sk = sk_to_full_sk(sk);			\
-		if (sk_fullsock(__sk))					\
-			__ret = __cgroup_bpf_run_filter(__sk, skb,	\
-						BPF_CGROUP_INET_EGRESS); \
-	}								\
-	__ret;								\
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)			       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		       \
+		typeof(sk) __sk = sk_to_full_sk(sk);			       \
+		if (sk_fullsock(__sk))					       \
+			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
+						      BPF_CGROUP_INET_EGRESS); \
+	}								       \
+	__ret;								       \
 })
 
 #else
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 8c784f8c67cd..8fe55ffd109d 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -118,7 +118,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
 }
 
 /**
- * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socken sending or receiving traffic
  * @skb: The skb that is being sent or received
  * @type: The type of program to be exectuted
@@ -132,9 +132,9 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
  * This function will return %-EPERM if any if an attached program was found
  * and if it returned != 1 during execution. In all other cases, 0 is returned.
  */
-int __cgroup_bpf_run_filter(struct sock *sk,
-			    struct sk_buff *skb,
-			    enum bpf_attach_type type)
+int __cgroup_bpf_run_filter_skb(struct sock *sk,
+				struct sk_buff *skb,
+				enum bpf_attach_type type)
 {
 	struct bpf_prog *prog;
 	struct cgroup *cgrp;
@@ -164,4 +164,4 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 
 	return ret;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter);
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4caa18e6860a..5518a6839ab1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -856,6 +856,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	struct bpf_prog *prog;
 	struct cgroup *cgrp;
+	enum bpf_prog_type ptype;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -866,25 +867,26 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
-		prog = bpf_prog_get_type(attr->attach_bpf_fd,
-					 BPF_PROG_TYPE_CGROUP_SKB);
-		if (IS_ERR(prog))
-			return PTR_ERR(prog);
-
-		cgrp = cgroup_get_from_fd(attr->target_fd);
-		if (IS_ERR(cgrp)) {
-			bpf_prog_put(prog);
-			return PTR_ERR(cgrp);
-		}
-
-		cgroup_bpf_update(cgrp, prog, attr->attach_type);
-		cgroup_put(cgrp);
+		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp)) {
+		bpf_prog_put(prog);
+		return PTR_ERR(cgrp);
+	}
+
+	cgroup_bpf_update(cgrp, prog, attr->attach_type);
+	cgroup_put(cgrp);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 61023658760032e97869b07d54be9681d2529e77 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 1 Dec 2016 08:48:04 -0800
Subject: bpf: Add new cgroup attach type to enable sock modifications

Add new cgroup based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to
BPF_PROG_TYPE_CGROUP_SKB programs can be attached to a cgroup and run
any time a process in the cgroup opens an AF_INET or AF_INET6 socket.
Currently only sk_bound_dev_if is exported to userspace for modification
by a bpf program.

This allows a cgroup to be configured such that AF_INET{6} sockets opened
by processes are automatically bound to a specific device. In turn, this
enables the running of programs that do not support SO_BINDTODEVICE in a
specific VRF context / L3 domain.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 14 +++++++++++
 include/uapi/linux/bpf.h   |  6 +++++
 kernel/bpf/cgroup.c        | 33 ++++++++++++++++++++++++
 kernel/bpf/syscall.c       |  5 +++-
 net/core/filter.c          | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c         | 12 ++++++++-
 net/ipv6/af_inet6.c        |  8 ++++++
 7 files changed, 138 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index af2ca8b432c0..7b6e5d168c95 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -40,6 +40,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
 				enum bpf_attach_type type);
 
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+			       enum bpf_attach_type type);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -63,6 +66,16 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled && sk) {					       \
+		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
+						 BPF_CGROUP_INET_SOCK_CREATE); \
+	}								       \
+	__ret;								       \
+})
+
 #else
 
 struct cgroup_bpf {};
@@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
 
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 
 #endif /* CONFIG_CGROUP_BPF */
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 22ac82792687..bfe5e31a1288 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,6 +101,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_XDP,
 	BPF_PROG_TYPE_PERF_EVENT,
 	BPF_PROG_TYPE_CGROUP_SKB,
+	BPF_PROG_TYPE_CGROUP_SOCK,
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
@@ -109,6 +110,7 @@ enum bpf_prog_type {
 enum bpf_attach_type {
 	BPF_CGROUP_INET_INGRESS,
 	BPF_CGROUP_INET_EGRESS,
+	BPF_CGROUP_INET_SOCK_CREATE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -567,6 +569,10 @@ enum bpf_ret_code {
 	/* >127 are reserved for prog type specific return codes */
 };
 
+struct bpf_sock {
+	__u32 bound_dev_if;
+};
+
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
  * return codes are reserved for future use. Unknown return codes will result
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 8fe55ffd109d..a515f7b007c6 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -165,3 +165,36 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
+
+/**
+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
+ * @sk: sock structure to manipulate
+ * @type: The type of program to be exectuted
+ *
+ * socket is passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+			       enum bpf_attach_type type)
+{
+	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	struct bpf_prog *prog;
+	int ret = 0;
+
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog)
+		ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5518a6839ab1..85af86c496cd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -869,7 +869,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_EGRESS:
 		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
-
+	case BPF_CGROUP_INET_SOCK_CREATE:
+		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -905,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
+	case BPF_CGROUP_INET_SOCK_CREATE:
 		cgrp = cgroup_get_from_fd(attr->target_fd);
 		if (IS_ERR(cgrp))
 			return PTR_ERR(cgrp);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1c4d0faf22c8..0ab252e462aa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size,
 	return __is_valid_access(off, size, type);
 }
 
+static bool sock_filter_is_valid_access(int off, int size,
+					enum bpf_access_type type,
+					enum bpf_reg_type *reg_type)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct bpf_sock, bound_dev_if):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	if (off < 0 || off + size > sizeof(struct bpf_sock))
+		return false;
+
+	/* The verifier guarantees that size > 0. */
+	if (off % size != 0)
+		return false;
+
+	if (size != sizeof(__u32))
+		return false;
+
+	return true;
+}
+
 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 			       const struct bpf_prog *prog)
 {
@@ -3076,6 +3102,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 	return insn - insn_buf;
 }
 
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+					  int dst_reg, int src_reg,
+					  int ctx_off,
+					  struct bpf_insn *insn_buf,
+					  struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct bpf_sock, bound_dev_if):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+					offsetof(struct sock, sk_bound_dev_if));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sock, sk_bound_dev_if));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 					 int src_reg, int ctx_off,
 					 struct bpf_insn *insn_buf,
@@ -3162,6 +3212,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
 	.gen_prologue		= tc_cls_act_prologue,
 };
 
+static const struct bpf_verifier_ops cg_sock_ops = {
+	.get_func_proto		= sk_filter_func_proto,
+	.is_valid_access	= sock_filter_is_valid_access,
+	.convert_ctx_access	= sock_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.ops	= &sk_filter_ops,
 	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3202,6 +3258,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
 	.type	= BPF_PROG_TYPE_LWT_XMIT,
 };
 
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+	.ops	= &cg_sock_ops,
+	.type	= BPF_PROG_TYPE_CGROUP_SOCK
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
@@ -3209,6 +3270,7 @@ static int __init register_sk_filter_ops(void)
 	bpf_register_prog_type(&sched_act_type);
 	bpf_register_prog_type(&xdp_type);
 	bpf_register_prog_type(&cg_skb_type);
+	bpf_register_prog_type(&cg_sock_type);
 	bpf_register_prog_type(&lwt_in_type);
 	bpf_register_prog_type(&lwt_out_type);
 	bpf_register_prog_type(&lwt_xmit_type);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ddf5cda07f4..24d2550492ee 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -374,8 +374,18 @@ lookup_protocol:
 
 	if (sk->sk_prot->init) {
 		err = sk->sk_prot->init(sk);
-		if (err)
+		if (err) {
+			sk_common_release(sk);
+			goto out;
+		}
+	}
+
+	if (!kern) {
+		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+		if (err) {
 			sk_common_release(sk);
+			goto out;
+		}
 	}
 out:
 	return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d424f3a3737a..237e654ba717 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -258,6 +258,14 @@ lookup_protocol:
 			goto out;
 		}
 	}
+
+	if (!kern) {
+		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+		if (err) {
+			sk_common_release(sk);
+			goto out;
+		}
+	}
 out:
 	return err;
 out_rcu_unlock:
-- 
cgit v1.2.3


From b908d3cd812abe3f4a74d7550bbf0a8cbcfbe6ed Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 2 Dec 2016 17:48:51 -0800
Subject: Input: synaptics-rmi4 - allow to add attention data

The HID implementation of RMI4 provides the data during
the interrupt (in the input report). We need to provide
a way for this transport driver to provide the attention
data while calling an IRQ.

We use a fifo in rmi_core to not lose any incoming event.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Andrew Duggan <aduggan@synaptics.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_driver.c | 49 +++++++++++++++++++++++++++++++++++++++--
 include/linux/rmi.h             | 11 +++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index a718e51afb0b..85062e414e73 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -191,16 +191,53 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 	return 0;
 }
 
+void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status,
+		       void *data, size_t size)
+{
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
+	struct rmi4_attn_data attn_data;
+	void *fifo_data;
+
+	if (!drvdata->enabled)
+		return;
+
+	fifo_data = kmemdup(data, size, GFP_ATOMIC);
+	if (!fifo_data)
+		return;
+
+	attn_data.irq_status = irq_status;
+	attn_data.size = size;
+	attn_data.data = fifo_data;
+
+	kfifo_put(&drvdata->attn_fifo, attn_data);
+}
+EXPORT_SYMBOL_GPL(rmi_set_attn_data);
+
 static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
 {
 	struct rmi_device *rmi_dev = dev_id;
-	int ret;
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
+	struct rmi4_attn_data attn_data = {0};
+	int ret, count;
+
+	count = kfifo_get(&drvdata->attn_fifo, &attn_data);
+	if (count) {
+		*(drvdata->irq_status) = attn_data.irq_status;
+		rmi_dev->xport->attn_data = attn_data.data;
+		rmi_dev->xport->attn_size = attn_data.size;
+	}
 
 	ret = rmi_process_interrupt_requests(rmi_dev);
 	if (ret)
 		rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
 			"Failed to process interrupt request: %d\n", ret);
 
+	if (count)
+		kfree(attn_data.data);
+
+	if (!kfifo_is_empty(&drvdata->attn_fifo))
+		return rmi_irq_fn(irq, dev_id);
+
 	return IRQ_HANDLED;
 }
 
@@ -880,8 +917,9 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake)
 {
 	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
 	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
+	struct rmi4_attn_data attn_data = {0};
 	int irq = pdata->irq;
-	int retval;
+	int retval, count;
 
 	mutex_lock(&data->enabled_mutex);
 
@@ -898,6 +936,13 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake)
 				 retval);
 	}
 
+	/* make sure the fifo is clean */
+	while (!kfifo_is_empty(&data->attn_fifo)) {
+		count = kfifo_get(&data->attn_fifo, &attn_data);
+		if (count)
+			kfree(attn_data.data);
+	}
+
 out:
 	mutex_unlock(&data->enabled_mutex);
 }
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 7780e40a2573..1d4865621493 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -13,6 +13,7 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
+#include <linux/kfifo.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -331,6 +332,12 @@ struct rmi_device {
 
 };
 
+struct rmi4_attn_data {
+	unsigned long irq_status;
+	size_t size;
+	void *data;
+};
+
 struct rmi_driver_data {
 	struct list_head function_list;
 
@@ -357,11 +364,15 @@ struct rmi_driver_data {
 
 	bool enabled;
 	struct mutex enabled_mutex;
+	DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16);
 };
 
 int rmi_register_transport_device(struct rmi_transport_dev *xport);
 void rmi_unregister_transport_device(struct rmi_transport_dev *xport);
 
+void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status,
+		       void *data, size_t size);
+
 int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake);
 int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake);
 #endif
-- 
cgit v1.2.3


From ae9979c31007d5366b73640ee7dcbb271357053e Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 2 Dec 2016 17:49:10 -0800
Subject: Input: synaptics-rmi4 - store the attn data in the driver

Now that we have a proper API to set the attention data, there is
no point in keeping it in the transport driver.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Andrew Duggan <aduggan@synaptics.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_driver.c |  5 ++---
 drivers/input/rmi4/rmi_f03.c    | 13 +++++++------
 drivers/input/rmi4/rmi_f11.c    | 12 ++++++------
 drivers/input/rmi4/rmi_f12.c    | 43 +++++++++++++++++++++--------------------
 drivers/input/rmi4/rmi_f30.c    | 11 ++++++-----
 include/linux/rmi.h             |  5 ++---
 6 files changed, 45 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 85062e414e73..05a3c4bc9778 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -155,7 +155,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 	if (!data)
 		return 0;
 
-	if (!rmi_dev->xport->attn_data) {
+	if (!data->attn_data.data) {
 		error = rmi_read_block(rmi_dev,
 				data->f01_container->fd.data_base_addr + 1,
 				data->irq_status, data->num_of_irq_regs);
@@ -223,8 +223,7 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
 	count = kfifo_get(&drvdata->attn_fifo, &attn_data);
 	if (count) {
 		*(drvdata->irq_status) = attn_data.irq_status;
-		rmi_dev->xport->attn_data = attn_data.data;
-		rmi_dev->xport->attn_size = attn_data.size;
+		drvdata->attn_data = attn_data;
 	}
 
 	ret = rmi_process_interrupt_requests(rmi_dev);
diff --git a/drivers/input/rmi4/rmi_f03.c b/drivers/input/rmi4/rmi_f03.c
index 7a3ec0ed0c27..8a7ca3e2f95e 100644
--- a/drivers/input/rmi4/rmi_f03.c
+++ b/drivers/input/rmi4/rmi_f03.c
@@ -164,6 +164,7 @@ static int rmi_f03_config(struct rmi_function *fn)
 static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits)
 {
 	struct rmi_device *rmi_dev = fn->rmi_dev;
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
 	struct f03_data *f03 = dev_get_drvdata(&fn->dev);
 	u16 data_addr = fn->fd.data_base_addr;
 	const u8 ob_len = f03->rx_queue_length * RMI_F03_OB_SIZE;
@@ -174,20 +175,20 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits)
 	int i;
 	int error;
 
-	if (!rmi_dev || !rmi_dev->xport)
+	if (!rmi_dev)
 		return -ENODEV;
 
-	if (rmi_dev->xport->attn_data) {
+	if (drvdata->attn_data.data) {
 		/* First grab the data passed by the transport device */
-		if (rmi_dev->xport->attn_size < ob_len) {
+		if (drvdata->attn_data.size < ob_len) {
 			dev_warn(&fn->dev, "F03 interrupted, but data is missing!\n");
 			return 0;
 		}
 
-		memcpy(obs, rmi_dev->xport->attn_data, ob_len);
+		memcpy(obs, drvdata->attn_data.data, ob_len);
 
-		rmi_dev->xport->attn_data += ob_len;
-		rmi_dev->xport->attn_size -= ob_len;
+		drvdata->attn_data.data += ob_len;
+		drvdata->attn_data.size -= ob_len;
 	} else {
 		/* Grab all of the data registers, and check them for data */
 		error = rmi_read_block(fn->rmi_dev, data_addr + RMI_F03_OB_OFFSET,
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index ffcbbc1745de..68279f3c5130 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1285,19 +1285,19 @@ static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits)
 	int error;
 	int valid_bytes = f11->sensor.pkt_size;
 
-	if (rmi_dev->xport->attn_data) {
+	if (drvdata->attn_data.data) {
 		/*
 		 * The valid data in the attention report is less then
 		 * expected. Only process the complete fingers.
 		 */
-		if (f11->sensor.attn_size > rmi_dev->xport->attn_size)
-			valid_bytes = rmi_dev->xport->attn_size;
+		if (f11->sensor.attn_size > drvdata->attn_data.size)
+			valid_bytes = drvdata->attn_data.size;
 		else
 			valid_bytes = f11->sensor.attn_size;
-		memcpy(f11->sensor.data_pkt, rmi_dev->xport->attn_data,
+		memcpy(f11->sensor.data_pkt, drvdata->attn_data.data,
 			valid_bytes);
-		rmi_dev->xport->attn_data += f11->sensor.attn_size;
-		rmi_dev->xport->attn_size -= f11->sensor.attn_size;
+		drvdata->attn_data.data += f11->sensor.attn_size;
+		drvdata->attn_data.size -= f11->sensor.attn_size;
 	} else {
 		error = rmi_read_block(rmi_dev,
 				data_base_addr, f11->sensor.data_pkt,
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index 82a4964e5eb9..8c5360c25266 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -208,19 +208,20 @@ static int rmi_f12_attention(struct rmi_function *fn,
 {
 	int retval;
 	struct rmi_device *rmi_dev = fn->rmi_dev;
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
 	struct f12_data *f12 = dev_get_drvdata(&fn->dev);
 	struct rmi_2d_sensor *sensor = &f12->sensor;
 	int valid_bytes = sensor->pkt_size;
 
-	if (rmi_dev->xport->attn_data) {
-		if (sensor->attn_size > rmi_dev->xport->attn_size)
-			valid_bytes = rmi_dev->xport->attn_size;
+	if (drvdata->attn_data.data) {
+		if (sensor->attn_size > drvdata->attn_data.size)
+			valid_bytes = drvdata->attn_data.size;
 		else
 			valid_bytes = sensor->attn_size;
-		memcpy(sensor->data_pkt, rmi_dev->xport->attn_data,
+		memcpy(sensor->data_pkt, drvdata->attn_data.data,
 			valid_bytes);
-		rmi_dev->xport->attn_data += sensor->attn_size;
-		rmi_dev->xport->attn_size -= sensor->attn_size;
+		drvdata->attn_data.data += sensor->attn_size;
+		drvdata->attn_data.size -= sensor->attn_size;
 	} else {
 		retval = rmi_read_block(rmi_dev, f12->data_addr,
 					sensor->data_pkt, sensor->pkt_size);
@@ -323,7 +324,7 @@ static int rmi_f12_probe(struct rmi_function *fn)
 	const struct rmi_register_desc_item *item;
 	struct rmi_2d_sensor *sensor;
 	struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
-	struct rmi_transport_dev *xport = rmi_dev->xport;
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
 	u16 data_offset = 0;
 
 	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__);
@@ -422,7 +423,7 @@ static int rmi_f12_probe(struct rmi_function *fn)
 	 * HID attention reports.
 	 */
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 0);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 1);
@@ -436,15 +437,15 @@ static int rmi_f12_probe(struct rmi_function *fn)
 	}
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 2);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 3);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 4);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 5);
@@ -456,22 +457,22 @@ static int rmi_f12_probe(struct rmi_function *fn)
 	}
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 6);
-	if (item && !xport->attn_data) {
+	if (item && !drvdata->attn_data.data) {
 		f12->data6 = item;
 		f12->data6_offset = data_offset;
 		data_offset += item->reg_size;
 	}
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 7);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 8);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 9);
-	if (item && !xport->attn_data) {
+	if (item && !drvdata->attn_data.data) {
 		f12->data9 = item;
 		f12->data9_offset = data_offset;
 		data_offset += item->reg_size;
@@ -480,27 +481,27 @@ static int rmi_f12_probe(struct rmi_function *fn)
 	}
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 10);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 11);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 12);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 13);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 14);
-	if (item && !xport->attn_data)
+	if (item && !drvdata->attn_data.data)
 		data_offset += item->reg_size;
 
 	item = rmi_get_register_desc_item(&f12->data_reg_desc, 15);
-	if (item && !xport->attn_data) {
+	if (item && !drvdata->attn_data.data) {
 		f12->data15 = item;
 		f12->data15_offset = data_offset;
 		data_offset += item->reg_size;
diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c
index f696137a56f5..f4b491e3e0fd 100644
--- a/drivers/input/rmi4/rmi_f30.c
+++ b/drivers/input/rmi4/rmi_f30.c
@@ -99,6 +99,7 @@ static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits)
 {
 	struct f30_data *f30 = dev_get_drvdata(&fn->dev);
 	struct rmi_device *rmi_dev = fn->rmi_dev;
+	struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
 	int retval;
 	int gpiled = 0;
 	int value = 0;
@@ -109,15 +110,15 @@ static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits)
 		return 0;
 
 	/* Read the gpi led data. */
-	if (rmi_dev->xport->attn_data) {
-		if (rmi_dev->xport->attn_size < f30->register_count) {
+	if (drvdata->attn_data.data) {
+		if (drvdata->attn_data.size < f30->register_count) {
 			dev_warn(&fn->dev, "F30 interrupted, but data is missing\n");
 			return 0;
 		}
-		memcpy(f30->data_regs, rmi_dev->xport->attn_data,
+		memcpy(f30->data_regs, drvdata->attn_data.data,
 			f30->register_count);
-		rmi_dev->xport->attn_data += f30->register_count;
-		rmi_dev->xport->attn_size -= f30->register_count;
+		drvdata->attn_data.data += f30->register_count;
+		drvdata->attn_data.size -= f30->register_count;
 	} else {
 		retval = rmi_read_block(rmi_dev, fn->fd.data_base_addr,
 			f30->data_regs, f30->register_count);
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index 1d4865621493..ac910f730688 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -272,9 +272,6 @@ struct rmi_transport_dev {
 	struct rmi_device_platform_data pdata;
 
 	struct input_dev *input;
-
-	void *attn_data;
-	int attn_size;
 };
 
 /**
@@ -364,6 +361,8 @@ struct rmi_driver_data {
 
 	bool enabled;
 	struct mutex enabled_mutex;
+
+	struct rmi4_attn_data attn_data;
 	DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16);
 };
 
-- 
cgit v1.2.3


From fb5f51c7425ebc808d91329257cbc963e2421368 Mon Sep 17 00:00:00 2001
From: Ian Kent <ikent@redhat.com>
Date: Thu, 24 Nov 2016 08:03:41 +1100
Subject: vfs: change d_manage() to take a struct path

For the autofs module to be able to reliably check if a dentry is a
mountpoint in a multiple namespace environment the ->d_manage() dentry
operation will need to take a path argument instead of a dentry.

Link: http://lkml.kernel.org/r/20161011053352.27645.83962.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/autofs4/root.c                 |  5 +++--
 fs/namei.c                        | 13 ++++++-------
 include/linux/dcache.h            |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 1b5f15653b1b..4ca3a8d1349d 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -20,7 +20,7 @@ prototypes:
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
 	struct vfsmount *(*d_automount)(struct path *path);
-	int (*d_manage)(struct dentry *, bool);
+	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
 				 unsigned int);
 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b5039a00caaf..3893f4d44cd4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -948,7 +948,7 @@ struct dentry_operations {
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
-	int (*d_manage)(struct dentry *, bool);
+	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
 				 unsigned int);
 };
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a11f73174877..3a4328218b71 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file);
 static struct dentry *autofs4_lookup(struct inode *,
 				     struct dentry *, unsigned int);
 static struct vfsmount *autofs4_d_automount(struct path *);
-static int autofs4_d_manage(struct dentry *, bool);
+static int autofs4_d_manage(const struct path *, bool);
 static void autofs4_dentry_release(struct dentry *);
 
 const struct file_operations autofs4_root_operations = {
@@ -421,8 +421,9 @@ done:
 	return NULL;
 }
 
-static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
+static int autofs4_d_manage(const struct path *path, bool rcu_walk)
 {
+	struct dentry *dentry = path->dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
diff --git a/fs/namei.c b/fs/namei.c
index 5b4eed221530..81ac4736b596 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1200,7 +1200,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
 		if (managed & DCACHE_MANAGE_TRANSIT) {
 			BUG_ON(!path->dentry->d_op);
 			BUG_ON(!path->dentry->d_op->d_manage);
-			ret = path->dentry->d_op->d_manage(path->dentry, false);
+			ret = path->dentry->d_op->d_manage(path, false);
 			if (ret < 0)
 				break;
 		}
@@ -1263,10 +1263,10 @@ int follow_down_one(struct path *path)
 }
 EXPORT_SYMBOL(follow_down_one);
 
-static inline int managed_dentry_rcu(struct dentry *dentry)
+static inline int managed_dentry_rcu(const struct path *path)
 {
-	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
-		dentry->d_op->d_manage(dentry, true) : 0;
+	return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
+		path->dentry->d_op->d_manage(path, true) : 0;
 }
 
 /*
@@ -1282,7 +1282,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 		 * Don't forget we might have a non-mountpoint managed dentry
 		 * that wants to block transit.
 		 */
-		switch (managed_dentry_rcu(path->dentry)) {
+		switch (managed_dentry_rcu(path)) {
 		case -ECHILD:
 		default:
 			return false;
@@ -1392,8 +1392,7 @@ int follow_down(struct path *path)
 		if (managed & DCACHE_MANAGE_TRANSIT) {
 			BUG_ON(!path->dentry->d_op);
 			BUG_ON(!path->dentry->d_op->d_manage);
-			ret = path->dentry->d_op->d_manage(
-				path->dentry, false);
+			ret = path->dentry->d_op->d_manage(path, false);
 			if (ret < 0)
 				return ret == -EISDIR ? 0 : ret;
 		}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 5beed7b30561..bc529ce88ed0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -139,7 +139,7 @@ struct dentry_operations {
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
-	int (*d_manage)(struct dentry *, bool);
+	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
 				 unsigned int);
 } ____cacheline_aligned;
-- 
cgit v1.2.3


From 4d796d751cefdb942a54c570bd3087d8be3bb893 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 23 Sep 2016 11:38:08 -0400
Subject: pNFS: Allow layout drivers to manage private data in struct
 nfs4_layoutreturn

Cleanup to allow layout drivers to attach private data to layoutreturn,
and manage the data.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 6 ++++++
 fs/nfs/nfs4xdr.c        | 4 +++-
 fs/nfs/pnfs.c           | 1 +
 include/linux/nfs_xdr.h | 2 ++
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1c629f5e1ae5..f992281c9b29 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3039,6 +3039,7 @@ struct nfs4_closedata {
 	struct {
 		struct nfs4_layoutreturn_args arg;
 		struct nfs4_layoutreturn_res res;
+		struct nfs4_xdr_opaque_data ld_private;
 		u32 roc_barrier;
 		bool roc;
 	} lr;
@@ -3267,6 +3268,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
 	if (IS_ERR(calldata->arg.seqid))
 		goto out_free_calldata;
 	calldata->arg.fmode = 0;
+	calldata->lr.arg.ld_private = &calldata->lr.ld_private;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
@@ -5599,6 +5601,7 @@ struct nfs4_delegreturndata {
 	struct {
 		struct nfs4_layoutreturn_args arg;
 		struct nfs4_layoutreturn_res res;
+		struct nfs4_xdr_opaque_data ld_private;
 		u32 roc_barrier;
 		bool roc;
 	} lr;
@@ -5735,6 +5738,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	data->res.fattr = &data->fattr;
 	data->res.server = server;
 	data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+	data->lr.arg.ld_private = &data->lr.ld_private;
 	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
@@ -8633,6 +8637,8 @@ static void nfs4_layoutreturn_release(void *calldata)
 	nfs4_sequence_free_slot(&lrp->res.seq_res);
 	pnfs_put_layout_hdr(lrp->args.layout);
 	nfs_iput_and_deactive(lrp->inode);
+	if (lrp->ld_private.ops && lrp->ld_private.ops->free)
+		lrp->ld_private.ops->free(&lrp->ld_private);
 	kfree(calldata);
 	dprintk("<-- %s\n", __func__);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 84fdcf27138c..1c1768a8fcd1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2035,7 +2035,9 @@ encode_layoutreturn(struct xdr_stream *xdr,
 	spin_lock(&args->inode->i_lock);
 	encode_nfs4_stateid(xdr, &args->stateid);
 	spin_unlock(&args->inode->i_lock);
-	if (lr_ops->encode_layoutreturn)
+	if (args->ld_private->ops && args->ld_private->ops->encode)
+		args->ld_private->ops->encode(xdr, args, args->ld_private);
+	else if (lr_ops->encode_layoutreturn)
 		lr_ops->encode_layoutreturn(xdr, args);
 	else
 		encode_uint32(xdr, 0);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0b25a1c820ba..4631d15227ae 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1086,6 +1086,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
 	}
 
 	pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
+	lrp->args.ld_private = &lrp->ld_private;
 	lrp->clp = NFS_SERVER(ino)->nfs_client;
 	lrp->cred = lo->plh_lc_cred;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 331a3200eb01..b64177d669fd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -320,6 +320,7 @@ struct nfs4_layoutreturn_args {
 	struct pnfs_layout_range range;
 	nfs4_stateid stateid;
 	__u32   layout_type;
+	struct nfs4_xdr_opaque_data *ld_private;
 };
 
 struct nfs4_layoutreturn_res {
@@ -335,6 +336,7 @@ struct nfs4_layoutreturn {
 	struct nfs_client *clp;
 	struct inode *inode;
 	int rpc_status;
+	struct nfs4_xdr_opaque_data ld_private;
 };
 
 #define PNFS_LAYOUTSTATS_MAXSIZE 256
-- 
cgit v1.2.3


From 422c93c881a1689b5ad99e231a65ee5c51d3b72a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 6 Oct 2016 17:53:20 -0400
Subject: pNFS/flexfiles: Minor refactoring before adding iostats to
 layoutreturn

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 59 ++++++++++++++++++++--------------
 fs/nfs/nfs42proc.c                     |  9 ++++--
 fs/nfs/nfs42xdr.c                      |  5 +--
 fs/nfs/pnfs.h                          |  1 -
 include/linux/nfs_xdr.h                |  3 +-
 5 files changed, 44 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index c18afd5cc0bb..e5078301720a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1988,7 +1988,7 @@ static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo,
 }
 
 /* report nothing for now */
-static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo,
+static void ff_layout_encode_iostats_array(struct nfs4_flexfile_layout *flo,
 				     struct xdr_stream *xdr,
 				     const struct nfs4_layoutreturn_args *args)
 {
@@ -2026,7 +2026,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
 	BUG_ON(!start);
 
 	ff_layout_encode_ioerr(flo, xdr, args, ff_opaque->data);
-	ff_layout_encode_iostats(flo, xdr, args);
+	ff_layout_encode_iostats_array(flo, xdr, args);
 
 	*start = cpu_to_be32((xdr->p - start - 1) * 4);
 	dprintk("%s: Return\n", __func__);
@@ -2191,21 +2191,18 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr,
 }
 
 static void
-ff_layout_encode_layoutstats(struct xdr_stream *xdr,
-			     struct nfs42_layoutstat_args *args,
-			     struct nfs42_layoutstat_devinfo *devinfo)
+ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
+			      const struct nfs42_layoutstat_devinfo *devinfo,
+			      struct nfs4_ff_layout_mirror *mirror)
 {
-	struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
 	struct nfs4_pnfs_ds_addr *da;
 	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
 	struct nfs_fh *fh = &mirror->fh_versions[0];
-	__be32 *p, *start;
+	__be32 *p;
 
 	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
 	dprintk("%s: DS %s: encoding address %s\n",
 		__func__, ds->ds_remotestr, da->da_remotestr);
-	/* layoutupdate length */
-	start = xdr_reserve_space(xdr, 4);
 	/* netaddr4 */
 	ff_layout_encode_netaddr(xdr, da);
 	/* nfs_fh4 */
@@ -2222,10 +2219,36 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr,
 	/* bool */
 	p = xdr_reserve_space(xdr, 4);
 	*p = cpu_to_be32(false);
+}
+
+static void
+ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args,
+			     const struct nfs4_xdr_opaque_data *opaque)
+{
+	struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque,
+			struct nfs42_layoutstat_devinfo, ld_private);
+	__be32 *start;
+
+	/* layoutupdate length */
+	start = xdr_reserve_space(xdr, 4);
+	ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);
 
 	*start = cpu_to_be32((xdr->p - start - 1) * 4);
 }
 
+static void
+ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque)
+{
+	struct nfs4_ff_layout_mirror *mirror = opaque->data;
+
+	ff_layout_put_mirror(mirror);
+}
+
+static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
+	.encode = ff_layout_encode_layoutstats,
+	.free	= ff_layout_free_layoutstats,
+};
+
 static int
 ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
 			       struct nfs42_layoutstat_devinfo *devinfo,
@@ -2257,8 +2280,8 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
 		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
 		spin_unlock(&mirror->lock);
 		devinfo->layout_type = LAYOUT_FLEX_FILES;
-		devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
-		devinfo->layout_private = mirror;
+		devinfo->ld_private.ops = &layoutstat_ops;
+		devinfo->ld_private.data = mirror;
 
 		devinfo++;
 		i++;
@@ -2291,19 +2314,6 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
 	return 0;
 }
 
-static void
-ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
-{
-	struct nfs4_ff_layout_mirror *mirror;
-	int i;
-
-	for (i = 0; i < data->args.num_dev; i++) {
-		mirror = data->args.devinfo[i].layout_private;
-		data->args.devinfo[i].layout_private = NULL;
-		ff_layout_put_mirror(mirror);
-	}
-}
-
 static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.id			= LAYOUT_FLEX_FILES,
 	.name			= "LAYOUT_FLEX_FILES",
@@ -2328,7 +2338,6 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.prepare_layoutreturn   = ff_layout_prepare_layoutreturn,
 	.sync			= pnfs_nfs_generic_sync,
 	.prepare_layoutstats	= ff_layout_prepare_layoutstats,
-	.cleanup_layoutstats	= ff_layout_cleanup_layoutstats,
 };
 
 static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 608501971fe0..d12ff9385f49 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -397,10 +397,13 @@ static void
 nfs42_layoutstat_release(void *calldata)
 {
 	struct nfs42_layoutstat_data *data = calldata;
-	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
+	struct nfs42_layoutstat_devinfo *devinfo = data->args.devinfo;
+	int i;
 
-	if (nfss->pnfs_curr_ld->cleanup_layoutstats)
-		nfss->pnfs_curr_ld->cleanup_layoutstats(data);
+	for (i = 0; i < data->args.num_dev; i++) {
+		if (devinfo[i].ld_private.ops && devinfo[i].ld_private.ops->free)
+			devinfo[i].ld_private.ops->free(&devinfo[i].ld_private);
+	}
 
 	pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
 	smp_mb__before_atomic();
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 8b2605882a20..6c7296454bbc 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -181,8 +181,9 @@ static void encode_layoutstats(struct xdr_stream *xdr,
 			NFS4_DEVICEID4_SIZE);
 	/* Encode layoutupdate4 */
 	*p++ = cpu_to_be32(devinfo->layout_type);
-	if (devinfo->layoutstats_encode != NULL)
-		devinfo->layoutstats_encode(xdr, args, devinfo);
+	if (devinfo->ld_private.ops)
+		devinfo->ld_private.ops->encode(xdr, args,
+				&devinfo->ld_private);
 	else
 		encode_uint32(xdr, 0);
 }
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d1e028175cd1..63f77b49a586 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -182,7 +182,6 @@ struct pnfs_layoutdriver_type {
 				     struct xdr_stream *xdr,
 				     const struct nfs4_layoutcommit_args *args);
 	int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
-	void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
 };
 
 struct pnfs_layout_hdr {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b64177d669fd..617cfaa20ffc 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -357,8 +357,7 @@ struct nfs42_layoutstat_devinfo {
 	__u64 write_count;
 	__u64 write_bytes;
 	__u32 layout_type;
-	layoutstats_encode_t layoutstats_encode;
-	void *layout_private;
+	struct nfs4_xdr_opaque_data ld_private;
 };
 
 struct nfs42_layoutstat_args {
-- 
cgit v1.2.3


From b1226c7db1d997fa6955cd3b54ba333bd0d8a29c Mon Sep 17 00:00:00 2001
From: Adit Ranadive <aditr@vmware.com>
Date: Sun, 2 Oct 2016 19:10:21 -0700
Subject: vmxnet3: Move PCI Id to pci_ids.h

The VMXNet3 PCI Id will be shared with our paravirtual RDMA driver.
Moved it to the shared location in pci_ids.h.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/net/vmxnet3/vmxnet3_int.h | 3 +--
 include/linux/pci_ids.h           | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 7dc37a090549..59e077be8829 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -119,9 +119,8 @@ enum {
 };
 
 /*
- * PCI vendor and device IDs.
+ * Maximum devices supported.
  */
-#define PCI_DEVICE_ID_VMWARE_VMXNET3    0x07B0
 #define MAX_ETHERNET_CARDS		10
 #define MAX_PCI_PASSTHRU_DEVICE		6
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c58752fe16c4..98bb455302cf 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2251,6 +2251,7 @@
 #define PCI_DEVICE_ID_RASTEL_2PORT	0x2000
 
 #define PCI_VENDOR_ID_VMWARE		0x15ad
+#define PCI_DEVICE_ID_VMWARE_VMXNET3	0x07b0
 
 #define PCI_VENDOR_ID_ZOLTRIX		0x15b0
 #define PCI_DEVICE_ID_ZOLTRIX_2BD0	0x2bd0
-- 
cgit v1.2.3


From c6609c0a1c34fc097152b28b496236625673924f Mon Sep 17 00:00:00 2001
From: Ian Kent <ikent@redhat.com>
Date: Thu, 24 Nov 2016 08:03:41 +1100
Subject: vfs: add path_is_mountpoint() helper

d_mountpoint() can only be used reliably to establish if a dentry is
not mounted in any namespace. It isn't aware of the possibility there
may be multiple mounts using a given dentry that may be in a different
namespace.

Add helper functions, path_is_mountpoint(), that checks if a struct path
is a mountpoint for this case.

Link: http://lkml.kernel.org/r/20161011053358.27645.9729.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  6 ++++++
 fs/namespace.c        | 29 +++++++++++++++++++++++++++++
 include/linux/mount.h |  2 ++
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index d2e25d7b64b3..2c856fc47ae3 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -94,6 +94,12 @@ extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 extern int __legitimize_mnt(struct vfsmount *, unsigned);
 extern bool legitimize_mnt(struct vfsmount *, unsigned);
 
+static inline bool __path_is_mountpoint(const struct path *path)
+{
+	struct mount *m = __lookup_mnt(path->mnt, path->dentry);
+	return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT));
+}
+
 extern void __detach_mounts(struct dentry *dentry);
 
 static inline void detach_mounts(struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index e6c234b1a645..7a73383e8365 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1159,6 +1159,35 @@ struct vfsmount *mntget(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(mntget);
 
+/* path_is_mountpoint() - Check if path is a mount in the current
+ *                          namespace.
+ *
+ *  d_mountpoint() can only be used reliably to establish if a dentry is
+ *  not mounted in any namespace and that common case is handled inline.
+ *  d_mountpoint() isn't aware of the possibility there may be multiple
+ *  mounts using a given dentry in a different namespace. This function
+ *  checks if the passed in path is a mountpoint rather than the dentry
+ *  alone.
+ */
+bool path_is_mountpoint(const struct path *path)
+{
+	unsigned seq;
+	bool res;
+
+	if (!d_mountpoint(path->dentry))
+		return false;
+
+	rcu_read_lock();
+	do {
+		seq = read_seqbegin(&mount_lock);
+		res = __path_is_mountpoint(path);
+	} while (read_seqretry(&mount_lock, seq));
+	rcu_read_unlock();
+
+	return res;
+}
+EXPORT_SYMBOL(path_is_mountpoint);
+
 struct vfsmount *mnt_clone_internal(struct path *path)
 {
 	struct mount *p;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1172cce949a4..5b6dd004bfdc 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -98,4 +98,6 @@ extern dev_t name_to_dev_t(const char *name);
 
 extern unsigned int sysctl_mount_max;
 
+extern bool path_is_mountpoint(const struct path *path);
+
 #endif /* _LINUX_MOUNT_H */
-- 
cgit v1.2.3


From 01619491a5f0766014fe863c5ae480665436e7a2 Mon Sep 17 00:00:00 2001
From: Ian Kent <ikent@redhat.com>
Date: Thu, 24 Nov 2016 08:03:41 +1100
Subject: vfs: add path_has_submounts()

d_mountpoint() can only be used reliably to establish if a dentry is
not mounted in any namespace. It isn't aware of the possibility there
may be multiple mounts using the given dentry, possibly in a different
namespace.

Add function, path_has_submounts(), that checks is a struct path contains
mounts (or is a mountpoint itself) to handle this case.

Link: http://lkml.kernel.org/r/20161011053403.27645.55242.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |  1 +
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 5c7cc953ac81..8515875854b6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1306,6 +1306,45 @@ int have_submounts(struct dentry *parent)
 }
 EXPORT_SYMBOL(have_submounts);
 
+struct check_mount {
+	struct vfsmount *mnt;
+	unsigned int mounted;
+};
+
+static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
+{
+	struct check_mount *info = data;
+	struct path path = { .mnt = info->mnt, .dentry = dentry };
+
+	if (likely(!d_mountpoint(dentry)))
+		return D_WALK_CONTINUE;
+	if (__path_is_mountpoint(&path)) {
+		info->mounted = 1;
+		return D_WALK_QUIT;
+	}
+	return D_WALK_CONTINUE;
+}
+
+/**
+ * path_has_submounts - check for mounts over a dentry in the
+ *                      current namespace.
+ * @parent: path to check.
+ *
+ * Return true if the parent or its subdirectories contain
+ * a mount point in the current namespace.
+ */
+int path_has_submounts(const struct path *parent)
+{
+	struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
+
+	read_seqlock_excl(&mount_lock);
+	d_walk(parent->dentry, &data, path_check_mount, NULL);
+	read_sequnlock_excl(&mount_lock);
+
+	return data.mounted;
+}
+EXPORT_SYMBOL(path_has_submounts);
+
 /*
  * Called by mount code to set a mountpoint and check if the mountpoint is
  * reachable (e.g. NFS can unhash a directory dentry and then the complete
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bc529ce88ed0..0ffaf7aef9ae 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -255,6 +255,7 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
+extern int path_has_submounts(const struct path *);
 
 /*
  * This adds the entry to the hash queues.
-- 
cgit v1.2.3


From f74e7b33c37e5a7bae33bb73858c2766cb256626 Mon Sep 17 00:00:00 2001
From: Ian Kent <ikent@redhat.com>
Date: Thu, 24 Nov 2016 08:03:42 +1100
Subject: vfs: remove unused have_submounts() function

Now that path_has_submounts() has been added have_submounts() is no
longer used so remove it.

Link: http://lkml.kernel.org/r/20161011053428.27645.12310.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 33 ---------------------------------
 include/linux/dcache.h |  1 -
 2 files changed, 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 8515875854b6..252378359a8f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1273,39 +1273,6 @@ rename_retry:
 	goto again;
 }
 
-/*
- * Search for at least 1 mount point in the dentry's subdirs.
- * We descend to the next level whenever the d_subdirs
- * list is non-empty and continue searching.
- */
-
-static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
-{
-	int *ret = data;
-	if (d_mountpoint(dentry)) {
-		*ret = 1;
-		return D_WALK_QUIT;
-	}
-	return D_WALK_CONTINUE;
-}
-
-/**
- * have_submounts - check for mounts over a dentry
- * @parent: dentry to check.
- *
- * Return true if the parent or its subdirectories contain
- * a mount point
- */
-int have_submounts(struct dentry *parent)
-{
-	int ret = 0;
-
-	d_walk(parent, &ret, check_mount, NULL);
-
-	return ret;
-}
-EXPORT_SYMBOL(have_submounts);
-
 struct check_mount {
 	struct vfsmount *mnt;
 	unsigned int mounted;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0ffaf7aef9ae..c965e4469499 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -254,7 +254,6 @@ extern struct dentry *d_find_alias(struct inode *);
 extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
-extern int have_submounts(struct dentry *);
 extern int path_has_submounts(const struct path *);
 
 /*
-- 
cgit v1.2.3


From adc176c5472214971d77c1a61c83db9b01e9cdc7 Mon Sep 17 00:00:00 2001
From: Erik Nordmark <nordmark@arista.com>
Date: Fri, 2 Dec 2016 14:00:08 -0800
Subject: ipv6 addrconf: Implemented enhanced DAD (RFC7527)

Implemented RFC7527 Enhanced DAD.
IPv6 duplicate address detection can fail if there is some temporary
loopback of Ethernet frames. RFC7527 solves this by including a random
nonce in the NS messages used for DAD, and if an NS is received with the
same nonce it is assumed to be a looped back DAD probe and is ignored.
RFC7527 is enabled by default. Can be disabled by setting both of
conf/{all,interface}/enhanced_dad to zero.

Signed-off-by: Erik Nordmark <nordmark@arista.com>
Signed-off-by: Bob Gilligan <gilligan@arista.com>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  9 +++++++++
 include/linux/ipv6.h                   |  1 +
 include/net/if_inet6.h                 |  1 +
 include/net/ndisc.h                    |  5 ++++-
 include/uapi/linux/ipv6.h              |  1 +
 net/ipv6/addrconf.c                    | 22 +++++++++++++++++++++-
 net/ipv6/ndisc.c                       | 29 ++++++++++++++++++++++++++---
 net/ipv6/route.c                       |  2 +-
 8 files changed, 64 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 5ca567fa6b8c..7dd65c9cf707 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1734,6 +1734,15 @@ drop_unsolicited_na - BOOLEAN
 
 	By default this is turned off.
 
+enhanced_dad - BOOLEAN
+	Include a nonce option in the IPv6 neighbor solicitation messages used for
+	duplicate address detection per RFC7527. A received DAD NS will only signal
+	a duplicate address if the nonce is different. This avoids any false
+	detection of duplicates due to loopback of the NS messages that we send.
+	The nonce option will be sent on an interface unless both of
+	conf/{all,interface}/enhanced_dad are set to FALSE.
+	Default: TRUE
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 3f95233b2733..671d014e6429 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -68,6 +68,7 @@ struct ipv6_devconf {
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	__s32		seg6_require_hmac;
 #endif
+	__u32		enhanced_dad;
 
 	struct ctl_table_header *sysctl_header;
 };
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index b0576cb2ab25..0fa4c324b713 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -55,6 +55,7 @@ struct inet6_ifaddr {
 	__u8			stable_privacy_retry;
 
 	__u16			scope;
+	__u64			dad_nonce;
 
 	unsigned long		cstamp;	/* created timestamp */
 	unsigned long		tstamp; /* updated timestamp */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index be1fe2283254..d562a2fe4860 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -31,6 +31,7 @@ enum {
 	ND_OPT_PREFIX_INFO = 3,		/* RFC2461 */
 	ND_OPT_REDIRECT_HDR = 4,	/* RFC2461 */
 	ND_OPT_MTU = 5,			/* RFC2461 */
+	ND_OPT_NONCE = 14,              /* RFC7527 */
 	__ND_OPT_ARRAY_MAX,
 	ND_OPT_ROUTE_INFO = 24,		/* RFC4191 */
 	ND_OPT_RDNSS = 25,		/* RFC5006 */
@@ -121,6 +122,7 @@ struct ndisc_options {
 #define nd_opts_pi_end			nd_opt_array[__ND_OPT_PREFIX_INFO_END]
 #define nd_opts_rh			nd_opt_array[ND_OPT_REDIRECT_HDR]
 #define nd_opts_mtu			nd_opt_array[ND_OPT_MTU]
+#define nd_opts_nonce			nd_opt_array[ND_OPT_NONCE]
 #define nd_802154_opts_src_lladdr	nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR]
 #define nd_802154_opts_tgt_lladdr	nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR]
 
@@ -398,7 +400,8 @@ void ndisc_cleanup(void);
 int ndisc_rcv(struct sk_buff *skb);
 
 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr);
+		   const struct in6_addr *daddr, const struct in6_addr *saddr,
+		   u64 nonce);
 
 void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 53561be1ac21..eaf65dc82e22 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -181,6 +181,7 @@ enum {
 	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
 	DEVCONF_SEG6_ENABLED,
 	DEVCONF_SEG6_REQUIRE_HMAC,
+	DEVCONF_ENHANCED_DAD,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4c387dc338e3..c1e124bc8e1e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -242,6 +242,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	.seg6_require_hmac	= 0,
 #endif
+	.enhanced_dad           = 1,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -292,6 +293,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	.seg6_require_hmac	= 0,
 #endif
+	.enhanced_dad           = 1,
 };
 
 /* Check if a valid qdisc is available */
@@ -3735,12 +3737,21 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 {
 	unsigned long rand_num;
 	struct inet6_dev *idev = ifp->idev;
+	u64 nonce;
 
 	if (ifp->flags & IFA_F_OPTIMISTIC)
 		rand_num = 0;
 	else
 		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
 
+	nonce = 0;
+	if (idev->cnf.enhanced_dad ||
+	    dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
+		do
+			get_random_bytes(&nonce, 6);
+		while (nonce == 0);
+	}
+	ifp->dad_nonce = nonce;
 	ifp->dad_probes = idev->cnf.dad_transmits;
 	addrconf_mod_dad_work(ifp, rand_num);
 }
@@ -3918,7 +3929,8 @@ static void addrconf_dad_work(struct work_struct *w)
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-	ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
+	ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any,
+		      ifp->dad_nonce);
 out:
 	in6_ifa_put(ifp);
 	rtnl_unlock();
@@ -4962,6 +4974,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
 #endif
+	array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6069,6 +6082,13 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
+	{
+		.procname       = "enhanced_dad",
+		.data           = &ipv6_devconf.enhanced_dad,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d8e671457d10..7ebac630d3c6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -233,6 +233,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
 		case ND_OPT_SOURCE_LL_ADDR:
 		case ND_OPT_TARGET_LL_ADDR:
 		case ND_OPT_MTU:
+		case ND_OPT_NONCE:
 		case ND_OPT_REDIRECT_HDR:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 				ND_PRINTK(2, warn,
@@ -568,7 +569,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
 }
 
 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr)
+		   const struct in6_addr *daddr, const struct in6_addr *saddr,
+		   u64 nonce)
 {
 	struct sk_buff *skb;
 	struct in6_addr addr_buf;
@@ -588,6 +590,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
 	if (inc_opt)
 		optlen += ndisc_opt_addr_space(dev,
 					       NDISC_NEIGHBOUR_SOLICITATION);
+	if (nonce != 0)
+		optlen += 8;
 
 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
 	if (!skb)
@@ -605,6 +609,13 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
 				       dev->dev_addr,
 				       NDISC_NEIGHBOUR_SOLICITATION);
+	if (nonce != 0) {
+		u8 *opt = skb_put(skb, 8);
+
+		opt[0] = ND_OPT_NONCE;
+		opt[1] = 8 >> 3;
+		memcpy(opt + 2, &nonce, 6);
+	}
 
 	ndisc_send_skb(skb, daddr, saddr);
 }
@@ -693,12 +704,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
 				  __func__, target);
 		}
-		ndisc_send_ns(dev, target, target, saddr);
+		ndisc_send_ns(dev, target, target, saddr, 0);
 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
 		neigh_app_ns(neigh);
 	} else {
 		addrconf_addr_solict_mult(target, &mcaddr);
-		ndisc_send_ns(dev, target, &mcaddr, saddr);
+		ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
 	}
 }
 
@@ -742,6 +753,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	int dad = ipv6_addr_any(saddr);
 	bool inc;
 	int is_router = -1;
+	u64 nonce = 0;
 
 	if (skb->len < sizeof(struct nd_msg)) {
 		ND_PRINTK(2, warn, "NS: packet too short\n");
@@ -786,6 +798,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			return;
 		}
 	}
+	if (ndopts.nd_opts_nonce)
+		memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
 
 	inc = ipv6_addr_is_multicast(daddr);
 
@@ -794,6 +808,15 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 have_ifp:
 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
 			if (dad) {
+				if (nonce != 0 && ifp->dad_nonce == nonce) {
+					u8 *np = (u8 *)&nonce;
+					/* Matching nonce if looped back */
+					ND_PRINTK(2, notice,
+						  "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
+						  ifp->idev->dev->name,
+						  &ifp->addr, np);
+					goto out;
+				}
 				/*
 				 * We are colliding with another node
 				 * who is doing DAD
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b317bb135ed4..aac7818e2e0f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -527,7 +527,7 @@ static void rt6_probe_deferred(struct work_struct *w)
 		container_of(w, struct __rt6_probe_work, work);
 
 	addrconf_addr_solict_mult(&work->target, &mcaddr);
-	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
+	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 	dev_put(work->dev);
 	kfree(work);
 }
-- 
cgit v1.2.3


From c51d39010a1bccc9c1294e2d7c00005aefeb2b5c Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 15 Nov 2016 15:08:25 +0100
Subject: netfilter: conntrack: built-in support for DCCP

CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection
tracking support for DCCP protocol is built-in into nf_conntrack.ko.

footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \
        net/ipv4/netfilter/nf_conntrack_ipv4.ko \
        net/ipv6/netfilter/nf_conntrack_ipv6.ko

(builtin)||  dccp  |  ipv4  |  ipv6  | nf_conntrack
---------++--------+--------+--------+--------------
none     || 469140 | 828755 | 828676 | 6141434
DCCP     ||   -    | 830566 | 829935 | 6533526

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_dccp.h    |  2 +-
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 +
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  3 +
 include/net/netns/conntrack.h                  | 14 +++++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  3 +
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  3 +
 net/netfilter/Kconfig                          |  6 +-
 net/netfilter/Makefile                         |  3 +-
 net/netfilter/nf_conntrack_proto_dccp.c        | 79 ++++----------------------
 9 files changed, 41 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h
index 40dcc82058d1..ff721d7325cf 100644
--- a/include/linux/netfilter/nf_conntrack_dccp.h
+++ b/include/linux/netfilter/nf_conntrack_dccp.h
@@ -25,7 +25,7 @@ enum ct_dccp_roles {
 #define CT_DCCP_ROLE_MAX	(__CT_DCCP_ROLE_MAX - 1)
 
 #ifdef __KERNEL__
-#include <net/netfilter/nf_conntrack_tuple.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
 
 struct nf_ct_dccp {
 	u_int8_t	role[IP_CT_DIR_MAX];
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 981c327374da..c2f155fd9299 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -15,6 +15,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+#endif
 
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index a4c993685795..5ec66c0d21c4 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -6,6 +6,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+#endif
 
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3d06d94d2e52..440b781baf0b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -6,6 +6,9 @@
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
 #include <linux/netfilter/nf_conntrack_tcp.h>
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+#include <linux/netfilter/nf_conntrack_dccp.h>
+#endif
 #include <linux/seqlock.h>
 
 struct ctl_table_header;
@@ -48,12 +51,23 @@ struct nf_icmp_net {
 	unsigned int timeout;
 };
 
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+struct nf_dccp_net {
+	struct nf_proto_net pn;
+	int dccp_loose;
+	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
+};
+#endif
+
 struct nf_ip_net {
 	struct nf_generic_net   generic;
 	struct nf_tcp_net	tcp;
 	struct nf_udp_net	udp;
 	struct nf_icmp_net	icmp;
 	struct nf_icmp_net	icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	struct nf_dccp_net	dccp;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7130ed5dc1fa..cb3cf770b00c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 	&nf_conntrack_l4proto_tcp4,
 	&nf_conntrack_l4proto_udp4,
 	&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp4,
+#endif
 };
 
 static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 500be28ff563..f52338d02951 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 	&nf_conntrack_l4proto_tcp6,
 	&nf_conntrack_l4proto_udp6,
 	&nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp6,
+#endif
 };
 
 static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 496e1dcbd003..27a3d8c8f8ce 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -146,14 +146,14 @@ config NF_CONNTRACK_LABELS
 	  to connection tracking entries.  It selected by the connlabel match.
 
 config NF_CT_PROTO_DCCP
-	tristate 'DCCP protocol connection tracking support'
+	bool 'DCCP protocol connection tracking support'
 	depends on NETFILTER_ADVANCED
-	default IP_DCCP
+	default y
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on DCCP connections.
 
-	  If unsure, say 'N'.
+	  If unsure, say Y.
 
 config NF_CT_PROTO_GRE
 	tristate
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3b97d89df2cd..bbd0cc08eff0 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,6 +5,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -16,8 +17,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
 
-# SCTP protocol connection tracking
-obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
 obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 073b047314dc..b68ce6ac13b3 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -9,7 +9,6 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/spinlock.h>
@@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 	},
 };
 
-/* this module per-net specifics */
-static unsigned int dccp_net_id __read_mostly;
-struct dccp_net {
-	struct nf_proto_net pn;
-	int dccp_loose;
-	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
-};
-
-static inline struct dccp_net *dccp_pernet(struct net *net)
+static inline struct nf_dccp_net *dccp_pernet(struct net *net)
 {
-	return net_generic(net, dccp_net_id);
+	return &net->ct.nf_ct_proto.dccp;
 }
 
 static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		     unsigned int dataoff, unsigned int *timeouts)
 {
 	struct net *net = nf_ct_net(ct);
-	struct dccp_net *dn;
+	struct nf_dccp_net *dn;
 	struct dccp_hdr _dh, *dh;
 	const char *msg;
 	u_int8_t state;
@@ -719,7 +710,7 @@ static int dccp_nlattr_size(void)
 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
-	struct dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = dccp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = {
 #endif /* CONFIG_SYSCTL */
 
 static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
-				     struct dccp_net *dn)
+				     struct nf_dccp_net *dn)
 {
 #ifdef CONFIG_SYSCTL
 	if (pn->ctl_table)
@@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
 
 static int dccp_init_net(struct net *net, u_int16_t proto)
 {
-	struct dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = dccp_pernet(net);
 	struct nf_proto_net *pn = &dn->pn;
 
 	if (!pn->users) {
@@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
 	return dccp_kmemdup_sysctl_table(net, pn, dn);
 }
 
-static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
 	.name			= "dccp",
@@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.net_id			= &dccp_net_id,
 	.init_net		= dccp_init_net,
 };
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 
-static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
 	.name			= "dccp",
@@ -932,56 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.net_id			= &dccp_net_id,
 	.init_net		= dccp_init_net,
 };
-
-static struct nf_conntrack_l4proto *dccp_proto[] = {
-	&dccp_proto4,
-	&dccp_proto6,
-};
-
-static __net_init int dccp_net_init(struct net *net)
-{
-	return nf_ct_l4proto_pernet_register(net, dccp_proto,
-					     ARRAY_SIZE(dccp_proto));
-}
-
-static __net_exit void dccp_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister(net, dccp_proto,
-					ARRAY_SIZE(dccp_proto));
-}
-
-static struct pernet_operations dccp_net_ops = {
-	.init = dccp_net_init,
-	.exit = dccp_net_exit,
-	.id   = &dccp_net_id,
-	.size = sizeof(struct dccp_net),
-};
-
-static int __init nf_conntrack_proto_dccp_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&dccp_net_ops);
-	if (ret < 0)
-		return ret;
-	ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto));
-	if (ret < 0)
-		unregister_pernet_subsys(&dccp_net_ops);
-	return ret;
-}
-
-static void __exit nf_conntrack_proto_dccp_fini(void)
-{
-	nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto));
-	unregister_pernet_subsys(&dccp_net_ops);
-}
-
-module_init(nf_conntrack_proto_dccp_init);
-module_exit(nf_conntrack_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
-- 
cgit v1.2.3


From 1cd9cb05f96e526f41bb4704caa95dc40ed08c5d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 4 Dec 2016 18:34:34 -0500
Subject: NFS: Only look at the change attribute cache state in
 nfs_check_verifier

When looking at whether or not our dcache is valid, we really don't care
about the general state of the directory attribute cache. Instead, we
we only care about the state of the change attribute.

This fixes a performance issue when the client is responsible for
changing the directory contents; a number of NFSv4 operations will
atomically update the directory change attribute, but may not return
all the other attributes.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c           | 14 ++++++--------
 fs/nfs/inode.c         |  2 +-
 include/linux/nfs_fs.h |  1 +
 3 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f5702457c052..7483722162fa 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1030,8 +1030,6 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
 static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
 			      int rcu_walk)
 {
-	int ret;
-
 	if (IS_ROOT(dentry))
 		return 1;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -1039,12 +1037,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
-	if (rcu_walk)
-		ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
-	else
-		ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
-	if (ret < 0)
-		return 0;
+	if (nfs_mapping_need_revalidate_inode(dir)) {
+		if (rcu_walk)
+			return 0;
+		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+			return 0;
+	}
 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
 	return 1;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df4d7ec348ed..7de345fd8e1e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1112,7 +1112,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 	return 0;
 }
 
-static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 {
 	unsigned long cache_validity = NFS_I(inode)->cache_validity;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index db1002abc95e..cb631973839a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -345,6 +345,7 @@ extern int nfs_attribute_cache_expired(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_revalidate_mapping_rcu(struct inode *inode);
 extern int nfs_setattr(struct dentry *, struct iattr *);
-- 
cgit v1.2.3


From e0097cf5f2f1b7b8a594beaa32a604776d3ca6ce Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 29 Nov 2016 12:09:10 +0200
Subject: mmc: queue: Fix queue thread wake-up

The only time the driver sleeps expecting to be woken upon the arrival of
a new request, is when the dispatch queue is empty. The only time that it
is known whether the dispatch queue is empty is after NULL is returned
from blk_fetch_request() while under the queue lock.

Recognizing those facts, simplify the synchronization between the queue
thread and the request function. A couple of flags tell the request
function what to do, and the queue lock and barriers associated with
wake-ups ensure synchronization.

The result is simpler and allows the removal of the context_info lock.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Harjani Ritesh <riteshh@codeaurora.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/card/block.c |  7 -------
 drivers/mmc/card/queue.c | 35 +++++++++++++++++++++--------------
 drivers/mmc/card/queue.h |  1 +
 drivers/mmc/core/core.c  |  6 ------
 include/linux/mmc/host.h |  2 --
 5 files changed, 22 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 3e73be0e7281..646d1a1fa6ca 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1758,8 +1758,6 @@ int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	int ret;
 	struct mmc_blk_data *md = mq->blkdata;
 	struct mmc_card *card = md->queue.card;
-	struct mmc_host *host = card->host;
-	unsigned long flags;
 	bool req_is_special = mmc_req_is_special(req);
 
 	if (req && !mq->mqrq_prev->req)
@@ -1792,11 +1790,6 @@ int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			mmc_blk_issue_rw_rq(mq, NULL);
 		ret = mmc_blk_issue_flush(mq, req);
 	} else {
-		if (!req && host->areq) {
-			spin_lock_irqsave(&host->context_info.lock, flags);
-			host->context_info.is_waiting_last_req = true;
-			spin_unlock_irqrestore(&host->context_info.lock, flags);
-		}
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 7dacf2744fbd..010888d8d97c 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -53,6 +53,7 @@ static int mmc_queue_thread(void *d)
 {
 	struct mmc_queue *mq = d;
 	struct request_queue *q = mq->queue;
+	struct mmc_context_info *cntx = &mq->card->host->context_info;
 
 	current->flags |= PF_MEMALLOC;
 
@@ -63,6 +64,19 @@ static int mmc_queue_thread(void *d)
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 		req = blk_fetch_request(q);
+		mq->asleep = false;
+		cntx->is_waiting_last_req = false;
+		cntx->is_new_req = false;
+		if (!req) {
+			/*
+			 * Dispatch queue is empty so set flags for
+			 * mmc_request_fn() to wake us up.
+			 */
+			if (mq->mqrq_prev->req)
+				cntx->is_waiting_last_req = true;
+			else
+				mq->asleep = true;
+		}
 		mq->mqrq_cur->req = req;
 		spin_unlock_irq(q->queue_lock);
 
@@ -115,7 +129,6 @@ static void mmc_request_fn(struct request_queue *q)
 {
 	struct mmc_queue *mq = q->queuedata;
 	struct request *req;
-	unsigned long flags;
 	struct mmc_context_info *cntx;
 
 	if (!mq) {
@@ -127,19 +140,13 @@ static void mmc_request_fn(struct request_queue *q)
 	}
 
 	cntx = &mq->card->host->context_info;
-	if (!mq->mqrq_cur->req && mq->mqrq_prev->req) {
-		/*
-		 * New MMC request arrived when MMC thread may be
-		 * blocked on the previous request to be complete
-		 * with no current request fetched
-		 */
-		spin_lock_irqsave(&cntx->lock, flags);
-		if (cntx->is_waiting_last_req) {
-			cntx->is_new_req = true;
-			wake_up_interruptible(&cntx->wait);
-		}
-		spin_unlock_irqrestore(&cntx->lock, flags);
-	} else if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
+
+	if (cntx->is_waiting_last_req) {
+		cntx->is_new_req = true;
+		wake_up_interruptible(&cntx->wait);
+	}
+
+	if (mq->asleep)
 		wake_up_process(mq->thread);
 }
 
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index 47f5532b5776..d09fce655800 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -39,6 +39,7 @@ struct mmc_queue {
 	unsigned int		flags;
 #define MMC_QUEUE_SUSPENDED	(1 << 0)
 #define MMC_QUEUE_NEW_REQUEST	(1 << 1)
+	bool			asleep;
 	struct mmc_blk_data	*blkdata;
 	struct request_queue	*queue;
 	struct mmc_queue_req	mqrq[2];
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f39397f7c8dc..dc1f27ee50b8 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -504,18 +504,14 @@ static enum mmc_blk_status mmc_wait_for_data_req_done(struct mmc_host *host,
 	struct mmc_command *cmd;
 	struct mmc_context_info *context_info = &host->context_info;
 	enum mmc_blk_status status;
-	unsigned long flags;
 
 	while (1) {
 		wait_event_interruptible(context_info->wait,
 				(context_info->is_done_rcv ||
 				 context_info->is_new_req));
-		spin_lock_irqsave(&context_info->lock, flags);
 		context_info->is_waiting_last_req = false;
-		spin_unlock_irqrestore(&context_info->lock, flags);
 		if (context_info->is_done_rcv) {
 			context_info->is_done_rcv = false;
-			context_info->is_new_req = false;
 			cmd = mrq->cmd;
 
 			if (!cmd->error || !cmd->retries ||
@@ -534,7 +530,6 @@ static enum mmc_blk_status mmc_wait_for_data_req_done(struct mmc_host *host,
 				continue; /* wait for done/new event again */
 			}
 		} else if (context_info->is_new_req) {
-			context_info->is_new_req = false;
 			if (!next_req)
 				return MMC_BLK_NEW_REQUEST;
 		}
@@ -3016,7 +3011,6 @@ void mmc_unregister_pm_notifier(struct mmc_host *host)
  */
 void mmc_init_context_info(struct mmc_host *host)
 {
-	spin_lock_init(&host->context_info.lock);
 	host->context_info.is_new_req = false;
 	host->context_info.is_done_rcv = false;
 	host->context_info.is_waiting_last_req = false;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 2ce32fefb41c..8bc884121465 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -197,14 +197,12 @@ struct mmc_slot {
  * @is_new_req		wake up reason was new request
  * @is_waiting_last_req	mmc context waiting for single running request
  * @wait		wait queue
- * @lock		lock to protect data fields
  */
 struct mmc_context_info {
 	bool			is_done_rcv;
 	bool			is_new_req;
 	bool			is_waiting_last_req;
 	wait_queue_head_t	wait;
-	spinlock_t		lock;
 };
 
 struct regulator;
-- 
cgit v1.2.3


From 925ff3a7a334b3fe968ae15f07d22df21addad26 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 29 Nov 2016 12:09:16 +0200
Subject: mmc: mmc: Add Command Queue definitions

Add definitions relating to Command Queuing.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c   | 18 ++++++++++++++++++
 include/linux/mmc/card.h |  2 ++
 include/linux/mmc/mmc.h  | 17 +++++++++++++++++
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 3268fcd3378d..16cf6ea6e1ba 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -618,6 +618,24 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			(ext_csd[EXT_CSD_SUPPORTED_MODE] & 0x1) &&
 			!(ext_csd[EXT_CSD_FW_CONFIG] & 0x1);
 	}
+
+	/* eMMC v5.1 or later */
+	if (card->ext_csd.rev >= 8) {
+		card->ext_csd.cmdq_support = ext_csd[EXT_CSD_CMDQ_SUPPORT] &
+					     EXT_CSD_CMDQ_SUPPORTED;
+		card->ext_csd.cmdq_depth = (ext_csd[EXT_CSD_CMDQ_DEPTH] &
+					    EXT_CSD_CMDQ_DEPTH_MASK) + 1;
+		/* Exclude inefficiently small queue depths */
+		if (card->ext_csd.cmdq_depth <= 2) {
+			card->ext_csd.cmdq_support = false;
+			card->ext_csd.cmdq_depth = 0;
+		}
+		if (card->ext_csd.cmdq_support) {
+			pr_debug("%s: Command Queue supported depth %u\n",
+				 mmc_hostname(card->host),
+				 card->ext_csd.cmdq_depth);
+		}
+	}
 out:
 	return err;
 }
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index e49a3ff9d0e0..95d69d498296 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -89,6 +89,8 @@ struct mmc_ext_csd {
 	unsigned int		boot_ro_lock;		/* ro lock support */
 	bool			boot_ro_lockable;
 	bool			ffu_capable;	/* Firmware upgrade support */
+	bool			cmdq_support;	/* Command Queue supported */
+	unsigned int		cmdq_depth;	/* Command Queue depth */
 #define MMC_FIRMWARE_LEN 8
 	u8			fwrev[MMC_FIRMWARE_LEN];  /* FW version */
 	u8			raw_exception_status;	/* 54 */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index c376209c70ef..672730acc705 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -84,6 +84,13 @@
 #define MMC_APP_CMD              55   /* ac   [31:16] RCA        R1  */
 #define MMC_GEN_CMD              56   /* adtc [0] RD/WR          R1  */
 
+  /* class 11 */
+#define MMC_QUE_TASK_PARAMS      44   /* ac   [20:16] task id    R1  */
+#define MMC_QUE_TASK_ADDR        45   /* ac   [31:0] data addr   R1  */
+#define MMC_EXECUTE_READ_TASK    46   /* adtc [20:16] task id    R1  */
+#define MMC_EXECUTE_WRITE_TASK   47   /* adtc [20:16] task id    R1  */
+#define MMC_CMDQ_TASK_MGMT       48   /* ac   [20:16] task id    R1b */
+
 static inline bool mmc_op_multi(u32 opcode)
 {
 	return opcode == MMC_WRITE_MULTIPLE_BLOCK ||
@@ -272,6 +279,7 @@ struct _mmc_csd {
  * EXT_CSD fields
  */
 
+#define EXT_CSD_CMDQ_MODE_EN		15	/* R/W */
 #define EXT_CSD_FLUSH_CACHE		32      /* W */
 #define EXT_CSD_CACHE_CTRL		33      /* R/W */
 #define EXT_CSD_POWER_OFF_NOTIFICATION	34	/* R/W */
@@ -331,6 +339,8 @@ struct _mmc_csd {
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
 #define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_FIRMWARE_VERSION	254	/* RO, 8 bytes */
+#define EXT_CSD_CMDQ_DEPTH		307	/* RO */
+#define EXT_CSD_CMDQ_SUPPORT		308	/* RO */
 #define EXT_CSD_SUPPORTED_MODE		493	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
@@ -437,6 +447,13 @@ struct _mmc_csd {
  */
 #define EXT_CSD_MANUAL_BKOPS_MASK	0x01
 
+/*
+ * Command Queue
+ */
+#define EXT_CSD_CMDQ_MODE_ENABLED	BIT(0)
+#define EXT_CSD_CMDQ_DEPTH_MASK		GENMASK(4, 0)
+#define EXT_CSD_CMDQ_SUPPORTED		BIT(0)
+
 /*
  * MMC_SWITCH access modes
  */
-- 
cgit v1.2.3


From 30103b5b6432a51c3822a26dc340e35d91237f39 Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Sun, 4 Dec 2016 16:52:31 -0600
Subject: regulator: Fix regulator_get_error_flags() signature mismatch

The function signature of does not match regulator_get_error_flags()
when CONFIG_REGULATOR is not defined vs. when it is not defined.
This makes both declarations match to prevent compiler errors.

Signed-off-by: David Lechner <david@lechnology.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 528eb1f5273e..ea0fffa5faeb 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -498,7 +498,8 @@ static inline unsigned int regulator_get_mode(struct regulator *regulator)
 	return REGULATOR_MODE_NORMAL;
 }
 
-static inline int regulator_get_error_flags(struct regulator *regulator)
+static inline int regulator_get_error_flags(struct regulator *regulator,
+					    unsigned int *flags)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From e711f0309109701cb422aab44ace4ea0dccb89ea Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 2 Dec 2016 15:14:23 +0200
Subject: mmc: mmc: Introduce mmc_abort_tuning()

If a tuning command times out, the card could still be processing it, which
will cause problems for recovery. The eMMC specification says that CMD12
can be used to stop CMD21, so add a function that does that.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc_ops.c | 25 +++++++++++++++++++++++++
 include/linux/mmc/core.h   |  1 +
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 81ce63bb0773..b11c3455b040 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -678,6 +678,31 @@ out:
 }
 EXPORT_SYMBOL_GPL(mmc_send_tuning);
 
+int mmc_abort_tuning(struct mmc_host *host, u32 opcode)
+{
+	struct mmc_command cmd = {0};
+
+	/*
+	 * eMMC specification specifies that CMD12 can be used to stop a tuning
+	 * command, but SD specification does not, so do nothing unless it is
+	 * eMMC.
+	 */
+	if (opcode != MMC_SEND_TUNING_BLOCK_HS200)
+		return 0;
+
+	cmd.opcode = MMC_STOP_TRANSMISSION;
+	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
+
+	/*
+	 * For drivers that override R1 to R1b, set an arbitrary timeout based
+	 * on the tuning timeout i.e. 150ms.
+	 */
+	cmd.busy_timeout = 150;
+
+	return mmc_wait_for_cmd(host, &cmd, 0);
+}
+EXPORT_SYMBOL_GPL(mmc_abort_tuning);
+
 static int
 mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode,
 		  u8 len)
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 0ce928b3ce90..e33cc748dcfe 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -176,6 +176,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 extern void mmc_start_bkops(struct mmc_card *card, bool from_exception);
 extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
 extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
+extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
 extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 
 #define MMC_ERASE_ARG		0x00000000
-- 
cgit v1.2.3


From 64df1148876e35e81e91195e01c8197edc66fcc5 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 4 Dec 2016 13:10:04 +0100
Subject: driver core: Silence device links sphinx warning

Silence this warning emitted by sphinx:
include/linux/device.h:938: warning: No description found for parameter 'links'

While at it, fix typos in comments of device links code.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Silvio Fricke <silvio.fricke@gmail.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 4 ++--
 include/linux/device.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index b8b2f6105476..020ea7f05520 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -172,7 +172,7 @@ static int device_reorder_to_tail(struct device *dev, void *not_used)
  *
  * The supplier device is required to be registered when this function is called
  * and NULL will be returned if that is not the case.  The consumer device need
- * not be registerd, however.
+ * not be registered, however.
  */
 struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags)
@@ -225,7 +225,7 @@ struct device_link *device_link_add(struct device *consumer,
 	INIT_LIST_HEAD(&link->c_node);
 	link->flags = flags;
 
-	/* Deterine the initial link state. */
+	/* Determine the initial link state. */
 	if (flags & DL_FLAG_STATELESS) {
 		link->status = DL_STATE_NONE;
 	} else {
diff --git a/include/linux/device.h b/include/linux/device.h
index 4cd8e52033b0..67bbbee8fe02 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -817,6 +817,7 @@ struct dev_links_info {
  * 		on.  This shrinks the "Board Support Packages" (BSPs) and
  * 		minimizes board-specific #ifdefs in drivers.
  * @driver_data: Private pointer for driver specific info.
+ * @links:	Links to suppliers and consumers of this device.
  * @power:	For device power management.
  * 		See Documentation/power/devices.txt for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
-- 
cgit v1.2.3


From 9d4b82706357f2eb23f45309227fc94d11eea255 Mon Sep 17 00:00:00 2001
From: Changming Huang <jerry.huang@nxp.com>
Date: Tue, 29 Nov 2016 13:45:38 +0800
Subject: fsl/usb: Workarourd for USB erratum-A005697

The EHCI specification states the following in the SUSP bit description:
In the Suspend state, the port is sensitive to resume detection.
Note that the bit status does not change until the port is suspended and
that there may be a delay in suspending a port if there is a transaction
currently in progress on the USB.

However, in NXP USBDR controller, the PORTSCx[SUSP] bit changes immediately
when the application sets it and not when the port is actually suspended.

So the application must wait for at least 10 milliseconds after a port
indicates that it is suspended, to make sure this port has entered
suspended state before initiating this port resume using the Force Port
Resume bit. This bit is for NXP controller, not EHCI compatible.

Signed-off-by: Changming Huang <jerry.huang@nxp.com>
Signed-off-by: Ramneek Mehresh <ramneek.mehresh@nxp.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-fsl.c      |  3 +++
 drivers/usb/host/ehci-hub.c      | 14 ++++++++++++++
 drivers/usb/host/ehci.h          |  8 ++++++++
 drivers/usb/host/fsl-mph-dr-of.c |  2 ++
 include/linux/fsl_devices.h      |  1 +
 5 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 9f5ffb629973..91701cc68082 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -286,6 +286,9 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci)
 	if (pdata->has_fsl_erratum_a005275 == 1)
 		ehci->has_fsl_hs_errata = 1;
 
+	if (pdata->has_fsl_erratum_a005697 == 1)
+		ehci->has_fsl_susp_errata = 1;
+
 	if ((pdata->operating_mode == FSL_USB2_DR_HOST) ||
 			(pdata->operating_mode == FSL_USB2_DR_OTG))
 		if (ehci_fsl_setup_phy(hcd, pdata->phy_mode, 0))
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 74f62d68f013..df169c8e7225 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -310,6 +310,14 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 	}
 	spin_unlock_irq(&ehci->lock);
 
+	if (changed && ehci_has_fsl_susp_errata(ehci))
+		/*
+		 * Wait for at least 10 millisecondes to ensure the controller
+		 * enter the suspend status before initiating a port resume
+		 * using the Force Port Resume bit (Not-EHCI compatible).
+		 */
+		usleep_range(10000, 20000);
+
 	if ((changed && ehci->has_tdi_phy_lpm) || fs_idle_delay) {
 		/*
 		 * Wait for HCD to enter low-power mode or for the bus
@@ -1200,6 +1208,12 @@ int ehci_hub_control(
 					wIndex, (temp1 & HOSTPC_PHCD) ?
 					"succeeded" : "failed");
 			}
+			if (ehci_has_fsl_susp_errata(ehci)) {
+				/* 10ms for HCD enter suspend */
+				spin_unlock_irqrestore(&ehci->lock, flags);
+				usleep_range(10000, 20000);
+				spin_lock_irqsave(&ehci->lock, flags);
+			}
 			set_bit(wIndex, &ehci->suspended_ports);
 			break;
 		case USB_PORT_FEAT_POWER:
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 3f3b74aeca97..a8e36170d8b8 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -219,6 +219,7 @@ struct ehci_hcd {			/* one per controller */
 	unsigned		no_selective_suspend:1;
 	unsigned		has_fsl_port_bug:1; /* FreeScale */
 	unsigned		has_fsl_hs_errata:1;	/* Freescale HS quirk */
+	unsigned		has_fsl_susp_errata:1;	/* NXP SUSP quirk */
 	unsigned		big_endian_mmio:1;
 	unsigned		big_endian_desc:1;
 	unsigned		big_endian_capbase:1;
@@ -709,6 +710,13 @@ ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc)
 #define ehci_has_fsl_hs_errata(e)	(0)
 #endif
 
+/*
+ * Some Freescale/NXP processors have an erratum (USB A-005697)
+ * in which we need to wait for 10ms for bus to enter suspend mode
+ * after setting SUSP bit.
+ */
+#define ehci_has_fsl_susp_errata(e)	((e)->has_fsl_susp_errata)
+
 /*
  * While most USB host controllers implement their registers in
  * little-endian format, a minority (celleb companion chip) implement
diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c
index f07ccb25bc24..e90ddb530765 100644
--- a/drivers/usb/host/fsl-mph-dr-of.c
+++ b/drivers/usb/host/fsl-mph-dr-of.c
@@ -226,6 +226,8 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev)
 		of_property_read_bool(np, "fsl,usb-erratum-a007792");
 	pdata->has_fsl_erratum_a005275 =
 		of_property_read_bool(np, "fsl,usb-erratum-a005275");
+	pdata->has_fsl_erratum_a005697 =
+		of_property_read_bool(np, "fsl,usb_erratum-a005697");
 
 	/*
 	 * Determine whether phy_clk_valid needs to be checked
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index f2912914141a..60cef8227534 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -100,6 +100,7 @@ struct fsl_usb2_platform_data {
 	unsigned	already_suspended:1;
 	unsigned        has_fsl_erratum_a007792:1;
 	unsigned        has_fsl_erratum_a005275:1;
+	unsigned	has_fsl_erratum_a005697:1;
 	unsigned        check_phy_clk_valid:1;
 
 	/* register save area for suspend/resume */
-- 
cgit v1.2.3


From b9c2a2a39898d55b9fe13aa1fe15891e37bc9087 Mon Sep 17 00:00:00 2001
From: Tal Shorer <tal.shorer@gmail.com>
Date: Fri, 18 Nov 2016 14:17:26 +0200
Subject: usb: hcd.h: construct hub class request constants from simpler
 constants

Currently, each hub class request constant is defined by a line like:
#define ClearHubFeature		(0x2000 | USB_REQ_CLEAR_FEATURE)

The "magic" number for the high byte is one of 0x20, 0xa0, 0x23, 0xa3.
The 0x80 bit that changes inditace USB_DIR_IN, and the 0x03 that
pops up is the difference between USB_RECIP_DEVICE (0x00) and
USB_RECIP_OTHER (0x03). The constant 0x20 bit is USB_TYPE_CLASS.

This patch eliminates those magic numbers by defining a macro to help
construct these hub class request from simpler constants.
Note that USB_RT_HUB is defined as (USB_TYPE_CLASS | USB_RECIP_DEVICE)
and that USB_RT_PORT is defined as (USB_TYPE_CLASS | USB_RECIP_OTHER).

Signed-off-by: Tal Shorer <tal.shorer@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 66fc13705ab7..40edf6a8533e 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -566,21 +566,22 @@ extern void usb_ep0_reinit(struct usb_device *);
 	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
 
 /* class requests from the USB 2.0 hub spec, table 11-15 */
+#define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request))
 /* GetBusState and SetHubDescriptor are optional, omitted */
-#define ClearHubFeature		(0x2000 | USB_REQ_CLEAR_FEATURE)
-#define ClearPortFeature	(0x2300 | USB_REQ_CLEAR_FEATURE)
-#define GetHubDescriptor	(0xa000 | USB_REQ_GET_DESCRIPTOR)
-#define GetHubStatus		(0xa000 | USB_REQ_GET_STATUS)
-#define GetPortStatus		(0xa300 | USB_REQ_GET_STATUS)
-#define SetHubFeature		(0x2000 | USB_REQ_SET_FEATURE)
-#define SetPortFeature		(0x2300 | USB_REQ_SET_FEATURE)
+#define ClearHubFeature		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_CLEAR_FEATURE)
+#define ClearPortFeature	HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_CLEAR_FEATURE)
+#define GetHubDescriptor	HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_DESCRIPTOR)
+#define GetHubStatus		HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_STATUS)
+#define GetPortStatus		HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, USB_REQ_GET_STATUS)
+#define SetHubFeature		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_SET_FEATURE)
+#define SetPortFeature		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_SET_FEATURE)
 
 
 /*-------------------------------------------------------------------------*/
 
 /* class requests from USB 3.1 hub spec, table 10-7 */
-#define SetHubDepth		(0x2000 | HUB_SET_DEPTH)
-#define GetPortErrorCount	(0xa300 | HUB_GET_PORT_ERR_COUNT)
+#define SetHubDepth		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, HUB_SET_DEPTH)
+#define GetPortErrorCount	HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_PORT_ERR_COUNT)
 
 /*
  * Generic bandwidth allocation constants/support
-- 
cgit v1.2.3


From 40fc3423b983b864bf70b03199191260ae9b2ea6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 3 Dec 2016 11:14:50 -0800
Subject: tcp: tsq: add tsq_flags / tsq_enum

This is a cleanup, to ease code review of following patches.

Old 'enum tsq_flags' is renamed, and a new enumeration is added
with the flags used in cmpxchg() operations as opposed to
single bit operations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   | 11 ++++++++++-
 net/ipv4/tcp_output.c | 16 ++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 734bab4c3bef..d8be083ab0b0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -364,7 +364,7 @@ struct tcp_sock {
 	u32	*saved_syn;
 };
 
-enum tsq_flags {
+enum tsq_enum {
 	TSQ_THROTTLED,
 	TSQ_QUEUED,
 	TCP_TSQ_DEFERRED,	   /* tcp_tasklet_func() found socket was owned */
@@ -375,6 +375,15 @@ enum tsq_flags {
 				    */
 };
 
+enum tsq_flags {
+	TSQF_THROTTLED			= (1UL << TSQ_THROTTLED),
+	TSQF_QUEUED			= (1UL << TSQ_QUEUED),
+	TCPF_TSQ_DEFERRED		= (1UL << TCP_TSQ_DEFERRED),
+	TCPF_WRITE_TIMER_DEFERRED	= (1UL << TCP_WRITE_TIMER_DEFERRED),
+	TCPF_DELACK_TIMER_DEFERRED	= (1UL << TCP_DELACK_TIMER_DEFERRED),
+	TCPF_MTU_REDUCED_DEFERRED	= (1UL << TCP_MTU_REDUCED_DEFERRED),
+};
+
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
 {
 	return (struct tcp_sock *)sk;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c7adcb57654e..8f0289b0fb24 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -784,10 +784,10 @@ static void tcp_tasklet_func(unsigned long data)
 	}
 }
 
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |		\
-			  (1UL << TCP_WRITE_TIMER_DEFERRED) |	\
-			  (1UL << TCP_DELACK_TIMER_DEFERRED) |	\
-			  (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
+			  TCPF_WRITE_TIMER_DEFERRED |	\
+			  TCPF_DELACK_TIMER_DEFERRED |	\
+			  TCPF_MTU_REDUCED_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -808,7 +808,7 @@ void tcp_release_cb(struct sock *sk)
 		nflags = flags & ~TCP_DEFERRED_ALL;
 	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
 
-	if (flags & (1UL << TCP_TSQ_DEFERRED))
+	if (flags & TCPF_TSQ_DEFERRED)
 		tcp_tsq_handler(sk);
 
 	/* Here begins the tricky part :
@@ -822,15 +822,15 @@ void tcp_release_cb(struct sock *sk)
 	 */
 	sock_release_ownership(sk);
 
-	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 		tcp_write_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
 		tcp_delack_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
 		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
-- 
cgit v1.2.3


From 7aa5470c2c09265902b5e4289afa82e4e7c2987e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 3 Dec 2016 11:14:57 -0800
Subject: tcp: tsq: move tsq_flags close to sk_wmem_alloc

tsq_flags being in the same cache line than sk_wmem_alloc
makes a lot of sense. Both fields are changed from tcp_wfree()
and more generally by various TSQ related functions.

Prior patch made room in struct sock and added sk_tsq_flags,
this patch deletes tsq_flags from struct tcp_sock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 -
 net/ipv4/tcp.c        |  4 ++--
 net/ipv4/tcp_ipv4.c   |  2 +-
 net/ipv4/tcp_output.c | 24 +++++++++++-------------
 net/ipv4/tcp_timer.c  |  4 ++--
 net/ipv6/tcp_ipv6.c   |  2 +-
 6 files changed, 17 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d8be083ab0b0..fc5848dad7a4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -186,7 +186,6 @@ struct tcp_sock {
 	u32	tsoffset;	/* timestamp offset */
 
 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
-	unsigned long	tsq_flags;
 
 	/* Data for direct copy to user */
 	struct {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1149b48700a1..1ef3165114ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -663,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 	if (tcp_should_autocork(sk, skb, size_goal)) {
 
 		/* avoid atomic op if TSQ_THROTTLED bit is already set */
-		if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
-			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
 		}
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b50f05905ced..30d81f533ada 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -443,7 +443,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 			if (!sock_owned_by_user(sk)) {
 				tcp_v4_mtu_reduced(sk);
 			} else {
-				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
 					sock_hold(sk);
 			}
 			goto out;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5f04bee4c86a..b45101f3d2bd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -767,14 +767,15 @@ static void tcp_tasklet_func(unsigned long data)
 	list_for_each_safe(q, n, &list) {
 		tp = list_entry(q, struct tcp_sock, tsq_node);
 		list_del(&tp->tsq_node);
-		clear_bit(TSQ_QUEUED, &tp->tsq_flags);
 
 		sk = (struct sock *)tp;
+		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
+
 		if (!sk->sk_lock.owned &&
-		    test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) {
+		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk)) {
-				clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 				tcp_tsq_handler(sk);
 			}
 			bh_unlock_sock(sk);
@@ -797,16 +798,15 @@ static void tcp_tasklet_func(unsigned long data)
  */
 void tcp_release_cb(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long flags, nflags;
 
 	/* perform an atomic operation only if at least one flag is set */
 	do {
-		flags = tp->tsq_flags;
+		flags = sk->sk_tsq_flags;
 		if (!(flags & TCP_DEFERRED_ALL))
 			return;
 		nflags = flags & ~TCP_DEFERRED_ALL;
-	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
 	if (flags & TCPF_TSQ_DEFERRED)
 		tcp_tsq_handler(sk);
@@ -878,7 +878,7 @@ void tcp_wfree(struct sk_buff *skb)
 	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
 		goto out;
 
-	for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) {
+	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
 		struct tsq_tasklet *tsq;
 		bool empty;
 
@@ -886,7 +886,7 @@ void tcp_wfree(struct sk_buff *skb)
 			goto out;
 
 		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
-		nval = cmpxchg(&tp->tsq_flags, oval, nval);
+		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
 		if (nval != oval)
 			continue;
 
@@ -2100,7 +2100,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 		    skb->prev == sk->sk_write_queue.next)
 			return false;
 
-		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED, so we must
 		 * test again the condition.
@@ -2241,8 +2241,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
-		if (test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags))
-			clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
@@ -3545,8 +3545,6 @@ void tcp_send_ack(struct sock *sk)
 	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
 	 * too much.
 	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
-	 * We also avoid tcp_wfree() overhead (cache line miss accessing
-	 * tp->tsq_flags) by using regular sock_wfree()
 	 */
 	skb_set_tcp_pure_ack(buff);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3ea1cf804748..3705075f42c3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -310,7 +310,7 @@ static void tcp_delack_timer(unsigned long data)
 		inet_csk(sk)->icsk_ack.blocked = 1;
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
-		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
 			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
@@ -592,7 +592,7 @@ static void tcp_write_timer(unsigned long data)
 		tcp_write_timer_handler(sk);
 	} else {
 		/* delegate our work to tcp_release_cb() */
-		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
 			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a2185a214abc..73bc8fc68acd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -399,7 +399,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (!sock_owned_by_user(sk))
 			tcp_v6_mtu_reduced(sk);
 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
-					   &tp->tsq_flags))
+					   &sk->sk_tsq_flags))
 			sock_hold(sk);
 		goto out;
 	}
-- 
cgit v1.2.3


From a2e7eefd5618e0f75bae2eb5c9387ea2f627b6ca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2016 19:31:01 +0100
Subject: nvme: move NVMe class code to pci_ids.h

We'll need to check for it in the AHCI drivers (yes, really) soon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/nvme/host/pci.c | 3 ---
 include/linux/pci_ids.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0fc99f0f2571..2c58f159ac61 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2064,9 +2064,6 @@ static const struct pci_error_handlers nvme_err_handler = {
 	.reset_notify	= nvme_reset_notify,
 };
 
-/* Move to pci_ids.h later */
-#define PCI_CLASS_STORAGE_EXPRESS	0x010802
-
 static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0953),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c58752fe16c4..a5e6c7bca610 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -23,8 +23,10 @@
 #define PCI_CLASS_STORAGE_SATA		0x0106
 #define PCI_CLASS_STORAGE_SATA_AHCI	0x010601
 #define PCI_CLASS_STORAGE_SAS		0x0107
+#define PCI_CLASS_STORAGE_EXPRESS	0x010802
 #define PCI_CLASS_STORAGE_OTHER		0x0180
 
+
 #define PCI_BASE_CLASS_NETWORK		0x02
 #define PCI_CLASS_NETWORK_ETHERNET	0x0200
 #define PCI_CLASS_NETWORK_TOKEN_RING	0x0201
-- 
cgit v1.2.3


From bfa9cb3e110cc02f2120e021bc853773bfd61b74 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 2 Dec 2016 19:31:02 +0100
Subject: ahci-remap.h: add ahci remapping definitions

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
[hch: split into a separate header and commit]
Signed-off-by: Christoph Hellwig <hch@lst.de>
[tj: dropped duplicate definition of AHCI_VSCAP spotted by Sergei]
Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ahci-remap.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/linux/ahci-remap.h

(limited to 'include/linux')

diff --git a/include/linux/ahci-remap.h b/include/linux/ahci-remap.h
new file mode 100644
index 000000000000..62be3a40239d
--- /dev/null
+++ b/include/linux/ahci-remap.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_AHCI_REMAP_H
+#define _LINUX_AHCI_REMAP_H
+
+#include <linux/sizes.h>
+
+#define AHCI_VSCAP		0xa4
+#define AHCI_REMAP_CAP		0x800
+
+/* device class code */
+#define AHCI_REMAP_N_DCC	0x880
+
+/* remap-device base relative to ahci-bar */
+#define AHCI_REMAP_N_OFFSET	SZ_16K
+#define AHCI_REMAP_N_SIZE	SZ_16K
+
+#define AHCI_MAX_REMAP		3
+
+static inline unsigned int ahci_remap_dcc(int i)
+{
+	return AHCI_REMAP_N_DCC + i * 0x80;
+}
+
+static inline unsigned int ahci_remap_base(int i)
+{
+	return AHCI_REMAP_N_OFFSET + i * AHCI_REMAP_N_SIZE;
+}
+
+#endif /* _LINUX_AHCI_REMAP_H */
-- 
cgit v1.2.3


From cbbd26b8b1a6af9c02e2b6523e12bd50cc765059 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 1 Nov 2016 22:09:04 -0400
Subject: [iov_iter] new primitives - copy_from_iter_full() and friends

copy_from_iter_full(), copy_from_iter_full_nocache() and
csum_and_copy_from_iter_full() - counterparts of copy_from_iter()
et.al., advancing iterator only in case of successful full copy
and returning whether it had been successful or not.

Convert some obvious users.  *NOTE* - do not blindly assume that
something is a good candidate for those unless you are sure that
not advancing iov_iter in failure case is the right thing in
this case.  Anything that does short read/short write kind of
stuff (or is in a loop, etc.) is unlikely to be a good one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/bluetooth/hci_vhci.c       |  2 +-
 drivers/net/macvtap.c              |  4 +-
 drivers/net/tun.c                  |  7 +--
 drivers/usb/gadget/function/f_fs.c |  2 +-
 drivers/usb/gadget/legacy/inode.c  |  2 +-
 drivers/vhost/scsi.c               |  3 +-
 drivers/vhost/vhost.c              |  3 +-
 fs/ncpfs/file.c                    |  2 +-
 fs/orangefs/devorangefs-req.c      | 13 ++---
 include/linux/uio.h                |  3 ++
 kernel/printk/printk.c             |  2 +-
 lib/iov_iter.c                     | 98 +++++++++++++++++++++++++++++++++++++-
 net/atm/common.c                   |  2 +-
 net/bluetooth/l2cap_core.c         |  6 +--
 net/packet/af_packet.c             |  5 +-
 net/tipc/msg.c                     |  4 +-
 security/keys/keyctl.c             |  2 +-
 17 files changed, 121 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index c4a75a18dcae..233e850fdac7 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -181,7 +181,7 @@ static inline ssize_t vhci_get_user(struct vhci_data *data,
 	if (!skb)
 		return -ENOMEM;
 
-	if (copy_from_iter(skb_put(skb, len), len, from) != len) {
+	if (!copy_from_iter_full(skb_put(skb, len), len, from)) {
 		kfree_skb(skb);
 		return -EFAULT;
 	}
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 070e3290aa6e..19d81ca3fb49 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -673,7 +673,6 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	int depth;
 	bool zerocopy = false;
 	size_t linear;
-	ssize_t n;
 
 	if (q->flags & IFF_VNET_HDR) {
 		vnet_hdr_len = q->vnet_hdr_sz;
@@ -684,8 +683,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 		len -= vnet_hdr_len;
 
 		err = -EFAULT;
-		n = copy_from_iter(&vnet_hdr, sizeof(vnet_hdr), from);
-		if (n != sizeof(vnet_hdr))
+		if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from))
 			goto err;
 		iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr));
 		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8093e39ae263..4fa2d756548a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1171,7 +1171,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	bool zerocopy = false;
 	int err;
 	u32 rxhash;
-	ssize_t n;
 
 	if (!(tun->dev->flags & IFF_UP))
 		return -EIO;
@@ -1181,8 +1180,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			return -EINVAL;
 		len -= sizeof(pi);
 
-		n = copy_from_iter(&pi, sizeof(pi), from);
-		if (n != sizeof(pi))
+		if (!copy_from_iter_full(&pi, sizeof(pi), from))
 			return -EFAULT;
 	}
 
@@ -1191,8 +1189,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			return -EINVAL;
 		len -= tun->vnet_hdr_sz;
 
-		n = copy_from_iter(&gso, sizeof(gso), from);
-		if (n != sizeof(gso))
+		if (!copy_from_iter_full(&gso, sizeof(gso), from))
 			return -EFAULT;
 
 		if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 17989b72cdae..0bfd1e25b431 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -949,7 +949,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 			goto error_mutex;
 		}
 		if (!io_data->read &&
-		    copy_from_iter(data, data_len, &io_data->data) != data_len) {
+		    !copy_from_iter_full(data, data_len, &io_data->data)) {
 			ret = -EFAULT;
 			goto error_mutex;
 		}
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index bd82dd12deff..10b2576f8b6a 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -667,7 +667,7 @@ ep_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		return -ENOMEM;
 	}
 
-	if (unlikely(copy_from_iter(buf, len, from) != len)) {
+	if (unlikely(!copy_from_iter_full(buf, len, from))) {
 		value = -EFAULT;
 		goto out;
 	}
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 6e29d053843d..b296985fda0d 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -922,8 +922,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		 */
 		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
 
-		ret = copy_from_iter(req, req_size, &out_iter);
-		if (unlikely(ret != req_size)) {
+		if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
 			vq_err(vq, "Faulted on copy_from_iter\n");
 			vhost_scsi_send_bad_target(vs, vq, head, out);
 			continue;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89c0e97..06e8b81b6253 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1862,8 +1862,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
 			       i, count);
 			return -EINVAL;
 		}
-		if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) !=
-			     sizeof(desc))) {
+		if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
 			return -EINVAL;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index dd38ca1f2ecb..83ca77231707 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -203,7 +203,7 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 				      bufsize - (pos % bufsize),
 				      iov_iter_count(from));
 
-		if (copy_from_iter(bouncebuffer, to_write, from) != to_write) {
+		if (!copy_from_iter_full(bouncebuffer, to_write, from)) {
 			errno = -EFAULT;
 			break;
 		}
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index 516ffb4dc9a0..b0ced669427e 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -355,7 +355,6 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		__u64 tag;
 	} head;
 	int total = ret = iov_iter_count(iter);
-	int n;
 	int downcall_size = sizeof(struct orangefs_downcall_s);
 	int head_size = sizeof(head);
 
@@ -372,8 +371,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		return -EFAULT;
 	}
      
-	n = copy_from_iter(&head, head_size, iter);
-	if (n < head_size) {
+	if (!copy_from_iter_full(&head, head_size, iter)) {
 		gossip_err("%s: failed to copy head.\n", __func__);
 		return -EFAULT;
 	}
@@ -407,8 +405,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		return ret;
 	}
 
-	n = copy_from_iter(&op->downcall, downcall_size, iter);
-	if (n != downcall_size) {
+	if (!copy_from_iter_full(&op->downcall, downcall_size, iter)) {
 		gossip_err("%s: failed to copy downcall.\n", __func__);
 		goto Efault;
 	}
@@ -462,10 +459,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		goto Enomem;
 	}
 	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
-	n = copy_from_iter(op->downcall.trailer_buf,
-			   op->downcall.trailer_size,
-			   iter);
-	if (n != op->downcall.trailer_size) {
+	if (!copy_from_iter_full(op->downcall.trailer_buf,
+			         op->downcall.trailer_size, iter)) {
 		gossip_err("%s: failed to copy trailer.\n", __func__);
 		vfree(op->downcall.trailer_buf);
 		goto Efault;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..e57c0ccd61c6 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -89,7 +89,9 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
+bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
+bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
@@ -155,6 +157,7 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 }
 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 
 int import_iovec(int type, const struct iovec __user * uvector,
 		 unsigned nr_segs, unsigned fast_segs,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f7a55e9ff2f7..f6bda15396df 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -748,7 +748,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 		return -ENOMEM;
 
 	buf[len] = '\0';
-	if (copy_from_iter(buf, len, from) != len) {
+	if (!copy_from_iter_full(buf, len, from)) {
 		kfree(buf);
 		return -EFAULT;
 	}
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f2bd21b93dfc..83c00b7f59b5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -568,6 +568,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter);
 
+bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))				\
+		return false;
+
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user((to += v.iov_len) - v.iov_len,
+				      v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full);
+
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
@@ -587,6 +612,30 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter_nocache);
 
+bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))				\
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
+					     v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full_nocache);
+
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -1008,7 +1057,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	}
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
-		next = csum_and_copy_from_user(v.iov_base, 
+		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
 					       v.iov_len, 0, &err);
 		if (!err) {
@@ -1037,6 +1086,51 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 }
 EXPORT_SYMBOL(csum_and_copy_from_iter);
 
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
+			       struct iov_iter *i)
+{
+	char *to = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	sum = *csum;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_from_user(v.iov_base,
+					       (to += v.iov_len) - v.iov_len,
+					       v.iov_len, 0, &err);
+		if (err)
+			return false;
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+		0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck(p + v.bv_offset,
+						 (to += v.bv_len) - v.bv_len,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck(v.iov_base,
+						 (to += v.iov_len) - v.iov_len,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter_full);
+
 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 			     struct iov_iter *i)
 {
@@ -1051,7 +1145,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
-					     v.iov_base, 
+					     v.iov_base,
 					     v.iov_len, 0, &err);
 		if (!err) {
 			sum = csum_block_add(sum, next, off);
diff --git a/net/atm/common.c b/net/atm/common.c
index 6dc12305799e..a3ca922d307b 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
 		goto out;
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
-	if (copy_from_iter(skb_put(skb, size), size, &m->msg_iter) != size) {
+	if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
 		kfree_skb(skb);
 		error = -EFAULT;
 		goto out;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 577f1c01454a..ce0b5dd01953 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2127,7 +2127,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 	struct sk_buff **frag;
 	int sent = 0;
 
-	if (copy_from_iter(skb_put(skb, count), count, &msg->msg_iter) != count)
+	if (!copy_from_iter_full(skb_put(skb, count), count, &msg->msg_iter))
 		return -EFAULT;
 
 	sent += count;
@@ -2147,8 +2147,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 
 		*frag = tmp;
 
-		if (copy_from_iter(skb_put(*frag, count), count,
-				   &msg->msg_iter) != count)
+		if (!copy_from_iter_full(skb_put(*frag, count), count,
+				   &msg->msg_iter))
 			return -EFAULT;
 
 		sent += count;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d2238b204691..588ec202d5ba 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2432,14 +2432,11 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
 static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
 				 struct virtio_net_hdr *vnet_hdr)
 {
-	int n;
-
 	if (*len < sizeof(*vnet_hdr))
 		return -EINVAL;
 	*len -= sizeof(*vnet_hdr);
 
-	n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
-	if (n != sizeof(*vnet_hdr))
+	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
 		return -EFAULT;
 
 	return __packet_snd_vnet_parse(vnet_hdr, *len);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17201aa8423d..a22be502f1bd 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -268,7 +268,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		__skb_queue_tail(list, skb);
 		skb_copy_to_linear_data(skb, mhdr, mhsz);
 		pktpos = skb->data + mhsz;
-		if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz)
+		if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
 			return dsz;
 		rc = -EFAULT;
 		goto error;
@@ -299,7 +299,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		if (drem < pktrem)
 			pktrem = drem;
 
-		if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) {
+		if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
 			rc = -EFAULT;
 			goto error;
 		}
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index d580ad06b792..f89f1900e58d 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1074,7 +1074,7 @@ long keyctl_instantiate_key_common(key_serial_t id,
 		}
 
 		ret = -EFAULT;
-		if (copy_from_iter(payload, plen, from) != plen)
+		if (!copy_from_iter_full(payload, plen, from))
 			goto error2;
 	}
 
-- 
cgit v1.2.3


From 15e6cb46c9b09711d1224ae5418b53140e1ba444 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 1 Nov 2016 22:42:45 -0400
Subject: make skb_add_data,{_nocache}() and skb_copy_to_page_nocache() advance
 only on success

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/skbuff.h | 6 +++---
 include/net/sock.h     | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 32810f279f8e..9cfae2e73b3c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2809,12 +2809,12 @@ static inline int skb_add_data(struct sk_buff *skb,
 
 	if (skb->ip_summed == CHECKSUM_NONE) {
 		__wsum csum = 0;
-		if (csum_and_copy_from_iter(skb_put(skb, copy), copy,
-					    &csum, from) == copy) {
+		if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy,
+					         &csum, from)) {
 			skb->csum = csum_block_add(skb->csum, csum, off);
 			return 0;
 		}
-	} else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy)
+	} else if (copy_from_iter_full(skb_put(skb, copy), copy, from))
 		return 0;
 
 	__skb_trim(skb, off);
diff --git a/include/net/sock.h b/include/net/sock.h
index 92b269709b9a..5dd0fed82a06 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1783,13 +1783,13 @@ static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
 {
 	if (skb->ip_summed == CHECKSUM_NONE) {
 		__wsum csum = 0;
-		if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
+		if (!csum_and_copy_from_iter_full(to, copy, &csum, from))
 			return -EFAULT;
 		skb->csum = csum_block_add(skb->csum, csum, offset);
 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
-		if (copy_from_iter_nocache(to, copy, from) != copy)
+		if (!copy_from_iter_full_nocache(to, copy, from))
 			return -EFAULT;
-	} else if (copy_from_iter(to, copy, from) != copy)
+	} else if (!copy_from_iter_full(to, copy, from))
 		return -EFAULT;
 
 	return 0;
-- 
cgit v1.2.3


From 7bd509e311f408f7a5132fcdde2069af65fa05ae Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 4 Dec 2016 23:19:41 +0100
Subject: bpf: add prog_digest and expose it via fdinfo/netlink

When loading a BPF program via bpf(2), calculate the digest over
the program's instruction stream and store it in struct bpf_prog's
digest member. This is done at a point in time before any instructions
are rewritten by the verifier. Any unstable map file descriptor
number part of the imm field will be zeroed for the hash.

fdinfo example output for progs:

  # cat /proc/1590/fdinfo/5
  pos:          0
  flags:        02000002
  mnt_id:       11
  prog_type:    1
  prog_jited:   1
  prog_digest:  b27e8b06da22707513aa97363dfb11c7c3675d28
  memlock:      4096

When programs are pinned and retrieved by an ELF loader, the loader
can check the program's digest through fdinfo and compare it against
one that was generated over the ELF file's program section to see
if the program needs to be reloaded. Furthermore, this can also be
exposed through other means such as netlink in case of a tc cls/act
dump (or xdp in future), but also through tracepoints or other
facilities to identify the program. Other than that, the digest can
also serve as a base name for the work in progress kallsyms support
of programs. The digest doesn't depend/select the crypto layer, since
we need to keep dependencies to a minimum. iproute2 will get support
for this facility.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h                |  1 +
 include/linux/filter.h             |  7 +++-
 include/uapi/linux/pkt_cls.h       |  1 +
 include/uapi/linux/tc_act/tc_bpf.h |  1 +
 kernel/bpf/core.c                  | 65 ++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c               | 24 +++++++++++++-
 kernel/bpf/verifier.c              |  2 ++
 net/sched/act_bpf.c                |  9 ++++++
 net/sched/cls_bpf.c                |  8 +++++
 9 files changed, 116 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 69d0a7f12a3b..8796ff03f472 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -216,6 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+void bpf_prog_calc_digest(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 97338134398f..f078d2b1cff6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -14,6 +14,7 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/capability.h>
+#include <linux/cryptohash.h>
 
 #include <net/sch_generic.h>
 
@@ -56,6 +57,9 @@ struct bpf_prog_aux;
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
+/* Maximum BPF program size in bytes. */
+#define MAX_BPF_SIZE	(BPF_MAXINSNS * sizeof(struct bpf_insn))
+
 /* Helper macros for filter block array initializers. */
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@@ -404,8 +408,9 @@ struct bpf_prog {
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1;	/* Do we need dst entry? */
 	kmemcheck_bitfield_end(meta);
-	u32			len;		/* Number of filter blocks */
 	enum bpf_prog_type	type;		/* Type of BPF program */
+	u32			len;		/* Number of filter blocks */
+	u32			digest[SHA_DIGEST_WORDS]; /* Program digest */
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	unsigned int		(*bpf_func)(const void *ctx,
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 86786d45ee66..1adc0b654996 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -397,6 +397,7 @@ enum {
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
 	TCA_BPF_FLAGS_GEN,
+	TCA_BPF_DIGEST,
 	__TCA_BPF_MAX,
 };
 
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 063d9d465119..a6b88a6f7f71 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -27,6 +27,7 @@ enum {
 	TCA_ACT_BPF_FD,
 	TCA_ACT_BPF_NAME,
 	TCA_ACT_BPF_PAD,
+	TCA_ACT_BPF_DIGEST,
 	__TCA_ACT_BPF_MAX,
 };
 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 82a04143368e..bdcc9f4ba767 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -136,6 +136,71 @@ void __bpf_prog_free(struct bpf_prog *fp)
 	vfree(fp);
 }
 
+#define SHA_BPF_RAW_SIZE						\
+	round_up(MAX_BPF_SIZE + sizeof(__be64) + 1, SHA_MESSAGE_BYTES)
+
+/* Called under verifier mutex. */
+void bpf_prog_calc_digest(struct bpf_prog *fp)
+{
+	const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
+	static u32 ws[SHA_WORKSPACE_WORDS];
+	static u8 raw[SHA_BPF_RAW_SIZE];
+	struct bpf_insn *dst = (void *)raw;
+	u32 i, bsize, psize, blocks;
+	bool was_ld_map;
+	u8 *todo = raw;
+	__be32 *result;
+	__be64 *bits;
+
+	sha_init(fp->digest);
+	memset(ws, 0, sizeof(ws));
+
+	/* We need to take out the map fd for the digest calculation
+	 * since they are unstable from user space side.
+	 */
+	for (i = 0, was_ld_map = false; i < fp->len; i++) {
+		dst[i] = fp->insnsi[i];
+		if (!was_ld_map &&
+		    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+		    dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
+			was_ld_map = true;
+			dst[i].imm = 0;
+		} else if (was_ld_map &&
+			   dst[i].code == 0 &&
+			   dst[i].dst_reg == 0 &&
+			   dst[i].src_reg == 0 &&
+			   dst[i].off == 0) {
+			was_ld_map = false;
+			dst[i].imm = 0;
+		} else {
+			was_ld_map = false;
+		}
+	}
+
+	psize = fp->len * sizeof(struct bpf_insn);
+	memset(&raw[psize], 0, sizeof(raw) - psize);
+	raw[psize++] = 0x80;
+
+	bsize  = round_up(psize, SHA_MESSAGE_BYTES);
+	blocks = bsize / SHA_MESSAGE_BYTES;
+	if (bsize - psize >= sizeof(__be64)) {
+		bits = (__be64 *)(todo + bsize - sizeof(__be64));
+	} else {
+		bits = (__be64 *)(todo + bsize + bits_offset);
+		blocks++;
+	}
+	*bits = cpu_to_be64((psize - 1) << 3);
+
+	while (blocks--) {
+		sha_transform(fp->digest, todo, ws);
+		todo += SHA_MESSAGE_BYTES;
+	}
+
+	result = (__force __be32 *)fp->digest;
+	for (i = 0; i < SHA_DIGEST_WORDS; i++)
+		result[i] = cpu_to_be32(fp->digest[i]);
+}
+
 static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
 {
 	return BPF_CLASS(insn->code) == BPF_JMP  &&
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 85af86c496cd..c0d2b423ce93 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -662,8 +662,30 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	const struct bpf_prog *prog = filp->private_data;
+	char prog_digest[sizeof(prog->digest) * 2 + 1] = { };
+
+	bin2hex(prog_digest, prog->digest, sizeof(prog->digest));
+	seq_printf(m,
+		   "prog_type:\t%u\n"
+		   "prog_jited:\t%u\n"
+		   "prog_digest:\t%s\n"
+		   "memlock:\t%llu\n",
+		   prog->type,
+		   prog->jited,
+		   prog_digest,
+		   prog->pages * 1ULL << PAGE_SHIFT);
+}
+#endif
+
 static const struct file_operations bpf_prog_fops = {
-        .release = bpf_prog_release,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= bpf_prog_show_fdinfo,
+#endif
+	.release	= bpf_prog_release,
 };
 
 int bpf_prog_new_fd(struct bpf_prog *prog)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38d05da84a49..cb37339ca0da 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3176,6 +3176,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		log_level = 0;
 	}
 
+	bpf_prog_calc_digest(env->prog);
+
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 84c1d2da4f8b..1c60317f0121 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -117,10 +117,19 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
 static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
 				  struct sk_buff *skb)
 {
+	struct nlattr *nla;
+
 	if (prog->bpf_name &&
 	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
 		return -EMSGSIZE;
 
+	nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST,
+			  sizeof(prog->filter->digest));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
+
 	return 0;
 }
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index f70e03d2d2c8..adc776048d1a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -549,10 +549,18 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
 static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 				  struct sk_buff *skb)
 {
+	struct nlattr *nla;
+
 	if (prog->bpf_name &&
 	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
 		return -EMSGSIZE;
 
+	nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 640eb7e7b5242af53c456552a526d0080e6333f8 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Mon, 14 Nov 2016 22:14:35 +0100
Subject: fs: Constify path_is_under()'s arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function path_is_under() doesn't modify the paths pointed by its
arguments but only browse them. Constifying this pointers make a cleaner
interface to be used by (future) code which may only have access to
const struct path pointers (e.g. LSM hooks).

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c     | 2 +-
 include/linux/fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index e6c234b1a645..4d80a5066a1f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2997,7 +2997,7 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
 }
 
-bool path_is_under(struct path *path1, struct path *path2)
+bool path_is_under(const struct path *path1, const struct path *path2)
 {
 	bool res;
 	read_seqlock_excl(&mount_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..f96501b51c49 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2709,7 +2709,7 @@ extern struct file * open_exec(const char *);
  
 /* fs/dcache.c -- generic fs support functions */
 extern bool is_subdir(struct dentry *, struct dentry *);
-extern bool path_is_under(struct path *, struct path *);
+extern bool path_is_under(const struct path *, const struct path *);
 
 extern char *file_path(struct file *, char *, int);
 
-- 
cgit v1.2.3


From 3cd5eca8d7a2fe43098df4c33a1272fe6945cac9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:19:09 -0500
Subject: fsnotify: constify 'data' passed to ->handle_event()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/dnotify/dnotify.c          |  2 +-
 fs/notify/fanotify/fanotify.c        |  8 ++++----
 fs/notify/fanotify/fanotify.h        |  2 +-
 fs/notify/inotify/inotify.h          |  2 +-
 fs/notify/inotify/inotify_fsnotify.c |  4 ++--
 include/linux/fsnotify_backend.h     |  2 +-
 kernel/audit_fsnotify.c              | 10 +++++-----
 kernel/audit_tree.c                  |  2 +-
 kernel/audit_watch.c                 |  8 ++++----
 9 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 6faaf710e563..5a4ec309e283 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -85,7 +85,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 				struct inode *inode,
 				struct fsnotify_mark *inode_mark,
 				struct fsnotify_mark *vfsmount_mark,
-				u32 mask, void *data, int data_type,
+				u32 mask, const void *data, int data_type,
 				const unsigned char *file_name, u32 cookie)
 {
 	struct dnotify_mark *dn_mark;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e0e5f7c3c99f..bbc175d4213d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -90,10 +90,10 @@ static int fanotify_get_response(struct fsnotify_group *group,
 static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
 				       struct fsnotify_mark *vfsmnt_mark,
 				       u32 event_mask,
-				       void *data, int data_type)
+				       const void *data, int data_type)
 {
 	__u32 marks_mask, marks_ignored_mask;
-	struct path *path = data;
+	const struct path *path = data;
 
 	pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
 		 " data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
@@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
 }
 
 struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
-						 struct path *path)
+						 const struct path *path)
 {
 	struct fanotify_event_info *event;
 
@@ -177,7 +177,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 				 struct inode *inode,
 				 struct fsnotify_mark *inode_mark,
 				 struct fsnotify_mark *fanotify_mark,
-				 u32 mask, void *data, int data_type,
+				 u32 mask, const void *data, int data_type,
 				 const unsigned char *file_name, u32 cookie)
 {
 	int ret = 0;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 2a5fb14115df..4500a74f8d38 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -47,4 +47,4 @@ static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
 }
 
 struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
-						 struct path *path);
+						 const struct path *path);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ed855ef6f077..a6f5907a3fee 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -26,7 +26,7 @@ extern int inotify_handle_event(struct fsnotify_group *group,
 				struct inode *inode,
 				struct fsnotify_mark *inode_mark,
 				struct fsnotify_mark *vfsmount_mark,
-				u32 mask, void *data, int data_type,
+				u32 mask, const void *data, int data_type,
 				const unsigned char *file_name, u32 cookie);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2cd900c2c737..19e7ec109a75 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -66,7 +66,7 @@ int inotify_handle_event(struct fsnotify_group *group,
 			 struct inode *inode,
 			 struct fsnotify_mark *inode_mark,
 			 struct fsnotify_mark *vfsmount_mark,
-			 u32 mask, void *data, int data_type,
+			 u32 mask, const void *data, int data_type,
 			 const unsigned char *file_name, u32 cookie)
 {
 	struct inotify_inode_mark *i_mark;
@@ -80,7 +80,7 @@ int inotify_handle_event(struct fsnotify_group *group,
 
 	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
 	    (data_type == FSNOTIFY_EVENT_PATH)) {
-		struct path *path = data;
+		const struct path *path = data;
 
 		if (d_unlinked(path->dentry))
 			return 0;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 79467b239fcf..d357041bbec8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -96,7 +96,7 @@ struct fsnotify_ops {
 			    struct inode *inode,
 			    struct fsnotify_mark *inode_mark,
 			    struct fsnotify_mark *vfsmount_mark,
-			    u32 mask, void *data, int data_type,
+			    u32 mask, const void *data, int data_type,
 			    const unsigned char *file_name, u32 cookie);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index f84f8d06e1f6..1173f7cc7ba3 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -74,7 +74,7 @@ int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_
 }
 
 static void audit_update_mark(struct audit_fsnotify_mark *audit_mark,
-			     struct inode *inode)
+			     const struct inode *inode)
 {
 	audit_mark->dev = inode ? inode->i_sb->s_dev : AUDIT_DEV_UNSET;
 	audit_mark->ino = inode ? inode->i_ino : AUDIT_INO_UNSET;
@@ -168,11 +168,11 @@ static int audit_mark_handle_event(struct fsnotify_group *group,
 				    struct inode *to_tell,
 				    struct fsnotify_mark *inode_mark,
 				    struct fsnotify_mark *vfsmount_mark,
-				    u32 mask, void *data, int data_type,
+				    u32 mask, const void *data, int data_type,
 				    const unsigned char *dname, u32 cookie)
 {
 	struct audit_fsnotify_mark *audit_mark;
-	struct inode *inode = NULL;
+	const struct inode *inode = NULL;
 
 	audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
 
@@ -180,10 +180,10 @@ static int audit_mark_handle_event(struct fsnotify_group *group,
 
 	switch (data_type) {
 	case (FSNOTIFY_EVENT_PATH):
-		inode = ((struct path *)data)->dentry->d_inode;
+		inode = ((const struct path *)data)->dentry->d_inode;
 		break;
 	case (FSNOTIFY_EVENT_INODE):
-		inode = (struct inode *)data;
+		inode = (const struct inode *)data;
 		break;
 	default:
 		BUG();
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 25772476fa4a..3a2f5dfe8093 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -948,7 +948,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
 				   struct inode *to_tell,
 				   struct fsnotify_mark *inode_mark,
 				   struct fsnotify_mark *vfsmount_mark,
-				   u32 mask, void *data, int data_type,
+				   u32 mask, const void *data, int data_type,
 				   const unsigned char *file_name, u32 cookie)
 {
 	return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 0d302a87f21b..f476c46b9c20 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -472,10 +472,10 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
 				    struct inode *to_tell,
 				    struct fsnotify_mark *inode_mark,
 				    struct fsnotify_mark *vfsmount_mark,
-				    u32 mask, void *data, int data_type,
+				    u32 mask, const void *data, int data_type,
 				    const unsigned char *dname, u32 cookie)
 {
-	struct inode *inode;
+	const struct inode *inode;
 	struct audit_parent *parent;
 
 	parent = container_of(inode_mark, struct audit_parent, mark);
@@ -484,10 +484,10 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
 
 	switch (data_type) {
 	case (FSNOTIFY_EVENT_PATH):
-		inode = d_backing_inode(((struct path *)data)->dentry);
+		inode = d_backing_inode(((const struct path *)data)->dentry);
 		break;
 	case (FSNOTIFY_EVENT_INODE):
-		inode = (struct inode *)data;
+		inode = (const struct inode *)data;
 		break;
 	default:
 		BUG();
-- 
cgit v1.2.3


From e637835eccc8b93f39ca869628f9a0437bba744e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:21:17 -0500
Subject: fsnotify(): constify 'data'

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/fsnotify.c             | 6 +++---
 include/linux/fsnotify_backend.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index db39de2dd4cb..7788a79eedf7 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -125,7 +125,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent);
 static int send_to_group(struct inode *to_tell,
 			 struct fsnotify_mark *inode_mark,
 			 struct fsnotify_mark *vfsmount_mark,
-			 __u32 mask, void *data,
+			 __u32 mask, const void *data,
 			 int data_is, u32 cookie,
 			 const unsigned char *file_name)
 {
@@ -187,7 +187,7 @@ static int send_to_group(struct inode *to_tell,
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 	     const unsigned char *file_name, u32 cookie)
 {
 	struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
@@ -199,7 +199,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	if (data_is == FSNOTIFY_EVENT_PATH)
-		mnt = real_mount(((struct path *)data)->mnt);
+		mnt = real_mount(((const struct path *)data)->mnt);
 	else
 		mnt = NULL;
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d357041bbec8..e6ea6757a275 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -245,7 +245,7 @@ struct fsnotify_mark {
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
-extern int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 		    const unsigned char *name, u32 cookie);
 extern int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
@@ -357,7 +357,7 @@ extern void fsnotify_init_event(struct fsnotify_event *event,
 
 #else
 
-static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			   const unsigned char *name, u32 cookie)
 {
 	return 0;
-- 
cgit v1.2.3


From 12c7f9dc0fd154632457f3474351bcfcf4e61512 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:23:04 -0500
Subject: constify fsnotify_parent()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/fsnotify.c             | 2 +-
 include/linux/fsnotify.h         | 2 +-
 include/linux/fsnotify_backend.h | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7788a79eedf7..b41515d3f081 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -86,7 +86,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b8bcc058e031..e19eb1f5e958 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,7 +17,7 @@
 #include <linux/bug.h>
 
 /* Notify this dentry's parent about a child's events. */
-static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
 {
 	if (!dentry)
 		dentry = path->dentry;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e6ea6757a275..0cf34d6cc253 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -247,7 +247,7 @@ struct fsnotify_mark {
 /* main fsnotify call to send events */
 extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 		    const unsigned char *name, u32 cookie);
-extern int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask);
+extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
 extern u32 fsnotify_get_cookie(void);
@@ -363,7 +363,7 @@ static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data,
 	return 0;
 }
 
-static inline int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+static inline int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 40212d531d4bfac48dca8cd3d794639766745cda Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:24:41 -0500
Subject: fsnotify: constify the places working with ->f_path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fsnotify.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index e19eb1f5e958..b43d3f5bd9ea 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -28,7 +28,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
 /* simple call site for access decisions */
 static inline int fsnotify_perm(struct file *file, int mask)
 {
-	struct path *path = &file->f_path;
+	const struct path *path = &file->f_path;
 	/*
 	 * Do not use file_inode() here or anywhere in this file to get the
 	 * inode.  That would break *notity on overlayfs.
@@ -176,7 +176,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
  */
 static inline void fsnotify_access(struct file *file)
 {
-	struct path *path = &file->f_path;
+	const struct path *path = &file->f_path;
 	struct inode *inode = path->dentry->d_inode;
 	__u32 mask = FS_ACCESS;
 
@@ -194,7 +194,7 @@ static inline void fsnotify_access(struct file *file)
  */
 static inline void fsnotify_modify(struct file *file)
 {
-	struct path *path = &file->f_path;
+	const struct path *path = &file->f_path;
 	struct inode *inode = path->dentry->d_inode;
 	__u32 mask = FS_MODIFY;
 
@@ -212,7 +212,7 @@ static inline void fsnotify_modify(struct file *file)
  */
 static inline void fsnotify_open(struct file *file)
 {
-	struct path *path = &file->f_path;
+	const struct path *path = &file->f_path;
 	struct inode *inode = path->dentry->d_inode;
 	__u32 mask = FS_OPEN;
 
@@ -228,7 +228,7 @@ static inline void fsnotify_open(struct file *file)
  */
 static inline void fsnotify_close(struct file *file)
 {
-	struct path *path = &file->f_path;
+	const struct path *path = &file->f_path;
 	struct inode *inode = path->dentry->d_inode;
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
-- 
cgit v1.2.3


From 8bd107633b64195a0748b05236c3d14db0a8bed4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:36:51 -0500
Subject: audit_log_{name,link_denied}: constify struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 2 +-
 kernel/audit.c        | 4 ++--
 kernel/audit.h        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d4443f93db6..f51fca8d0b6f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -147,7 +147,7 @@ extern void		    audit_log_d_path(struct audit_buffer *ab,
 extern void		    audit_log_key(struct audit_buffer *ab,
 					  char *key);
 extern void		    audit_log_link_denied(const char *operation,
-						  struct path *link);
+						  const struct path *link);
 extern void		    audit_log_lost(const char *message);
 #ifdef CONFIG_SECURITY
 extern void 		    audit_log_secctx(struct audit_buffer *ab, u32 secid);
diff --git a/kernel/audit.c b/kernel/audit.c
index f1ca11613379..06008c422bd5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1760,7 +1760,7 @@ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
  * @call_panic: optional pointer to int that will be updated if secid fails
  */
 void audit_log_name(struct audit_context *context, struct audit_names *n,
-		    struct path *path, int record_num, int *call_panic)
+		    const struct path *path, int record_num, int *call_panic)
 {
 	struct audit_buffer *ab;
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
@@ -1948,7 +1948,7 @@ EXPORT_SYMBOL(audit_log_task_info);
  * @operation: specific link operation
  * @link: the path that triggered the restriction
  */
-void audit_log_link_denied(const char *operation, struct path *link)
+void audit_log_link_denied(const char *operation, const struct path *link)
 {
 	struct audit_buffer *ab;
 	struct audit_names *name;
diff --git a/kernel/audit.h b/kernel/audit.h
index 431444c3708b..960d49c9db5e 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -212,7 +212,7 @@ extern void audit_copy_inode(struct audit_names *name,
 extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
 			  kernel_cap_t *cap);
 extern void audit_log_name(struct audit_context *context,
-			   struct audit_names *n, struct path *path,
+			   struct audit_names *n, const struct path *path,
 			   int record_num, int *call_panic);
 
 extern int audit_pid;
-- 
cgit v1.2.3


From 71215a75ceddf38ba9d4563481da8dd943de10fc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 19:30:18 -0500
Subject: constify get_dcookie() and friends

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/oprofile/cell/spu_task_sync.c | 2 +-
 drivers/oprofile/buffer_sync.c             | 2 +-
 fs/dcookies.c                              | 4 ++--
 include/linux/dcookies.h                   | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 83d2b4ef7f0d..44d67b167e0b 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -295,7 +295,7 @@ out:
  * dcookie user still being registered (namely, the reader
  * of the event buffer).
  */
-static inline unsigned long fast_get_dcookie(struct path *path)
+static inline unsigned long fast_get_dcookie(const struct path *path)
 {
 	unsigned long cookie;
 
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 82f7000a285d..642478d35e99 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -206,7 +206,7 @@ void sync_stop(void)
  * because we cannot reach this code without at least one
  * dcookie user still being registered (namely, the reader
  * of the event buffer). */
-static inline unsigned long fast_get_dcookie(struct path *path)
+static inline unsigned long fast_get_dcookie(const struct path *path)
 {
 	unsigned long cookie;
 
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ac44a69fbea9..a26a701ef512 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -90,7 +90,7 @@ static void hash_dcookie(struct dcookie_struct * dcs)
 }
 
 
-static struct dcookie_struct *alloc_dcookie(struct path *path)
+static struct dcookie_struct *alloc_dcookie(const struct path *path)
 {
 	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
 							GFP_KERNEL);
@@ -113,7 +113,7 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
 /* This is the main kernel-side routine that retrieves the cookie
  * value for a dentry/vfsmnt pair.
  */
-int get_dcookie(struct path *path, unsigned long *cookie)
+int get_dcookie(const struct path *path, unsigned long *cookie)
 {
 	int err = 0;
 	struct dcookie_struct * dcs;
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index 5ac3bdd5cee6..699b6c499c4f 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -44,7 +44,7 @@ void dcookie_unregister(struct dcookie_user * user);
  *
  * Returns 0 on success, with *cookie filled in
  */
-int get_dcookie(struct path *path, unsigned long *cookie);
+int get_dcookie(const struct path *path, unsigned long *cookie);
 
 #else
 
@@ -58,7 +58,7 @@ static inline void dcookie_unregister(struct dcookie_user * user)
 	return;
 }
 
-static inline int get_dcookie(struct path *path, unsigned long *cookie)
+static inline int get_dcookie(const struct path *path, unsigned long *cookie)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.2.3


From a4141d7cf80fee99ace8d8a95dd358c98ad6ad69 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:28:12 -0500
Subject: constify alloc_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 2 +-
 include/linux/file.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..6d982b57de92 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -155,7 +155,7 @@ over:
  * @mode: the mode with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(struct path *path, fmode_t mode,
+struct file *alloc_file(const struct path *path, fmode_t mode,
 		const struct file_operations *fop)
 {
 	struct file *file;
diff --git a/include/linux/file.h b/include/linux/file.h
index 7444f5feda12..61eb82cbafba 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -17,7 +17,7 @@ struct file_operations;
 struct vfsmount;
 struct dentry;
 struct path;
-extern struct file *alloc_file(struct path *, fmode_t mode,
+extern struct file *alloc_file(const struct path *, fmode_t mode,
 	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
-- 
cgit v1.2.3


From 8c54ca9c6882f5a68d19a82fd063b74f91d4c22b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 19:49:34 -0500
Subject: quota: constify struct path in quota_on

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/super.c          | 4 ++--
 fs/quota/dquot.c         | 2 +-
 fs/quota/quota.c         | 4 ++--
 fs/reiserfs/super.c      | 4 ++--
 include/linux/quota.h    | 2 +-
 include/linux/quotaops.h | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 52b0530c5d65..2d97f7a29d09 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1187,7 +1187,7 @@ static int ext4_release_dquot(struct dquot *dquot);
 static int ext4_mark_dquot_dirty(struct dquot *dquot);
 static int ext4_write_info(struct super_block *sb, int type);
 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
-			 struct path *path);
+			 const struct path *path);
 static int ext4_quota_off(struct super_block *sb, int type);
 static int ext4_quota_on_mount(struct super_block *sb, int type);
 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
@@ -5239,7 +5239,7 @@ static void lockdep_set_quota_inode(struct inode *inode, int subclass)
  * Standard function to be called on quota_on
  */
 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
-			 struct path *path)
+			 const struct path *path)
 {
 	int err;
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1bfac28b7e7d..8738a0d62c09 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2401,7 +2401,7 @@ int dquot_resume(struct super_block *sb, int type)
 EXPORT_SYMBOL(dquot_resume);
 
 int dquot_quota_on(struct super_block *sb, int type, int format_id,
-		   struct path *path)
+		   const struct path *path)
 {
 	int error = security_quota_on(path->dentry);
 	if (error)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2d445425aad7..5acd0c4769af 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -80,7 +80,7 @@ unsigned int qtype_enforce_flag(int type)
 }
 
 static int quota_quotaon(struct super_block *sb, int type, qid_t id,
-		         struct path *path)
+		         const struct path *path)
 {
 	if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
 		return -ENOSYS;
@@ -700,7 +700,7 @@ static int quota_rmxquota(struct super_block *sb, void __user *addr)
 
 /* Copy parameters and call proper function */
 static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
-		       void __user *addr, struct path *path)
+		       void __user *addr, const struct path *path)
 {
 	int ret;
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0a6ad4e71e88..e314cb30a181 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -802,7 +802,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
 static int reiserfs_release_dquot(struct dquot *);
 static int reiserfs_mark_dquot_dirty(struct dquot *);
 static int reiserfs_write_info(struct super_block *, int);
-static int reiserfs_quota_on(struct super_block *, int, int, struct path *);
+static int reiserfs_quota_on(struct super_block *, int, int, const struct path *);
 
 static const struct dquot_operations reiserfs_quota_operations = {
 	.write_dquot = reiserfs_write_dquot,
@@ -2348,7 +2348,7 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
  * Standard function to be called on quota_on
  */
 static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
-			     struct path *path)
+			     const struct path *path)
 {
 	int err;
 	struct inode *inode;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 55107a8ff887..78a98821f9d0 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -431,7 +431,7 @@ struct qc_info {
 
 /* Operations handling requests from userspace */
 struct quotactl_ops {
-	int (*quota_on)(struct super_block *, int, int, struct path *);
+	int (*quota_on)(struct super_block *, int, int, const struct path *);
 	int (*quota_off)(struct super_block *, int);
 	int (*quota_enable)(struct super_block *, unsigned int);
 	int (*quota_disable)(struct super_block *, unsigned int);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f00fa86ac966..799a63d0e1a8 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -90,7 +90,7 @@ int dquot_file_open(struct inode *inode, struct file *file);
 int dquot_enable(struct inode *inode, int type, int format_id,
 	unsigned int flags);
 int dquot_quota_on(struct super_block *sb, int type, int format_id,
- 	struct path *path);
+	const struct path *path);
 int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int dquot_quota_off(struct super_block *sb, int type);
-- 
cgit v1.2.3


From ca71cf71eeda04dc9ad18271504e499013af5415 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 19:45:28 -0500
Subject: namespace.c: constify struct path passed to a bunch of primitives

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h         | 2 +-
 fs/namespace.c        | 8 ++++----
 include/linux/fs.h    | 2 +-
 include/linux/mount.h | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index f4da3341b4a3..3e460159d835 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -62,7 +62,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 extern void *copy_mount_options(const void __user *);
 extern char *copy_mount_string(const void __user *);
 
-extern struct vfsmount *lookup_mnt(struct path *);
+extern struct vfsmount *lookup_mnt(const struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
 
 extern int sb_prepare_remount_readonly(struct super_block *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d80a5066a1f..9ad88a45b3e3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -678,7 +678,7 @@ out:
  *
  * lookup_mnt takes a reference to the found vfsmount.
  */
-struct vfsmount *lookup_mnt(struct path *path)
+struct vfsmount *lookup_mnt(const struct path *path)
 {
 	struct mount *child_mnt;
 	struct vfsmount *m;
@@ -1159,7 +1159,7 @@ struct vfsmount *mntget(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(mntget);
 
-struct vfsmount *mnt_clone_internal(struct path *path)
+struct vfsmount *mnt_clone_internal(const struct path *path)
 {
 	struct mount *p;
 	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
@@ -1758,7 +1758,7 @@ out:
 
 /* Caller should check returned pointer for errors */
 
-struct vfsmount *collect_mounts(struct path *path)
+struct vfsmount *collect_mounts(const struct path *path)
 {
 	struct mount *tree;
 	namespace_lock();
@@ -1791,7 +1791,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
  *
  * Release with mntput().
  */
-struct vfsmount *clone_private_mount(struct path *path)
+struct vfsmount *clone_private_mount(const struct path *path)
 {
 	struct mount *old_mnt = real_mount(path->mnt);
 	struct mount *new_mnt;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f96501b51c49..3056fe46f336 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2123,7 +2123,7 @@ extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(const char *, const char __user *,
 		     const char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct path *);
+extern struct vfsmount *collect_mounts(const struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
 			  struct vfsmount *);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1172cce949a4..cf2b5784b649 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -79,12 +79,12 @@ extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern struct vfsmount *mnt_clone_internal(struct path *path);
+extern struct vfsmount *mnt_clone_internal(const struct path *path);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 extern bool mnt_may_suid(struct vfsmount *mnt);
 
 struct path;
-extern struct vfsmount *clone_private_mount(struct path *path);
+extern struct vfsmount *clone_private_mount(const struct path *path);
 
 struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
-- 
cgit v1.2.3


From f0bb5aaf2c51267c49ed5e2c6103df22acfe30f5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Nov 2016 20:27:12 -0500
Subject: vfs: misc struct path constification

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c         | 4 ++--
 fs/statfs.c        | 2 +-
 fs/utimes.c        | 2 +-
 include/linux/fs.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 5b4eed221530..1c8f4386b03f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2895,7 +2895,7 @@ bool may_open_dev(const struct path *path)
 		!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
 }
 
-static int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(const struct path *path, int acc_mode, int flag)
 {
 	struct dentry *dentry = path->dentry;
 	struct inode *inode = dentry->d_inode;
@@ -2945,7 +2945,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
 
 static int handle_truncate(struct file *filp)
 {
-	struct path *path = &filp->f_path;
+	const struct path *path = &filp->f_path;
 	struct inode *inode = path->dentry->d_inode;
 	int error = get_write_access(inode);
 	if (error)
diff --git a/fs/statfs.c b/fs/statfs.c
index 083dc0ac9140..13ae259d4879 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -63,7 +63,7 @@ static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 	return retval;
 }
 
-int vfs_statfs(struct path *path, struct kstatfs *buf)
+int vfs_statfs(const struct path *path, struct kstatfs *buf)
 {
 	int error;
 
diff --git a/fs/utimes.c b/fs/utimes.c
index 22307cdf7014..5fdb505e307c 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,7 +48,7 @@ static bool nsec_valid(long nsec)
 	return nsec >= 0 && nsec <= 999999999;
 }
 
-static int utimes_common(struct path *path, struct timespec *times)
+static int utimes_common(const struct path *path, struct timespec *times)
 {
 	int error;
 	struct iattr newattrs;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3056fe46f336..0e177d395efb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2127,7 +2127,7 @@ extern struct vfsmount *collect_mounts(const struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
 			  struct vfsmount *);
-extern int vfs_statfs(struct path *, struct kstatfs *);
+extern int vfs_statfs(const struct path *, struct kstatfs *);
 extern int user_statfs(const char __user *, struct kstatfs *);
 extern int fd_statfs(int, struct kstatfs *);
 extern int vfs_ustat(dev_t, struct kstatfs *);
-- 
cgit v1.2.3


From 0f0fe7e01327b3d524787a2e8b7e78f010db2bb8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 1 Dec 2016 16:28:17 +0530
Subject: PM / OPP: Manage supply's voltage/current in a separate structure

This is a preparatory step for multiple regulator per device support.
Move the voltage/current variables to a new structure.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Dave Gerlach <d-gerlach@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c    | 44 +++++++++++++++++++++-------------------
 drivers/base/power/opp/debugfs.c |  8 ++++----
 drivers/base/power/opp/of.c      | 18 ++++++++--------
 drivers/base/power/opp/opp.h     | 11 +++-------
 include/linux/pm_opp.h           | 16 +++++++++++++++
 5 files changed, 55 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 6441dfda489f..188048d3d203 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -112,7 +112,7 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 	if (IS_ERR_OR_NULL(tmp_opp))
 		pr_err("%s: Invalid parameters\n", __func__);
 	else
-		v = tmp_opp->u_volt;
+		v = tmp_opp->supply.u_volt;
 
 	return v;
 }
@@ -246,10 +246,10 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 		if (!opp->available)
 			continue;
 
-		if (opp->u_volt_min < min_uV)
-			min_uV = opp->u_volt_min;
-		if (opp->u_volt_max > max_uV)
-			max_uV = opp->u_volt_max;
+		if (opp->supply.u_volt_min < min_uV)
+			min_uV = opp->supply.u_volt_min;
+		if (opp->supply.u_volt_max > max_uV)
+			max_uV = opp->supply.u_volt_max;
 	}
 
 	rcu_read_unlock();
@@ -637,14 +637,14 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 	if (IS_ERR(old_opp)) {
 		old_u_volt = 0;
 	} else {
-		old_u_volt = old_opp->u_volt;
-		old_u_volt_min = old_opp->u_volt_min;
-		old_u_volt_max = old_opp->u_volt_max;
+		old_u_volt = old_opp->supply.u_volt;
+		old_u_volt_min = old_opp->supply.u_volt_min;
+		old_u_volt_max = old_opp->supply.u_volt_max;
 	}
 
-	u_volt = opp->u_volt;
-	u_volt_min = opp->u_volt_min;
-	u_volt_max = opp->u_volt_max;
+	u_volt = opp->supply.u_volt;
+	u_volt_min = opp->supply.u_volt_min;
+	u_volt_max = opp->supply.u_volt_max;
 
 	reg = opp_table->regulator;
 
@@ -957,10 +957,11 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 	struct regulator *reg = opp_table->regulator;
 
 	if (!IS_ERR(reg) &&
-	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
-					    opp->u_volt_max)) {
+	    !regulator_is_supported_voltage(reg, opp->supply.u_volt_min,
+					    opp->supply.u_volt_max)) {
 		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
-			__func__, opp->u_volt_min, opp->u_volt_max);
+			__func__, opp->supply.u_volt_min,
+			opp->supply.u_volt_max);
 		return false;
 	}
 
@@ -993,11 +994,12 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 
 		/* Duplicate OPPs */
 		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
-			 __func__, opp->rate, opp->u_volt, opp->available,
-			 new_opp->rate, new_opp->u_volt, new_opp->available);
+			 __func__, opp->rate, opp->supply.u_volt,
+			 opp->available, new_opp->rate, new_opp->supply.u_volt,
+			 new_opp->available);
 
-		return opp->available && new_opp->u_volt == opp->u_volt ?
-			0 : -EEXIST;
+		return opp->available &&
+		       new_opp->supply.u_volt == opp->supply.u_volt ? 0 : -EEXIST;
 	}
 
 	new_opp->opp_table = opp_table;
@@ -1064,9 +1066,9 @@ int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 	/* populate the opp table */
 	new_opp->rate = freq;
 	tol = u_volt * opp_table->voltage_tolerance_v1 / 100;
-	new_opp->u_volt = u_volt;
-	new_opp->u_volt_min = u_volt - tol;
-	new_opp->u_volt_max = u_volt + tol;
+	new_opp->supply.u_volt = u_volt;
+	new_opp->supply.u_volt_min = u_volt - tol;
+	new_opp->supply.u_volt_max = u_volt + tol;
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
index ef1ae6b52042..c897676ca35f 100644
--- a/drivers/base/power/opp/debugfs.c
+++ b/drivers/base/power/opp/debugfs.c
@@ -63,16 +63,16 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 	if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt))
+	if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->supply.u_volt))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min))
+	if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->supply.u_volt_min))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max))
+	if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->supply.u_volt_max))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp))
+	if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->supply.u_amp))
 		return -ENOMEM;
 
 	if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 5b3755e49731..bdf409d42126 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -148,14 +148,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 		return -EINVAL;
 	}
 
-	opp->u_volt = microvolt[0];
+	opp->supply.u_volt = microvolt[0];
 
 	if (count == 1) {
-		opp->u_volt_min = opp->u_volt;
-		opp->u_volt_max = opp->u_volt;
+		opp->supply.u_volt_min = opp->supply.u_volt;
+		opp->supply.u_volt_max = opp->supply.u_volt;
 	} else {
-		opp->u_volt_min = microvolt[1];
-		opp->u_volt_max = microvolt[2];
+		opp->supply.u_volt_min = microvolt[1];
+		opp->supply.u_volt_max = microvolt[2];
 	}
 
 	/* Search for "opp-microamp-<name>" */
@@ -173,7 +173,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 	}
 
 	if (prop && !of_property_read_u32(opp->np, name, &val))
-		opp->u_amp = val;
+		opp->supply.u_amp = val;
 
 	return 0;
 }
@@ -303,9 +303,9 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	mutex_unlock(&opp_table_lock);
 
 	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
-		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
-		 new_opp->u_volt_min, new_opp->u_volt_max,
-		 new_opp->clock_latency_ns);
+		 __func__, new_opp->turbo, new_opp->rate,
+		 new_opp->supply.u_volt, new_opp->supply.u_volt_min,
+		 new_opp->supply.u_volt_max, new_opp->clock_latency_ns);
 
 	/*
 	 * Notify the changes in the availability of the operable
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 96cd30ac6c1d..8a02516542c2 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -61,10 +61,7 @@ extern struct list_head opp_tables;
  * @turbo:	true if turbo (boost) OPP
  * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
- * @u_volt:	Target voltage in microvolts corresponding to this OPP
- * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
- * @u_volt_max:	Maximum voltage in microvolts corresponding to this OPP
- * @u_amp:	Maximum current drawn by the device in microamperes
+ * @supply:	Power supply voltage/current values
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
  * @opp_table:	points back to the opp_table struct this opp belongs to
@@ -83,10 +80,8 @@ struct dev_pm_opp {
 	bool suspend;
 	unsigned long rate;
 
-	unsigned long u_volt;
-	unsigned long u_volt_min;
-	unsigned long u_volt_max;
-	unsigned long u_amp;
+	struct dev_pm_opp_supply supply;
+
 	unsigned long clock_latency_ns;
 
 	struct opp_table *opp_table;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index f6bc76501912..824f7268f687 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -25,6 +25,22 @@ enum dev_pm_opp_event {
 	OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
 };
 
+/**
+ * struct dev_pm_opp_supply - Power supply voltage/current values
+ * @u_volt:	Target voltage in microvolts corresponding to this OPP
+ * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
+ * @u_volt_max:	Maximum voltage in microvolts corresponding to this OPP
+ * @u_amp:	Maximum current drawn by the device in microamperes
+ *
+ * This structure stores the voltage/current values for a single power supply.
+ */
+struct dev_pm_opp_supply {
+	unsigned long u_volt;
+	unsigned long u_volt_min;
+	unsigned long u_volt_max;
+	unsigned long u_amp;
+};
+
 #if defined(CONFIG_PM_OPP)
 
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
-- 
cgit v1.2.3


From dfbe4678d709e25e0f36e6b6333e2a7a67aefb7e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 1 Dec 2016 16:28:19 +0530
Subject: PM / OPP: Add infrastructure to manage multiple regulators

This patch adds infrastructure to manage multiple regulators and updates
the only user (cpufreq-dt) of dev_pm_opp_set{put}_regulator().

This is preparatory work for adding full support for devices with
multiple regulators.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c    | 248 +++++++++++++++++++++++++++------------
 drivers/base/power/opp/debugfs.c |  52 ++++++--
 drivers/base/power/opp/of.c      | 103 +++++++++++-----
 drivers/base/power/opp/opp.h     |  10 +-
 drivers/cpufreq/cpufreq-dt.c     |   6 +-
 include/linux/pm_opp.h           |   8 +-
 6 files changed, 300 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 46ad8470c438..b4da31c5a5eb 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -93,6 +93,8 @@ struct opp_table *_find_opp_table(struct device *dev)
  * Return: voltage in micro volt corresponding to the opp, else
  * return 0
  *
+ * This is useful only for devices with single power supply.
+ *
  * Locking: This function must be called under rcu_read_lock(). opp is a rcu
  * protected pointer. This means that opp which could have been fetched by
  * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
@@ -112,7 +114,7 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 	if (IS_ERR_OR_NULL(tmp_opp))
 		pr_err("%s: Invalid parameters\n", __func__);
 	else
-		v = tmp_opp->supply.u_volt;
+		v = tmp_opp->supplies[0].u_volt;
 
 	return v;
 }
@@ -210,6 +212,24 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
 
+static int _get_regulator_count(struct device *dev)
+{
+	struct opp_table *opp_table;
+	int count;
+
+	rcu_read_lock();
+
+	opp_table = _find_opp_table(dev);
+	if (!IS_ERR(opp_table))
+		count = opp_table->regulator_count;
+	else
+		count = 0;
+
+	rcu_read_unlock();
+
+	return count;
+}
+
 /**
  * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds
  * @dev: device for which we do this operation
@@ -222,34 +242,51 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *opp;
-	struct regulator *reg;
+	struct regulator *reg, **regulators;
 	unsigned long latency_ns = 0;
-	unsigned long min_uV = ~0, max_uV = 0;
-	int ret;
+	int ret, i, count;
+	struct {
+		unsigned long min;
+		unsigned long max;
+	} *uV;
+
+	count = _get_regulator_count(dev);
+
+	/* Regulator may not be required for the device */
+	if (!count)
+		return 0;
+
+	regulators = kmalloc_array(count, sizeof(*regulators), GFP_KERNEL);
+	if (!regulators)
+		return 0;
+
+	uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL);
+	if (!uV)
+		goto free_regulators;
 
 	rcu_read_lock();
 
 	opp_table = _find_opp_table(dev);
 	if (IS_ERR(opp_table)) {
 		rcu_read_unlock();
-		return 0;
+		goto free_uV;
 	}
 
-	reg = opp_table->regulator;
-	if (IS_ERR(reg)) {
-		/* Regulator may not be required for device */
-		rcu_read_unlock();
-		return 0;
-	}
+	memcpy(regulators, opp_table->regulators, count * sizeof(*regulators));
 
-	list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
-		if (!opp->available)
-			continue;
+	for (i = 0; i < count; i++) {
+		uV[i].min = ~0;
+		uV[i].max = 0;
 
-		if (opp->supply.u_volt_min < min_uV)
-			min_uV = opp->supply.u_volt_min;
-		if (opp->supply.u_volt_max > max_uV)
-			max_uV = opp->supply.u_volt_max;
+		list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
+			if (!opp->available)
+				continue;
+
+			if (opp->supplies[i].u_volt_min < uV[i].min)
+				uV[i].min = opp->supplies[i].u_volt_min;
+			if (opp->supplies[i].u_volt_max > uV[i].max)
+				uV[i].max = opp->supplies[i].u_volt_max;
+		}
 	}
 
 	rcu_read_unlock();
@@ -258,9 +295,16 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	 * The caller needs to ensure that opp_table (and hence the regulator)
 	 * isn't freed, while we are executing this routine.
 	 */
-	ret = regulator_set_voltage_time(reg, min_uV, max_uV);
-	if (ret > 0)
-		latency_ns = ret * 1000;
+	for (i = 0; reg = regulators[i], i < count; i++) {
+		ret = regulator_set_voltage_time(reg, uV[i].min, uV[i].max);
+		if (ret > 0)
+			latency_ns += ret * 1000;
+	}
+
+free_uV:
+	kfree(uV);
+free_regulators:
+	kfree(regulators);
 
 	return latency_ns;
 }
@@ -580,7 +624,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *old_opp, *opp;
-	struct regulator *reg;
+	struct regulator *reg = ERR_PTR(-ENXIO);
 	struct clk *clk;
 	unsigned long freq, old_freq;
 	struct dev_pm_opp_supply old_supply, new_supply;
@@ -633,14 +677,23 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 		return ret;
 	}
 
+	if (opp_table->regulators) {
+		/* This function only supports single regulator per device */
+		if (WARN_ON(opp_table->regulator_count > 1)) {
+			dev_err(dev, "multiple regulators not supported\n");
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+
+		reg = opp_table->regulators[0];
+	}
+
 	if (IS_ERR(old_opp))
 		old_supply.u_volt = 0;
 	else
-		memcpy(&old_supply, &old_opp->supply, sizeof(old_supply));
-
-	memcpy(&new_supply, &opp->supply, sizeof(new_supply));
+		memcpy(&old_supply, old_opp->supplies, sizeof(old_supply));
 
-	reg = opp_table->regulator;
+	memcpy(&new_supply, opp->supplies, sizeof(new_supply));
 
 	rcu_read_unlock();
 
@@ -764,9 +817,6 @@ static struct opp_table *_add_opp_table(struct device *dev)
 
 	_of_init_opp_table(opp_table, dev);
 
-	/* Set regulator to a non-NULL error value */
-	opp_table->regulator = ERR_PTR(-ENXIO);
-
 	/* Find clk for the device */
 	opp_table->clk = clk_get(dev, NULL);
 	if (IS_ERR(opp_table->clk)) {
@@ -815,7 +865,7 @@ static void _remove_opp_table(struct opp_table *opp_table)
 	if (opp_table->prop_name)
 		return;
 
-	if (!IS_ERR(opp_table->regulator))
+	if (opp_table->regulators)
 		return;
 
 	/* Release clk */
@@ -924,35 +974,50 @@ struct dev_pm_opp *_allocate_opp(struct device *dev,
 				 struct opp_table **opp_table)
 {
 	struct dev_pm_opp *opp;
+	int count, supply_size;
+	struct opp_table *table;
 
-	/* allocate new OPP node */
-	opp = kzalloc(sizeof(*opp), GFP_KERNEL);
-	if (!opp)
+	table = _add_opp_table(dev);
+	if (!table)
 		return NULL;
 
-	INIT_LIST_HEAD(&opp->node);
+	/* Allocate space for at least one supply */
+	count = table->regulator_count ? table->regulator_count : 1;
+	supply_size = sizeof(*opp->supplies) * count;
 
-	*opp_table = _add_opp_table(dev);
-	if (!*opp_table) {
-		kfree(opp);
+	/* allocate new OPP node and supplies structures */
+	opp = kzalloc(sizeof(*opp) + supply_size, GFP_KERNEL);
+	if (!opp) {
+		kfree(table);
 		return NULL;
 	}
 
+	/* Put the supplies at the end of the OPP structure as an empty array */
+	opp->supplies = (struct dev_pm_opp_supply *)(opp + 1);
+	INIT_LIST_HEAD(&opp->node);
+
+	*opp_table = table;
+
 	return opp;
 }
 
 static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 					 struct opp_table *opp_table)
 {
-	struct regulator *reg = opp_table->regulator;
-
-	if (!IS_ERR(reg) &&
-	    !regulator_is_supported_voltage(reg, opp->supply.u_volt_min,
-					    opp->supply.u_volt_max)) {
-		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
-			__func__, opp->supply.u_volt_min,
-			opp->supply.u_volt_max);
-		return false;
+	struct regulator *reg;
+	int i;
+
+	for (i = 0; i < opp_table->regulator_count; i++) {
+		reg = opp_table->regulators[i];
+
+		if (!regulator_is_supported_voltage(reg,
+					opp->supplies[i].u_volt_min,
+					opp->supplies[i].u_volt_max)) {
+			pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
+				__func__, opp->supplies[i].u_volt_min,
+				opp->supplies[i].u_volt_max);
+			return false;
+		}
 	}
 
 	return true;
@@ -984,12 +1049,13 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 
 		/* Duplicate OPPs */
 		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
-			 __func__, opp->rate, opp->supply.u_volt,
-			 opp->available, new_opp->rate, new_opp->supply.u_volt,
-			 new_opp->available);
+			 __func__, opp->rate, opp->supplies[0].u_volt,
+			 opp->available, new_opp->rate,
+			 new_opp->supplies[0].u_volt, new_opp->available);
 
+		/* Should we compare voltages for all regulators here ? */
 		return opp->available &&
-		       new_opp->supply.u_volt == opp->supply.u_volt ? 0 : -EEXIST;
+		       new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? 0 : -EEXIST;
 	}
 
 	new_opp->opp_table = opp_table;
@@ -1056,9 +1122,9 @@ int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 	/* populate the opp table */
 	new_opp->rate = freq;
 	tol = u_volt * opp_table->voltage_tolerance_v1 / 100;
-	new_opp->supply.u_volt = u_volt;
-	new_opp->supply.u_volt_min = u_volt - tol;
-	new_opp->supply.u_volt_max = u_volt + tol;
+	new_opp->supplies[0].u_volt = u_volt;
+	new_opp->supplies[0].u_volt_min = u_volt - tol;
+	new_opp->supplies[0].u_volt_max = u_volt + tol;
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
@@ -1303,12 +1369,14 @@ unlock:
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
 
 /**
- * dev_pm_opp_set_regulator() - Set regulator name for the device
+ * dev_pm_opp_set_regulators() - Set regulator names for the device
  * @dev: Device for which regulator name is being set.
- * @name: Name of the regulator.
+ * @names: Array of pointers to the names of the regulator.
+ * @count: Number of regulators.
  *
  * In order to support OPP switching, OPP layer needs to know the name of the
- * device's regulator, as the core would be required to switch voltages as well.
+ * device's regulators, as the core would be required to switch voltages as
+ * well.
  *
  * This must be called before any OPPs are initialized for the device.
  *
@@ -1318,11 +1386,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
+struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
+					    const char * const names[],
+					    unsigned int count)
 {
 	struct opp_table *opp_table;
 	struct regulator *reg;
-	int ret;
+	int ret, i;
 
 	mutex_lock(&opp_table_lock);
 
@@ -1338,26 +1408,44 @@ struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
 		goto err;
 	}
 
-	/* Already have a regulator set */
-	if (WARN_ON(!IS_ERR(opp_table->regulator))) {
+	/* Already have regulators set */
+	if (WARN_ON(opp_table->regulators)) {
 		ret = -EBUSY;
 		goto err;
 	}
-	/* Allocate the regulator */
-	reg = regulator_get_optional(dev, name);
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		if (ret != -EPROBE_DEFER)
-			dev_err(dev, "%s: no regulator (%s) found: %d\n",
-				__func__, name, ret);
+
+	opp_table->regulators = kmalloc_array(count,
+					      sizeof(*opp_table->regulators),
+					      GFP_KERNEL);
+	if (!opp_table->regulators) {
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	opp_table->regulator = reg;
+	for (i = 0; i < count; i++) {
+		reg = regulator_get_optional(dev, names[i]);
+		if (IS_ERR(reg)) {
+			ret = PTR_ERR(reg);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "%s: no regulator (%s) found: %d\n",
+					__func__, names[i], ret);
+			goto free_regulators;
+		}
+
+		opp_table->regulators[i] = reg;
+	}
+
+	opp_table->regulator_count = count;
 
 	mutex_unlock(&opp_table_lock);
 	return opp_table;
 
+free_regulators:
+	while (i != 0)
+		regulator_put(opp_table->regulators[--i]);
+
+	kfree(opp_table->regulators);
+	opp_table->regulators = NULL;
 err:
 	_remove_opp_table(opp_table);
 unlock:
@@ -1365,11 +1453,11 @@ unlock:
 
 	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulators);
 
 /**
- * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
- * @opp_table: OPP table returned from dev_pm_opp_set_regulator().
+ * dev_pm_opp_put_regulators() - Releases resources blocked for regulator
+ * @opp_table: OPP table returned from dev_pm_opp_set_regulators().
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -1377,20 +1465,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-void dev_pm_opp_put_regulator(struct opp_table *opp_table)
+void dev_pm_opp_put_regulators(struct opp_table *opp_table)
 {
+	int i;
+
 	mutex_lock(&opp_table_lock);
 
-	if (IS_ERR(opp_table->regulator)) {
-		pr_err("%s: Doesn't have regulator set\n", __func__);
+	if (!opp_table->regulators) {
+		pr_err("%s: Doesn't have regulators set\n", __func__);
 		goto unlock;
 	}
 
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	regulator_put(opp_table->regulator);
-	opp_table->regulator = ERR_PTR(-ENXIO);
+	for (i = opp_table->regulator_count - 1; i >= 0; i--)
+		regulator_put(opp_table->regulators[i]);
+
+	kfree(opp_table->regulators);
+	opp_table->regulators = NULL;
+	opp_table->regulator_count = 0;
 
 	/* Try freeing opp_table if this was the last blocking resource */
 	_remove_opp_table(opp_table);
@@ -1398,7 +1492,7 @@ void dev_pm_opp_put_regulator(struct opp_table *opp_table)
 unlock:
 	mutex_unlock(&opp_table_lock);
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators);
 
 /**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
index c897676ca35f..95f433db4ac7 100644
--- a/drivers/base/power/opp/debugfs.c
+++ b/drivers/base/power/opp/debugfs.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/limits.h>
+#include <linux/slab.h>
 
 #include "opp.h"
 
@@ -34,6 +35,46 @@ void opp_debug_remove_one(struct dev_pm_opp *opp)
 	debugfs_remove_recursive(opp->dentry);
 }
 
+static bool opp_debug_create_supplies(struct dev_pm_opp *opp,
+				      struct opp_table *opp_table,
+				      struct dentry *pdentry)
+{
+	struct dentry *d;
+	int i = 0;
+	char *name;
+
+	/* Always create at least supply-0 directory */
+	do {
+		name = kasprintf(GFP_KERNEL, "supply-%d", i);
+
+		/* Create per-opp directory */
+		d = debugfs_create_dir(name, pdentry);
+
+		kfree(name);
+
+		if (!d)
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d,
+					  &opp->supplies[i].u_volt))
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d,
+					  &opp->supplies[i].u_volt_min))
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d,
+					  &opp->supplies[i].u_volt_max))
+			return false;
+
+		if (!debugfs_create_ulong("u_amp", S_IRUGO, d,
+					  &opp->supplies[i].u_amp))
+			return false;
+	} while (++i < opp_table->regulator_count);
+
+	return true;
+}
+
 int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 {
 	struct dentry *pdentry = opp_table->dentry;
@@ -63,16 +104,7 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 	if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->supply.u_volt))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->supply.u_volt_min))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->supply.u_volt_max))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->supply.u_amp))
+	if (!opp_debug_create_supplies(opp, opp_table, d))
 		return -ENOMEM;
 
 	if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index bdf409d42126..3f7d2591b173 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 #include <linux/export.h>
 
 #include "opp.h"
@@ -101,16 +102,16 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
 	return true;
 }
 
-/* TODO: Support multiple regulators */
 static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 			      struct opp_table *opp_table)
 {
-	u32 microvolt[3] = {0};
-	u32 val;
-	int count, ret;
+	u32 *microvolt, *microamp = NULL;
+	int supplies, vcount, icount, ret, i, j;
 	struct property *prop = NULL;
 	char name[NAME_MAX];
 
+	supplies = opp_table->regulator_count ? opp_table->regulator_count : 1;
+
 	/* Search for "opp-microvolt-<name>" */
 	if (opp_table->prop_name) {
 		snprintf(name, sizeof(name), "opp-microvolt-%s",
@@ -128,34 +129,29 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 			return 0;
 	}
 
-	count = of_property_count_u32_elems(opp->np, name);
-	if (count < 0) {
+	vcount = of_property_count_u32_elems(opp->np, name);
+	if (vcount < 0) {
 		dev_err(dev, "%s: Invalid %s property (%d)\n",
-			__func__, name, count);
-		return count;
+			__func__, name, vcount);
+		return vcount;
 	}
 
-	/* There can be one or three elements here */
-	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
-			__func__, name, count);
+	/* There can be one or three elements per supply */
+	if (vcount != supplies && vcount != supplies * 3) {
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n",
+			__func__, name, vcount, supplies);
 		return -EINVAL;
 	}
 
-	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+	microvolt = kmalloc_array(vcount, sizeof(*microvolt), GFP_KERNEL);
+	if (!microvolt)
+		return -ENOMEM;
+
+	ret = of_property_read_u32_array(opp->np, name, microvolt, vcount);
 	if (ret) {
 		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
-		return -EINVAL;
-	}
-
-	opp->supply.u_volt = microvolt[0];
-
-	if (count == 1) {
-		opp->supply.u_volt_min = opp->supply.u_volt;
-		opp->supply.u_volt_max = opp->supply.u_volt;
-	} else {
-		opp->supply.u_volt_min = microvolt[1];
-		opp->supply.u_volt_max = microvolt[2];
+		ret = -EINVAL;
+		goto free_microvolt;
 	}
 
 	/* Search for "opp-microamp-<name>" */
@@ -172,10 +168,59 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 		prop = of_find_property(opp->np, name, NULL);
 	}
 
-	if (prop && !of_property_read_u32(opp->np, name, &val))
-		opp->supply.u_amp = val;
+	if (prop) {
+		icount = of_property_count_u32_elems(opp->np, name);
+		if (icount < 0) {
+			dev_err(dev, "%s: Invalid %s property (%d)\n", __func__,
+				name, icount);
+			ret = icount;
+			goto free_microvolt;
+		}
 
-	return 0;
+		if (icount != supplies) {
+			dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n",
+				__func__, name, icount, supplies);
+			ret = -EINVAL;
+			goto free_microvolt;
+		}
+
+		microamp = kmalloc_array(icount, sizeof(*microamp), GFP_KERNEL);
+		if (!microamp) {
+			ret = -EINVAL;
+			goto free_microvolt;
+		}
+
+		ret = of_property_read_u32_array(opp->np, name, microamp,
+						 icount);
+		if (ret) {
+			dev_err(dev, "%s: error parsing %s: %d\n", __func__,
+				name, ret);
+			ret = -EINVAL;
+			goto free_microamp;
+		}
+	}
+
+	for (i = 0, j = 0; i < supplies; i++) {
+		opp->supplies[i].u_volt = microvolt[j++];
+
+		if (vcount == supplies) {
+			opp->supplies[i].u_volt_min = opp->supplies[i].u_volt;
+			opp->supplies[i].u_volt_max = opp->supplies[i].u_volt;
+		} else {
+			opp->supplies[i].u_volt_min = microvolt[j++];
+			opp->supplies[i].u_volt_max = microvolt[j++];
+		}
+
+		if (microamp)
+			opp->supplies[i].u_amp = microamp[i];
+	}
+
+free_microamp:
+	kfree(microamp);
+free_microvolt:
+	kfree(microvolt);
+
+	return ret;
 }
 
 /**
@@ -304,8 +349,8 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 
 	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
 		 __func__, new_opp->turbo, new_opp->rate,
-		 new_opp->supply.u_volt, new_opp->supply.u_volt_min,
-		 new_opp->supply.u_volt_max, new_opp->clock_latency_ns);
+		 new_opp->supplies[0].u_volt, new_opp->supplies[0].u_volt_min,
+		 new_opp->supplies[0].u_volt_max, new_opp->clock_latency_ns);
 
 	/*
 	 * Notify the changes in the availability of the operable
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 8a02516542c2..5b0f7e53bede 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -61,7 +61,7 @@ extern struct list_head opp_tables;
  * @turbo:	true if turbo (boost) OPP
  * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
- * @supply:	Power supply voltage/current values
+ * @supplies:	Power supplies voltage/current values
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
  * @opp_table:	points back to the opp_table struct this opp belongs to
@@ -80,7 +80,7 @@ struct dev_pm_opp {
 	bool suspend;
 	unsigned long rate;
 
-	struct dev_pm_opp_supply supply;
+	struct dev_pm_opp_supply *supplies;
 
 	unsigned long clock_latency_ns;
 
@@ -139,7 +139,8 @@ enum opp_table_access {
  * @supported_hw_count: Number of elements in supported_hw array.
  * @prop_name: A name to postfix to many DT properties, while parsing them.
  * @clk: Device's clock handle
- * @regulator: Supply regulator
+ * @regulators: Supply regulators
+ * @regulator_count: Number of power supply regulators
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -174,7 +175,8 @@ struct opp_table {
 	unsigned int supported_hw_count;
 	const char *prop_name;
 	struct clk *clk;
-	struct regulator *regulator;
+	struct regulator **regulators;
+	unsigned int regulator_count;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 4d3ec92cbabf..269013311e79 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -188,7 +188,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	name = find_supply_name(cpu_dev);
 	if (name) {
-		opp_table = dev_pm_opp_set_regulator(cpu_dev, name);
+		opp_table = dev_pm_opp_set_regulators(cpu_dev, &name, 1);
 		if (IS_ERR(opp_table)) {
 			ret = PTR_ERR(opp_table);
 			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
@@ -289,7 +289,7 @@ out_free_priv:
 out_free_opp:
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
 	if (name)
-		dev_pm_opp_put_regulator(opp_table);
+		dev_pm_opp_put_regulators(opp_table);
 out_put_clk:
 	clk_put(cpu_clk);
 
@@ -304,7 +304,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	if (priv->reg_name)
-		dev_pm_opp_put_regulator(priv->opp_table);
+		dev_pm_opp_put_regulators(priv->opp_table);
 
 	clk_put(policy->clk);
 	kfree(priv);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 824f7268f687..9a825ae78653 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -79,8 +79,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 void dev_pm_opp_put_supported_hw(struct device *dev);
 int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
-struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name);
-void dev_pm_opp_put_regulator(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count);
+void dev_pm_opp_put_regulators(struct opp_table *opp_table);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -187,12 +187,12 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
-static inline struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
+static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
 
-static inline void dev_pm_opp_put_regulator(struct opp_table *opp_table) {}
+static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {}
 
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
-- 
cgit v1.2.3


From 947355850fcb3bb6549294316667d0f53bc03082 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 1 Dec 2016 16:28:20 +0530
Subject: PM / OPP: Separate out _generic_set_opp()

Later patches would add support for custom set_opp() callbacks. This
patch separates out the code for _generic_set_opp() handler in order to
prepare for that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Dave Gerlach <d-gerlach@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c | 181 +++++++++++++++++++++++++++++-------------
 drivers/base/power/opp/opp.h  |   3 +
 include/linux/pm_opp.h        |  35 ++++++++
 3 files changed, 166 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index b4da31c5a5eb..e33198ce41b4 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -610,6 +610,69 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg,
 	return ret;
 }
 
+static inline int
+_generic_set_opp_clk_only(struct device *dev, struct clk *clk,
+			  unsigned long old_freq, unsigned long freq)
+{
+	int ret;
+
+	ret = clk_set_rate(clk, freq);
+	if (ret) {
+		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+			ret);
+	}
+
+	return ret;
+}
+
+static int _generic_set_opp(struct dev_pm_set_opp_data *data)
+{
+	struct dev_pm_opp_supply *old_supply = data->old_opp.supplies;
+	struct dev_pm_opp_supply *new_supply = data->new_opp.supplies;
+	unsigned long old_freq = data->old_opp.rate, freq = data->new_opp.rate;
+	struct regulator *reg = data->regulators[0];
+	struct device *dev= data->dev;
+	int ret;
+
+	/* This function only supports single regulator per device */
+	if (WARN_ON(data->regulator_count > 1)) {
+		dev_err(dev, "multiple regulators are not supported\n");
+		return -EINVAL;
+	}
+
+	/* Scaling up? Scale voltage before frequency */
+	if (freq > old_freq) {
+		ret = _set_opp_voltage(dev, reg, new_supply);
+		if (ret)
+			goto restore_voltage;
+	}
+
+	/* Change frequency */
+	ret = _generic_set_opp_clk_only(dev, data->clk, old_freq, freq);
+	if (ret)
+		goto restore_voltage;
+
+	/* Scaling down? Scale voltage after frequency */
+	if (freq < old_freq) {
+		ret = _set_opp_voltage(dev, reg, new_supply);
+		if (ret)
+			goto restore_freq;
+	}
+
+	return 0;
+
+restore_freq:
+	if (_generic_set_opp_clk_only(dev, data->clk, freq, old_freq))
+		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+			__func__, old_freq);
+restore_voltage:
+	/* This shouldn't harm even if the voltages weren't updated earlier */
+	if (old_supply->u_volt)
+		_set_opp_voltage(dev, reg, old_supply);
+
+	return ret;
+}
+
 /**
  * dev_pm_opp_set_rate() - Configure new OPP based on frequency
  * @dev:	 device for which we do this operation
@@ -623,12 +686,12 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg,
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	struct opp_table *opp_table;
+	unsigned long freq, old_freq;
 	struct dev_pm_opp *old_opp, *opp;
-	struct regulator *reg = ERR_PTR(-ENXIO);
+	struct regulator **regulators;
+	struct dev_pm_set_opp_data *data;
 	struct clk *clk;
-	unsigned long freq, old_freq;
-	struct dev_pm_opp_supply old_supply, new_supply;
-	int ret;
+	int ret, size;
 
 	if (unlikely(!target_freq)) {
 		dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
@@ -677,64 +740,36 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 		return ret;
 	}
 
-	if (opp_table->regulators) {
-		/* This function only supports single regulator per device */
-		if (WARN_ON(opp_table->regulator_count > 1)) {
-			dev_err(dev, "multiple regulators not supported\n");
-			rcu_read_unlock();
-			return -EINVAL;
-		}
+	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", __func__,
+		old_freq, freq);
 
-		reg = opp_table->regulators[0];
+	regulators = opp_table->regulators;
+
+	/* Only frequency scaling */
+	if (!regulators) {
+		rcu_read_unlock();
+		return _generic_set_opp_clk_only(dev, clk, old_freq, freq);
 	}
 
+	data = opp_table->set_opp_data;
+	data->regulators = regulators;
+	data->regulator_count = opp_table->regulator_count;
+	data->clk = clk;
+	data->dev = dev;
+
+	data->old_opp.rate = old_freq;
+	size = sizeof(*opp->supplies) * opp_table->regulator_count;
 	if (IS_ERR(old_opp))
-		old_supply.u_volt = 0;
+		memset(data->old_opp.supplies, 0, size);
 	else
-		memcpy(&old_supply, old_opp->supplies, sizeof(old_supply));
+		memcpy(data->old_opp.supplies, old_opp->supplies, size);
 
-	memcpy(&new_supply, opp->supplies, sizeof(new_supply));
+	data->new_opp.rate = freq;
+	memcpy(data->new_opp.supplies, opp->supplies, size);
 
 	rcu_read_unlock();
 
-	/* Scaling up? Scale voltage before frequency */
-	if (freq > old_freq) {
-		ret = _set_opp_voltage(dev, reg, &new_supply);
-		if (ret)
-			goto restore_voltage;
-	}
-
-	/* Change frequency */
-
-	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
-		__func__, old_freq, freq);
-
-	ret = clk_set_rate(clk, freq);
-	if (ret) {
-		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
-			ret);
-		goto restore_voltage;
-	}
-
-	/* Scaling down? Scale voltage after frequency */
-	if (freq < old_freq) {
-		ret = _set_opp_voltage(dev, reg, &new_supply);
-		if (ret)
-			goto restore_freq;
-	}
-
-	return 0;
-
-restore_freq:
-	if (clk_set_rate(clk, old_freq))
-		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
-			__func__, old_freq);
-restore_voltage:
-	/* This shouldn't harm even if the voltages weren't updated earlier */
-	if (old_supply.u_volt)
-		_set_opp_voltage(dev, reg, &old_supply);
-
-	return ret;
+	return _generic_set_opp(data);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
 
@@ -1368,6 +1403,38 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
 
+static int _allocate_set_opp_data(struct opp_table *opp_table)
+{
+	struct dev_pm_set_opp_data *data;
+	int len, count = opp_table->regulator_count;
+
+	if (WARN_ON(!count))
+		return -EINVAL;
+
+	/* space for set_opp_data */
+	len = sizeof(*data);
+
+	/* space for old_opp.supplies and new_opp.supplies */
+	len += 2 * sizeof(struct dev_pm_opp_supply) * count;
+
+	data = kzalloc(len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->old_opp.supplies = (void *)(data + 1);
+	data->new_opp.supplies = data->old_opp.supplies + count;
+
+	opp_table->set_opp_data = data;
+
+	return 0;
+}
+
+static void _free_set_opp_data(struct opp_table *opp_table)
+{
+	kfree(opp_table->set_opp_data);
+	opp_table->set_opp_data = NULL;
+}
+
 /**
  * dev_pm_opp_set_regulators() - Set regulator names for the device
  * @dev: Device for which regulator name is being set.
@@ -1437,6 +1504,11 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
 
 	opp_table->regulator_count = count;
 
+	/* Allocate block only once to pass to set_opp() routines */
+	ret = _allocate_set_opp_data(opp_table);
+	if (ret)
+		goto free_regulators;
+
 	mutex_unlock(&opp_table_lock);
 	return opp_table;
 
@@ -1446,6 +1518,7 @@ free_regulators:
 
 	kfree(opp_table->regulators);
 	opp_table->regulators = NULL;
+	opp_table->regulator_count = 0;
 err:
 	_remove_opp_table(opp_table);
 unlock:
@@ -1482,6 +1555,8 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
 	for (i = opp_table->regulator_count - 1; i >= 0; i--)
 		regulator_put(opp_table->regulators[i]);
 
+	_free_set_opp_data(opp_table);
+
 	kfree(opp_table->regulators);
 	opp_table->regulators = NULL;
 	opp_table->regulator_count = 0;
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 5b0f7e53bede..a05e43912c6b 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -141,6 +141,7 @@ enum opp_table_access {
  * @clk: Device's clock handle
  * @regulators: Supply regulators
  * @regulator_count: Number of power supply regulators
+ * @set_opp_data: Data to be passed to set_opp callback
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -178,6 +179,8 @@ struct opp_table {
 	struct regulator **regulators;
 	unsigned int regulator_count;
 
+	struct dev_pm_set_opp_data *set_opp_data;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
 	char dentry_name[NAME_MAX];
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 9a825ae78653..779b40a9287d 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -17,6 +17,8 @@
 #include <linux/err.h>
 #include <linux/notifier.h>
 
+struct clk;
+struct regulator;
 struct dev_pm_opp;
 struct device;
 struct opp_table;
@@ -41,6 +43,39 @@ struct dev_pm_opp_supply {
 	unsigned long u_amp;
 };
 
+/**
+ * struct dev_pm_opp_info - OPP freq/voltage/current values
+ * @rate:	Target clk rate in hz
+ * @supplies:	Array of voltage/current values for all power supplies
+ *
+ * This structure stores the freq/voltage/current values for a single OPP.
+ */
+struct dev_pm_opp_info {
+	unsigned long rate;
+	struct dev_pm_opp_supply *supplies;
+};
+
+/**
+ * struct dev_pm_set_opp_data - Set OPP data
+ * @old_opp:	Old OPP info
+ * @new_opp:	New OPP info
+ * @regulators:	Array of regulator pointers
+ * @regulator_count: Number of regulators
+ * @clk:	Pointer to clk
+ * @dev:	Pointer to the struct device
+ *
+ * This structure contains all information required for setting an OPP.
+ */
+struct dev_pm_set_opp_data {
+	struct dev_pm_opp_info old_opp;
+	struct dev_pm_opp_info new_opp;
+
+	struct regulator **regulators;
+	unsigned int regulator_count;
+	struct clk *clk;
+	struct device *dev;
+};
+
 #if defined(CONFIG_PM_OPP)
 
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
-- 
cgit v1.2.3


From 4dab160eb1586f67e8ba7c55ffdd2373f7a5553e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 1 Dec 2016 16:28:21 +0530
Subject: PM / OPP: Allow platform specific custom set_opp() callbacks

The generic set_opp() handler isn't sufficient for platforms with
complex DVFS.  For example, some TI platforms have multiple regulators
for a CPU device. The order in which various supplies need to be
programmed is only known to the platform code and its best to leave it
to it.

This patch implements APIs to register platform specific set_opp()
callback.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Dave Gerlach <d-gerlach@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c | 114 +++++++++++++++++++++++++++++++++++++++++-
 drivers/base/power/opp/opp.h  |   2 +
 include/linux/pm_opp.h        |  10 ++++
 3 files changed, 125 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index e33198ce41b4..eceebef36f21 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -687,6 +687,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	struct opp_table *opp_table;
 	unsigned long freq, old_freq;
+	int (*set_opp)(struct dev_pm_set_opp_data *data);
 	struct dev_pm_opp *old_opp, *opp;
 	struct regulator **regulators;
 	struct dev_pm_set_opp_data *data;
@@ -751,6 +752,11 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 		return _generic_set_opp_clk_only(dev, clk, old_freq, freq);
 	}
 
+	if (opp_table->set_opp)
+		set_opp = opp_table->set_opp;
+	else
+		set_opp = _generic_set_opp;
+
 	data = opp_table->set_opp_data;
 	data->regulators = regulators;
 	data->regulator_count = opp_table->regulator_count;
@@ -769,7 +775,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 
 	rcu_read_unlock();
 
-	return _generic_set_opp(data);
+	return set_opp(data);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
 
@@ -903,6 +909,9 @@ static void _remove_opp_table(struct opp_table *opp_table)
 	if (opp_table->regulators)
 		return;
 
+	if (opp_table->set_opp)
+		return;
+
 	/* Release clk */
 	if (!IS_ERR(opp_table->clk))
 		clk_put(opp_table->clk);
@@ -1569,6 +1578,109 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators);
 
+/**
+ * dev_pm_opp_register_set_opp_helper() - Register custom set OPP helper
+ * @dev: Device for which the helper is getting registered.
+ * @set_opp: Custom set OPP helper.
+ *
+ * This is useful to support complex platforms (like platforms with multiple
+ * regulators per device), instead of the generic OPP set rate helper.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_register_set_opp_helper(struct device *dev,
+			int (*set_opp)(struct dev_pm_set_opp_data *data))
+{
+	struct opp_table *opp_table;
+	int ret;
+
+	if (!set_opp)
+		return -EINVAL;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _add_opp_table(dev);
+	if (!opp_table) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* This should be called before OPPs are initialized */
+	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/* Already have custom set_opp helper */
+	if (WARN_ON(opp_table->set_opp)) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	opp_table->set_opp = set_opp;
+
+	mutex_unlock(&opp_table_lock);
+	return 0;
+
+err:
+	_remove_opp_table(opp_table);
+unlock:
+	mutex_unlock(&opp_table_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper);
+
+/**
+ * dev_pm_opp_register_put_opp_helper() - Releases resources blocked for
+ *					   set_opp helper
+ * @dev: Device for which custom set_opp helper has to be cleared.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_register_put_opp_helper(struct device *dev)
+{
+	struct opp_table *opp_table;
+
+	mutex_lock(&opp_table_lock);
+
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
+		goto unlock;
+	}
+
+	if (!opp_table->set_opp) {
+		dev_err(dev, "%s: Doesn't have custom set_opp helper set\n",
+			__func__);
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
+
+	opp_table->set_opp = NULL;
+
+	/* Try freeing opp_table if this was the last blocking resource */
+	_remove_opp_table(opp_table);
+
+unlock:
+	mutex_unlock(&opp_table_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_register_put_opp_helper);
+
 /**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index a05e43912c6b..af9f2b849a66 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -141,6 +141,7 @@ enum opp_table_access {
  * @clk: Device's clock handle
  * @regulators: Supply regulators
  * @regulator_count: Number of power supply regulators
+ * @set_opp: Platform specific set_opp callback
  * @set_opp_data: Data to be passed to set_opp callback
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
@@ -179,6 +180,7 @@ struct opp_table {
 	struct regulator **regulators;
 	unsigned int regulator_count;
 
+	int (*set_opp)(struct dev_pm_set_opp_data *data);
 	struct dev_pm_set_opp_data *set_opp_data;
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 779b40a9287d..0edd88f93904 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -116,6 +116,8 @@ int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
 struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count);
 void dev_pm_opp_put_regulators(struct opp_table *opp_table);
+int dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
+void dev_pm_opp_register_put_opp_helper(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -215,6 +217,14 @@ static inline int dev_pm_opp_set_supported_hw(struct device *dev,
 
 static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
 
+static inline int dev_pm_opp_register_set_opp_helper(struct device *dev,
+			int (*set_opp)(struct dev_pm_set_opp_data *data))
+{
+	return -ENOTSUPP;
+}
+
+static inline void dev_pm_opp_register_put_opp_helper(struct device *dev) {}
+
 static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
 	return -ENOTSUPP;
-- 
cgit v1.2.3


From a317178e36b52f5f24ad226a77403eeea5ac05c4 Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 21 Oct 2016 23:51:54 +0300
Subject: parser: add u64 number parser

Will be used by the nvme-fabrics FC transport in parsing options

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/parser.h |  1 +
 lib/parser.c           | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/parser.h b/include/linux/parser.h
index 39d5b7955b23..884c1e6eb3fe 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -27,6 +27,7 @@ typedef struct {
 
 int match_token(char *, const match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
+int match_u64(substring_t *, u64 *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
 bool match_wildcard(const char *pattern, const char *str);
diff --git a/lib/parser.c b/lib/parser.c
index b6d11631231b..3278958b472a 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -151,6 +151,36 @@ static int match_number(substring_t *s, int *result, int base)
 	return ret;
 }
 
+/**
+ * match_u64int: scan a number in the given base from a substring_t
+ * @s: substring to be scanned
+ * @result: resulting u64 on success
+ * @base: base to use when converting string
+ *
+ * Description: Given a &substring_t and a base, attempts to parse the substring
+ * as a number in that base. On success, sets @result to the integer represented
+ * by the string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ */
+static int match_u64int(substring_t *s, u64 *result, int base)
+{
+	char *buf;
+	int ret;
+	u64 val;
+	size_t len = s->to - s->from;
+
+	buf = kmalloc(len + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	memcpy(buf, s->from, len);
+	buf[len] = '\0';
+
+	ret = kstrtoull(buf, base, &val);
+	if (!ret)
+		*result = val;
+	kfree(buf);
+	return ret;
+}
+
 /**
  * match_int: - scan a decimal representation of an integer from a substring_t
  * @s: substring_t to be scanned
@@ -166,6 +196,23 @@ int match_int(substring_t *s, int *result)
 }
 EXPORT_SYMBOL(match_int);
 
+/**
+ * match_u64: - scan a decimal representation of a u64 from
+ *                  a substring_t
+ * @s: substring_t to be scanned
+ * @result: resulting unsigned long long on success
+ *
+ * Description: Attempts to parse the &substring_t @s as a long decimal
+ * integer. On success, sets @result to the integer represented by the
+ * string and returns 0.
+ * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ */
+int match_u64(substring_t *s, u64 *result)
+{
+	return match_u64int(s, result, 0);
+}
+EXPORT_SYMBOL(match_u64);
+
 /**
  * match_octal: - scan an octal representation of an integer from a substring_t
  * @s: substring_t to be scanned
-- 
cgit v1.2.3


From cba3bdfd2e89edd706e2c40dfad914aca663b6ac Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 2 Dec 2016 00:28:39 -0800
Subject: nvme-fabrics: Add FC transport error codes to nvme.h

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jay Freyensee <james_p_freyensee@linux.intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0df9466a7c38..5ac1f57226f4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -963,6 +963,19 @@ enum {
 	NVME_SC_ACCESS_DENIED		= 0x286,
 
 	NVME_SC_DNR			= 0x4000,
+
+
+	/*
+	 * FC Transport-specific error status values for NVME commands
+	 *
+	 * Transport-specific status code values must be in the range 0xB0..0xBF
+	 */
+
+	/* Generic FC failure - catchall */
+	NVME_SC_FC_TRANSPORT_ERROR	= 0x00B0,
+
+	/* I/O failure due to FC ABTS'd */
+	NVME_SC_FC_TRANSPORT_ABORTED	= 0x00B1,
 };
 
 struct nvme_completion {
-- 
cgit v1.2.3


From b1ad1475b447a7668ac8bfad77277c4405941883 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 2 Dec 2016 00:28:40 -0800
Subject: nvme-fabrics: Add FC transport FC-NVME definitions

- Formats for Cmd, Data, Rsp IUs
- Formats FC-4 LS definitions
- Add to MAINTAINERS file

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jay Freyensee <james_p_freyensee@linux.intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS             |   6 ++
 include/linux/nvme-fc.h | 268 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 274 insertions(+)
 create mode 100644 include/linux/nvme-fc.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index bbc2b39e67e9..e630560df720 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8659,6 +8659,12 @@ L:	linux-nvme@lists.infradead.org
 S:	Supported
 F:	drivers/nvme/target/
 
+NVM EXPRESS FC TRANSPORT DRIVERS
+M:	James Smart <james.smart@broadcom.com>
+L:	linux-nvme@lists.infradead.org
+S:	Supported
+F:	include/linux/nvme-fc.h
+
 NVMEM FRAMEWORK
 M:	Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
 M:	Maxime Ripard <maxime.ripard@free-electrons.com>
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
new file mode 100644
index 000000000000..4b45226bd604
--- /dev/null
+++ b/include/linux/nvme-fc.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016 Avago Technologies.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful.
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
+ * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
+ * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
+ * See the GNU General Public License for more details, a copy of which
+ * can be found in the file COPYING included with this package
+ *
+ */
+
+/*
+ * This file contains definitions relative to FC-NVME r1.11 and a few
+ * newer items
+ */
+
+#ifndef _NVME_FC_H
+#define _NVME_FC_H 1
+
+
+#define NVME_CMD_SCSI_ID		0xFD
+#define NVME_CMD_FC_ID			FC_TYPE_NVME
+
+/* FC-NVME Cmd IU Flags */
+#define FCNVME_CMD_FLAGS_DIRMASK	0x03
+#define FCNVME_CMD_FLAGS_WRITE		0x01
+#define FCNVME_CMD_FLAGS_READ		0x02
+
+struct nvme_fc_cmd_iu {
+	__u8			scsi_id;
+	__u8			fc_id;
+	__be16			iu_len;
+	__u8			rsvd4[3];
+	__u8			flags;
+	__be64			connection_id;
+	__be32			csn;
+	__be32			data_len;
+	struct nvme_command	sqe;
+	__be32			rsvd88[2];
+};
+
+#define NVME_FC_SIZEOF_ZEROS_RSP	12
+
+struct nvme_fc_ersp_iu {
+	__u8			rsvd0[2];
+	__be16			iu_len;
+	__be32			rsn;
+	__be32			xfrd_len;
+	__be32			rsvd12;
+	struct nvme_completion	cqe;
+	/* for now - no additional payload */
+};
+
+
+/* FC-NVME r1.03/16-119v0 NVME Link Services */
+enum {
+	FCNVME_LS_RSVD			= 0,
+	FCNVME_LS_RJT			= 1,
+	FCNVME_LS_ACC			= 2,
+	FCNVME_LS_CREATE_ASSOCIATION	= 3,
+	FCNVME_LS_CREATE_CONNECTION	= 4,
+	FCNVME_LS_DISCONNECT		= 5,
+};
+
+/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */
+enum {
+	FCNVME_LSDESC_RSVD		= 0x0,
+	FCNVME_LSDESC_RQST		= 0x1,
+	FCNVME_LSDESC_RJT		= 0x2,
+	FCNVME_LSDESC_CREATE_ASSOC_CMD	= 0x3,
+	FCNVME_LSDESC_CREATE_CONN_CMD	= 0x4,
+	FCNVME_LSDESC_DISCONN_CMD	= 0x5,
+	FCNVME_LSDESC_CONN_ID		= 0x6,
+	FCNVME_LSDESC_ASSOC_ID		= 0x7,
+};
+
+
+/* ********** start of Link Service Descriptors ********** */
+
+
+/*
+ * fills in length of a descriptor. Struture minus descriptor header
+ */
+static inline __be32 fcnvme_lsdesc_len(size_t sz)
+{
+	return cpu_to_be32(sz - (2 * sizeof(u32)));
+}
+
+
+struct fcnvme_ls_rqst_w0 {
+	u8	ls_cmd;			/* FCNVME_LS_xxx */
+	u8	zeros[3];
+};
+
+/* FCNVME_LSDESC_RQST */
+struct fcnvme_lsdesc_rqst {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	struct fcnvme_ls_rqst_w0	w0;
+	__be32	rsvd12;
+};
+
+
+
+
+/* FCNVME_LSDESC_RJT */
+struct fcnvme_lsdesc_rjt {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	u8	rsvd8;
+
+	/*
+	 * Reject reason and explanaction codes are generic
+	 * to ELs's from LS-3.
+	 */
+	u8	reason_code;
+	u8	reason_explanation;
+
+	u8	vendor;
+	__be32	rsvd12;
+};
+
+
+#define FCNVME_ASSOC_HOSTID_LEN		64
+#define FCNVME_ASSOC_HOSTNQN_LEN	256
+#define FCNVME_ASSOC_SUBNQN_LEN		256
+
+/* FCNVME_LSDESC_CREATE_ASSOC_CMD */
+struct fcnvme_lsdesc_cr_assoc_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be16	ersp_ratio;
+	__be16	rsvd10;
+	__be32	rsvd12[9];
+	__be16	cntlid;
+	__be16	sqsize;
+	__be32	rsvd52;
+	u8	hostid[FCNVME_ASSOC_HOSTID_LEN];
+	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
+	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
+	u8	rsvd632[384];
+};
+
+/* FCNVME_LSDESC_CREATE_CONN_CMD */
+struct fcnvme_lsdesc_cr_conn_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be16	ersp_ratio;
+	__be16	rsvd10;
+	__be32	rsvd12[9];
+	__be16	qid;
+	__be16	sqsize;
+	__be32  rsvd52;
+};
+
+/* Disconnect Scope Values */
+enum {
+	FCNVME_DISCONN_ASSOCIATION	= 0,
+	FCNVME_DISCONN_CONNECTION	= 1,
+};
+
+/* FCNVME_LSDESC_DISCONN_CMD */
+struct fcnvme_lsdesc_disconn_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	u8	rsvd8[3];
+	/* note: scope is really a 1 bit field */
+	u8	scope;			/* FCNVME_DISCONN_xxx */
+	__be32	rsvd12;
+	__be64	id;
+};
+
+/* FCNVME_LSDESC_CONN_ID */
+struct fcnvme_lsdesc_conn_id {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be64	connection_id;
+};
+
+/* FCNVME_LSDESC_ASSOC_ID */
+struct fcnvme_lsdesc_assoc_id {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be64	association_id;
+};
+
+/* r_ctl values */
+enum {
+	FCNVME_RS_RCTL_DATA		= 1,
+	FCNVME_RS_RCTL_XFER_RDY		= 5,
+	FCNVME_RS_RCTL_RSP		= 8,
+};
+
+
+/* ********** start of Link Services ********** */
+
+
+/* FCNVME_LS_RJT */
+struct fcnvme_ls_rjt {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_rqst		rqst;
+	struct fcnvme_lsdesc_rjt		rjt;
+};
+
+/* FCNVME_LS_ACC */
+struct fcnvme_ls_acc_hdr {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_rqst		rqst;
+	/* Followed by cmd-specific ACC descriptors, see next definitions */
+};
+
+/* FCNVME_LS_CREATE_ASSOCIATION */
+struct fcnvme_ls_cr_assoc_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_cr_assoc_cmd	assoc_cmd;
+};
+
+struct fcnvme_ls_cr_assoc_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_conn_id		connectid;
+};
+
+
+/* FCNVME_LS_CREATE_CONNECTION */
+struct fcnvme_ls_cr_conn_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_cr_conn_cmd	connect_cmd;
+};
+
+struct fcnvme_ls_cr_conn_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+	struct fcnvme_lsdesc_conn_id		connectid;
+};
+
+/* FCNVME_LS_DISCONNECT */
+struct fcnvme_ls_disconnect_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_disconn_cmd	discon_cmd;
+};
+
+struct fcnvme_ls_disconnect_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+};
+
+
+/*
+ * Yet to be defined in FC-NVME:
+ */
+#define NVME_FC_CONNECT_TIMEOUT_SEC	2		/* 2 seconds */
+#define NVME_FC_LS_TIMEOUT_SEC		2		/* 2 seconds */
+#define NVME_FC_TGTOP_TIMEOUT_SEC	2		/* 2 seconds */
+
+
+#endif /* _NVME_FC_H */
-- 
cgit v1.2.3


From d6d20012e116904065d192be6146040c99c03c3c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 2 Dec 2016 00:28:41 -0800
Subject: nvme-fabrics: Add FC transport LLDD api definitions

Host:
 - LLDD registration with the host transport
 - registering host ports (local ports) and target ports seen on
   fabric (remote ports)
 - Data structures and call points for FC-4 LS's and FCP IO requests

Target:
 - LLDD registration with the target transport
 - registering nvme subsystem ports (target ports)
 - Data structures and call points for reception of FC-4 LS's and
   FCP IO requests, and callbacks to perform data and rsp transfers
   for the io.

Add to MAINTAINERS file

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jay Freyensee <james_p_freyensee@linux.intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS                    |   1 +
 include/linux/nvme-fc-driver.h | 851 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 852 insertions(+)
 create mode 100644 include/linux/nvme-fc-driver.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index e630560df720..539502e7089d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8664,6 +8664,7 @@ M:	James Smart <james.smart@broadcom.com>
 L:	linux-nvme@lists.infradead.org
 S:	Supported
 F:	include/linux/nvme-fc.h
+F:	include/linux/nvme-fc-driver.h
 
 NVMEM FRAMEWORK
 M:	Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
new file mode 100644
index 000000000000..f21471f7ee40
--- /dev/null
+++ b/include/linux/nvme-fc-driver.h
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2016, Avago Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_FC_DRIVER_H
+#define _NVME_FC_DRIVER_H 1
+
+
+/*
+ * **********************  LLDD FC-NVME Host API ********************
+ *
+ *  For FC LLDD's that are the NVME Host role.
+ *
+ * ******************************************************************
+ */
+
+
+
+/* FC Port role bitmask - can merge with FC Port Roles in fc transport */
+#define FC_PORT_ROLE_NVME_INITIATOR	0x10
+#define FC_PORT_ROLE_NVME_TARGET	0x11
+#define FC_PORT_ROLE_NVME_DISCOVERY	0x12
+
+
+/**
+ * struct nvme_fc_port_info - port-specific ids and FC connection-specific
+ *                            data element used during NVME Host role
+ *                            registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ */
+struct nvme_fc_port_info {
+	u64			node_name;
+	u64			port_name;
+	u32			port_role;
+	u32			port_id;
+};
+
+
+/**
+ * struct nvmefc_ls_req - Request structure passed from NVME-FC transport
+ *                        to LLDD in order to perform a NVME FC-4 LS
+ *                        request and obtain a response.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD ls_req
+ * entrypoint.
+ * @rqstaddr: pointer to request buffer
+ * @rqstdma:  PCI DMA address of request buffer
+ * @rqstlen:  Length, in bytes, of request buffer
+ * @rspaddr:  pointer to response buffer
+ * @rspdma:   PCI DMA address of response buffer
+ * @rsplen:   Length, in bytes, of response buffer
+ * @timeout:  Maximum amount of time, in seconds, to wait for the LS response.
+ *            If timeout exceeded, LLDD to abort LS exchange and complete
+ *            LS request with error status.
+ * @private:  pointer to memory allocated alongside the ls request structure
+ *            that is specifically for the LLDD to use while processing the
+ *            request. The length of the buffer corresponds to the
+ *            lsrqst_priv_sz value specified in the nvme_fc_port_template
+ *            supplied by the LLDD.
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            the LS request. req argument is the pointer to the original LS
+ *            request structure. Status argument must be 0 upon success, a
+ *            negative errno on failure (example: -ENXIO).
+ */
+struct nvmefc_ls_req {
+	void			*rqstaddr;
+	dma_addr_t		rqstdma;
+	u32			rqstlen;
+	void			*rspaddr;
+	dma_addr_t		rspdma;
+	u32			rsplen;
+	u32			timeout;
+
+	void			*private;
+
+	void (*done)(struct nvmefc_ls_req *req, int status);
+
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+enum nvmefc_fcp_datadir {
+	NVMEFC_FCP_NODATA,	/* payload_length and sg_cnt will be zero */
+	NVMEFC_FCP_WRITE,
+	NVMEFC_FCP_READ,
+};
+
+
+#define NVME_FC_MAX_SEGMENTS		256
+
+/**
+ * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport
+ *                         to LLDD in order to perform a NVME FCP IO operation.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD fcp_io
+ * entrypoint.
+ * @cmdaddr:   pointer to the FCP CMD IU buffer
+ * @rspaddr:   pointer to the FCP RSP IU buffer
+ * @cmddma:    PCI DMA address of the FCP CMD IU buffer
+ * @rspdma:    PCI DMA address of the FCP RSP IU buffer
+ * @cmdlen:    Length, in bytes, of the FCP CMD IU buffer
+ * @rsplen:    Length, in bytes, of the FCP RSP IU buffer
+ * @payload_length: Length of DATA_IN or DATA_OUT payload data to transfer
+ * @sg_table:  scatter/gather structure for payload data
+ * @first_sgl: memory for 1st scatter/gather list segment for payload data
+ * @sg_cnt:    number of elements in the scatter/gather list
+ * @io_dir:    direction of the FCP request (see NVMEFC_FCP_xxx)
+ * @sqid:      The nvme SQID the command is being issued on
+ * @done:      The callback routine the LLDD is to invoke upon completion of
+ *             the FCP operation. req argument is the pointer to the original
+ *             FCP IO operation.
+ * @private:   pointer to memory allocated alongside the FCP operation
+ *             request structure that is specifically for the LLDD to use
+ *             while processing the operation. The length of the buffer
+ *             corresponds to the fcprqst_priv_sz value specified in the
+ *             nvme_fc_port_template supplied by the LLDD.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of payload data, in bytes, that were
+ *             transferred. Should equal payload_length on success.
+ * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
+ * @status:    Completion status of the FCP operation. must be 0 upon success,
+ *             NVME_SC_FC_xxx value upon failure. Note: this is NOT a
+ *             reflection of the NVME CQE completion status. Only the status
+ *             of the FCP operation at the NVME-FC level.
+ */
+struct nvmefc_fcp_req {
+	void			*cmdaddr;
+	void			*rspaddr;
+	dma_addr_t		cmddma;
+	dma_addr_t		rspdma;
+	u16			cmdlen;
+	u16			rsplen;
+
+	u32			payload_length;
+	struct sg_table		sg_table;
+	struct scatterlist	*first_sgl;
+	int			sg_cnt;
+	enum nvmefc_fcp_datadir	io_dir;
+
+	__le16			sqid;
+
+	void (*done)(struct nvmefc_fcp_req *req);
+
+	void			*private;
+
+	u32			transferred_length;
+	u16			rcv_rsplen;
+	u32			status;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/*
+ * Direct copy of fc_port_state enum. For later merging
+ */
+enum nvme_fc_obj_state {
+	FC_OBJSTATE_UNKNOWN,
+	FC_OBJSTATE_NOTPRESENT,
+	FC_OBJSTATE_ONLINE,
+	FC_OBJSTATE_OFFLINE,		/* User has taken Port Offline */
+	FC_OBJSTATE_BLOCKED,
+	FC_OBJSTATE_BYPASSED,
+	FC_OBJSTATE_DIAGNOSTICS,
+	FC_OBJSTATE_LINKDOWN,
+	FC_OBJSTATE_ERROR,
+	FC_OBJSTATE_LOOPBACK,
+	FC_OBJSTATE_DELETED,
+};
+
+
+/**
+ * struct nvme_fc_local_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a local NVME host port.
+ *                 Allocated/created by the nvme_fc_register_localport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport host port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private:   pointer to memory allocated alongside the local port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the local_priv_sz
+ *             value specified in the nvme_fc_port_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the port.
+ */
+struct nvme_fc_local_port {
+	/* static/read-only fields */
+	u32 port_num;
+	u32 port_role;
+	u64 node_name;
+	u64 port_name;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_remote_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a remote NVME subsystem port.
+ *                 Allocated/created by the nvme_fc_register_remoteport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport remote subsystem port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @localport: pointer to the NVME-FC local host port the subsystem is
+ *             connected to.
+ * @private:   pointer to memory allocated alongside the remote port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the remote_priv_sz
+ *             value specified in the nvme_fc_port_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link or login
+ * state. LLDD may reference fields directly to change them. Initialized by
+ * the port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the remote port. Valid values are
+ *                ONLINE or UNKNOWN.
+ */
+struct nvme_fc_remote_port {
+	/* static fields */
+	u32 port_num;
+	u32 port_role;
+	u64 node_name;
+	u64 port_name;
+
+	struct nvme_fc_local_port *localport;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_port_template - structure containing static entrypoints and
+ *                 operational parameters for an LLDD that supports NVME host
+ *                 behavior. Passed by reference in port registrations.
+ *                 NVME-FC transport remembers template reference and may
+ *                 access it during runtime operation.
+ *
+ * Host/Initiator Transport Entrypoints/Parameters:
+ *
+ * @localport_delete:  The LLDD initiates deletion of a localport via
+ *       nvme_fc_deregister_localport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the localport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @remoteport_delete:  The LLDD initiates deletion of a remoteport via
+ *       nvme_fc_deregister_remoteport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the remoteport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @create_queue:  Upon creating a host<->controller association, queues are
+ *       created such that they can be affinitized to cpus/cores. This
+ *       callback into the LLDD to notify that a controller queue is being
+ *       created.  The LLDD may choose to allocate an associated hw queue
+ *       or map it onto a shared hw queue. Upon return from the call, the
+ *       LLDD specifies a handle that will be given back to it for any
+ *       command that is posted to the controller queue.  The handle can
+ *       be used by the LLDD to map quickly to the proper hw queue for
+ *       command execution.  The mask of cpu's that will map to this queue
+ *       at the block-level is also passed in. The LLDD should use the
+ *       queue id and/or cpu masks to ensure proper affinitization of the
+ *       controller queue to the hw queue.
+ *       Entrypoint is Optional.
+ *
+ * @delete_queue:  This is the inverse of the crete_queue. During
+ *       host<->controller association teardown, this routine is called
+ *       when a controller queue is being terminated. Any association with
+ *       a hw queue should be termined. If there is a unique hw queue, the
+ *       hw queue should be torn down.
+ *       Entrypoint is Optional.
+ *
+ * @poll_queue:  Called to poll for the completion of an io on a blk queue.
+ *       Entrypoint is Optional.
+ *
+ * @ls_req:  Called to issue a FC-NVME FC-4 LS service request.
+ *       The nvme_fc_ls_req structure will fully describe the buffers for
+ *       the request payload and where to place the response payload. The
+ *       LLDD is to allocate an exchange, issue the LS request, obtain the
+ *       LS response, and call the "done" routine specified in the request
+ *       structure (argument to done is the ls request structure itself).
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_io:  called to issue a FC-NVME I/O request.  The I/O may be for
+ *       an admin queue or an i/o queue.  The nvmefc_fcp_req structure will
+ *       fully describe the io: the buffer containing the FC-NVME CMD IU
+ *       (which contains the SQE), the sg list for the payload if applicable,
+ *       and the buffer to place the FC-NVME RSP IU into.  The LLDD will
+ *       complete the i/o, indicating the amount of data transferred or
+ *       any transport error, and call the "done" routine specified in the
+ *       request structure (argument to done is the fcp request structure
+ *       itself).
+ *       Entrypoint is Mandatory.
+ *
+ * @ls_abort: called to request the LLDD to abort the indicated ls request.
+ *       The call may return before the abort has completed. After aborting
+ *       the request, the LLDD must still call the ls request done routine
+ *       indicating an FC transport Aborted status.
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_abort: called to request the LLDD to abort the indicated fcp request.
+ *       The call may return before the abort has completed. After aborting
+ *       the request, the LLDD must still call the fcp request done routine
+ *       indicating an FC transport Aborted status.
+ *       Entrypoint is Mandatory.
+ *
+ * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
+ *       supports for cpu affinitization.
+ *       Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments:  indicates the maximum number of sgl segments supported
+ *       by the LLDD
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments:  indicates the maximum number of sgl segments
+ *       supported by the LLDD for DIF operations.
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary:  indicates the dma address boundary where dma mappings
+ *       will be split across.
+ *       Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ *       4Gig address boundarys
+ *
+ * @local_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a localport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the localport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @remote_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a remoteport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the remoteport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a ls request structure is allocated. The additional
+ *       memory area solely for the of the LLDD and its location is
+ *       specified by the ls_request->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @fcprqst_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a fcp request structure is allocated. The additional
+ *       memory area solely for the of the LLDD and its location is
+ *       specified by the fcp_request->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ */
+struct nvme_fc_port_template {
+	/* initiator-based functions */
+	void	(*localport_delete)(struct nvme_fc_local_port *);
+	void	(*remoteport_delete)(struct nvme_fc_remote_port *);
+	int	(*create_queue)(struct nvme_fc_local_port *,
+				unsigned int qidx, u16 qsize,
+				void **handle);
+	void	(*delete_queue)(struct nvme_fc_local_port *,
+				unsigned int qidx, void *handle);
+	void	(*poll_queue)(struct nvme_fc_local_port *, void *handle);
+	int	(*ls_req)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				struct nvmefc_ls_req *);
+	int	(*fcp_io)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				void *hw_queue_handle,
+				struct nvmefc_fcp_req *);
+	void	(*ls_abort)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				struct nvmefc_ls_req *);
+	void	(*fcp_abort)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				void *hw_queue_handle,
+				struct nvmefc_fcp_req *);
+
+	u32	max_hw_queues;
+	u16	max_sgl_segments;
+	u16	max_dif_sgl_segments;
+	u64	dma_boundary;
+
+	/* sizes of additional private data for data structures */
+	u32	local_priv_sz;
+	u32	remote_priv_sz;
+	u32	lsrqst_priv_sz;
+	u32	fcprqst_priv_sz;
+};
+
+
+/*
+ * Initiator/Host functions
+ */
+
+int nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
+			struct nvme_fc_port_template *template,
+			struct device *dev,
+			struct nvme_fc_local_port **lport_p);
+
+int nvme_fc_unregister_localport(struct nvme_fc_local_port *localport);
+
+int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
+			struct nvme_fc_port_info *pinfo,
+			struct nvme_fc_remote_port **rport_p);
+
+int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport);
+
+
+
+/*
+ * ***************  LLDD FC-NVME Target/Subsystem API ***************
+ *
+ *  For FC LLDD's that are the NVME Subsystem role
+ *
+ * ******************************************************************
+ */
+
+/**
+ * struct nvmet_fc_port_info - port-specific ids and FC connection-specific
+ *                             data element used during NVME Subsystem role
+ *                             registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ */
+struct nvmet_fc_port_info {
+	u64			node_name;
+	u64			port_name;
+	u32			port_id;
+};
+
+
+/**
+ * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC
+ *                            layer to represent the exchange context for
+ *                            a FC-NVME Link Service (LS).
+ *
+ * The structure is allocated by the LLDD whenever a LS Request is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure
+ * will be passed back to the LLDD when the response is to be transmit.
+ * The LLDD is to use the address to map back to the LLDD exchange structure
+ * which maintains information such as the targetport the LS was received
+ * on, the remote FC NVME initiator that sent the LS, and any FC exchange
+ * context.  Upon completion of the LS response transmit, the address of the
+ * structure will be passed back to the LS rsp done() routine, allowing the
+ * nvmet-fc layer to release dma resources. Upon completion of the done()
+ * routine, no further access will be made by the nvmet-fc layer and the
+ * LLDD can de-allocate the structure.
+ *
+ * Field initialization:
+ *   At the time of the nvmet_fc_rcv_ls_req() call, there is no content that
+ *     is valid in the structure.
+ *
+ *   When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc
+ *     layer will fully set the fields in order to specify the response
+ *     payload buffer and its length as well as the done routine to be called
+ *     upon compeletion of the transmit.  The nvmet-fc layer will also set a
+ *     private pointer for its own use in the done routine.
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp
+ * entrypoint.
+ * @rspbuf:   pointer to the LS response buffer
+ * @rspdma:   PCI DMA address of the LS response buffer
+ * @rsplen:   Length, in bytes, of the LS response buffer
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            transmitting the LS response. req argument is the pointer to
+ *            the original ls request.
+ * @nvmet_fc_private:  pointer to an internal NVMET-FC layer structure used
+ *            as part of the NVMET-FC processing. The LLDD is not to access
+ *            this pointer.
+ */
+struct nvmefc_tgt_ls_req {
+	void		*rspbuf;
+	dma_addr_t	rspdma;
+	u16		rsplen;
+
+	void (*done)(struct nvmefc_tgt_ls_req *req);
+	void *nvmet_fc_private;		/* LLDD is not to access !! */
+};
+
+/* Operations that NVME-FC layer may request the LLDD to perform for FCP */
+enum {
+	NVMET_FCOP_READDATA	= 1,	/* xmt data to initiator */
+	NVMET_FCOP_WRITEDATA	= 2,	/* xmt data from initiator */
+	NVMET_FCOP_READDATA_RSP	= 3,	/* xmt data to initiator and send
+					 * rsp as well
+					 */
+	NVMET_FCOP_RSP		= 4,	/* send rsp frame */
+	NVMET_FCOP_ABORT	= 5,	/* abort exchange via ABTS */
+	NVMET_FCOP_BA_ACC	= 6,	/* send BA_ACC */
+	NVMET_FCOP_BA_RJT	= 7,	/* send BA_RJT */
+};
+
+/**
+ * struct nvmefc_tgt_fcp_req - Structure used between LLDD and NVMET-FC
+ *                            layer to represent the exchange context and
+ *                            the specific FC-NVME IU operation(s) to perform
+ *                            for a FC-NVME FCP IO.
+ *
+ * Structure used between LLDD and nvmet-fc layer to represent the exchange
+ * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related
+ * memory transfers, and its assocated cqe transfer).
+ *
+ * The structure is allocated by the LLDD whenever a FCP CMD IU is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_fcp_req() call. The address of the structure
+ * will be passed back to the LLDD for the data operations and transmit of
+ * the response. The LLDD is to use the address to map back to the LLDD
+ * exchange structure which maintains information such as the targetport
+ * the FCP I/O was received on, the remote FC NVME initiator that sent the
+ * FCP I/O, and any FC exchange context.  Upon completion of the FCP target
+ * operation, the address of the structure will be passed back to the FCP
+ * op done() routine, allowing the nvmet-fc layer to release dma resources.
+ * Upon completion of the done() routine for either RSP or ABORT ops, no
+ * further access will be made by the nvmet-fc layer and the LLDD can
+ * de-allocate the structure.
+ *
+ * Field initialization:
+ *   At the time of the nvmet_fc_rcv_fcp_req() call, there is no content that
+ *     is valid in the structure.
+ *
+ *   When the structure is used for an FCP target operation, the nvmet-fc
+ *     layer will fully set the fields in order to specify the scattergather
+ *     list, the transfer length, as well as the done routine to be called
+ *     upon compeletion of the operation.  The nvmet-fc layer will also set a
+ *     private pointer for its own use in the done routine.
+ *
+ * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !!
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op
+ * entrypoint.
+ * @op:       Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx)
+ * @hwqid:    Specifies the hw queue index (0..N-1, where N is the
+ *            max_hw_queues value from the LLD's nvmet_fc_target_template)
+ *            that the operation is to use.
+ * @offset:   Indicates the DATA_OUT/DATA_IN payload offset to be tranferred.
+ *            Field is only valid on WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @timeout:  amount of time, in seconds, to wait for a response from the NVME
+ *            host. A value of 0 is an infinite wait.
+ *            Valid only for the following ops:
+ *              WRITEDATA: caps the wait for data reception
+ *              READDATA_RSP & RSP: caps wait for FCP_CONF reception (if used)
+ * @transfer_length: the length, in bytes, of the DATA_OUT or DATA_IN payload
+ *            that is to be transferred.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @ba_rjt:   Contains the BA_RJT payload that is to be transferred.
+ *            Valid only for the NVMET_FCOP_BA_RJT op.
+ * @sg:       Scatter/gather list for the DATA_OUT/DATA_IN payload data.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @sg_cnt:   Number of valid entries in the scatter/gather list.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @rspaddr:  pointer to the FCP RSP IU buffer to be transmit
+ *            Used by RSP and READDATA_RSP ops
+ * @rspdma:   PCI DMA address of the FCP RSP IU buffer
+ *            Used by RSP and READDATA_RSP ops
+ * @rsplen:   Length, in bytes, of the FCP RSP IU buffer
+ *            Used by RSP and READDATA_RSP ops
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            the operation. req argument is the pointer to the original
+ *            FCP subsystem op request.
+ * @nvmet_fc_private:  pointer to an internal NVMET-FC layer structure used
+ *            as part of the NVMET-FC processing. The LLDD is not to
+ *            reference this field.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of DATA_OUT payload data received by a
+ *            a WRITEDATA operation. If not a WRITEDATA operation, value must
+ *            be set to 0. Should equal transfer_length on success.
+ * @fcp_error: status of the FCP operation. Must be 0 on success; on failure
+ *            must be a NVME_SC_FC_xxxx value.
+ */
+struct nvmefc_tgt_fcp_req {
+	u8			op;
+	u16			hwqid;
+	u32			offset;
+	u32			timeout;
+	u32			transfer_length;
+	struct fc_ba_rjt	ba_rjt;
+	struct scatterlist	sg[NVME_FC_MAX_SEGMENTS];
+	int			sg_cnt;
+	void			*rspaddr;
+	dma_addr_t		rspdma;
+	u16			rsplen;
+
+	void (*done)(struct nvmefc_tgt_fcp_req *);
+
+	void *nvmet_fc_private;		/* LLDD is not to access !! */
+
+	u32			transferred_length;
+	int			fcp_error;
+};
+
+
+/* Target Features (Bit fields) LLDD supports */
+enum {
+	NVMET_FCTGTFEAT_READDATA_RSP = (1 << 0),
+		/* Bit 0: supports the NVMET_FCPOP_READDATA_RSP op, which
+		 * sends (the last) Read Data sequence followed by the RSP
+		 * sequence in one LLDD operation. Errors during Data
+		 * sequence transmit must not allow RSP sequence to be sent.
+		 */
+	NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1),
+		/* Bit 1: When 0, the LLDD will deliver FCP CMD
+		 * on the CPU it should be affinitized to. Thus work will
+		 * be scheduled on the cpu received on. When 1, the LLDD
+		 * may not deliver the CMD on the CPU it should be worked
+		 * on. The transport should pick a cpu to schedule the work
+		 * on.
+		 */
+};
+
+
+/**
+ * struct nvmet_fc_target_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a local NVME subsystem port.
+ *                 Allocated/created by the nvme_fc_register_targetport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport subsytem port number
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private:   pointer to memory allocated alongside the local port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the target_priv_sz
+ *             value specified in the nvme_fc_target_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the port.
+ */
+struct nvmet_fc_target_port {
+	/* static/read-only fields */
+	u32 port_num;
+	u64 node_name;
+	u64 port_name;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvmet_fc_target_template - structure containing static entrypoints
+ *                 and operational parameters for an LLDD that supports NVME
+ *                 subsystem behavior. Passed by reference in port
+ *                 registrations. NVME-FC transport remembers template
+ *                 reference and may access it during runtime operation.
+ *
+ * Subsystem/Target Transport Entrypoints/Parameters:
+ *
+ * @targetport_delete:  The LLDD initiates deletion of a targetport via
+ *       nvmet_fc_unregister_targetport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the targetport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @xmt_ls_rsp:  Called to transmit the response to a FC-NVME FC-4 LS service.
+ *       The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
+ *       structure specified in the nvmet_fc_rcv_ls_req() call made when
+ *       the LS request was received.  The structure will fully describe
+ *       the buffers for the response payload and the dma address of the
+ *       payload. The LLDD is to transmit the response (or return a non-zero
+ *       errno status), and upon completion of the transmit, call the
+ *       "done" routine specified in the nvmefc_tgt_ls_req structure
+ *       (argument to done is the ls reqwuest structure itself).
+ *       After calling the done routine, the LLDD shall consider the
+ *       LS handling complete and the nvmefc_tgt_ls_req structure may
+ *       be freed/released.
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_op:  Called to perform a data transfer, transmit a response, or
+ *       abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same
+ *       LLDD-supplied exchange structure specified in the
+ *       nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received.
+ *       The op field in the structure shall indicate the operation for
+ *       the LLDD to perform relative to the io.
+ *         NVMET_FCOP_READDATA operation: the LLDD is to send the
+ *           payload data (described by sglist) to the host in 1 or
+ *           more FC sequences (preferrably 1).  Note: the fc-nvme layer
+ *           may call the READDATA operation multiple times for longer
+ *           payloads.
+ *         NVMET_FCOP_WRITEDATA operation: the LLDD is to receive the
+ *           payload data (described by sglist) from the host via 1 or
+ *           more FC sequences (preferrably 1). The LLDD is to generate
+ *           the XFER_RDY IU(s) corresponding to the data being requested.
+ *           Note: the FC-NVME layer may call the WRITEDATA operation
+ *           multiple times for longer payloads.
+ *         NVMET_FCOP_READDATA_RSP operation: the LLDD is to send the
+ *           payload data (described by sglist) to the host in 1 or
+ *           more FC sequences (preferrably 1). If an error occurs during
+ *           payload data transmission, the LLDD is to set the
+ *           nvmefc_tgt_fcp_req fcp_error and transferred_length field, then
+ *           consider the operation complete. On error, the LLDD is to not
+ *           transmit the FCP_RSP iu. If all payload data is transferred
+ *           successfully, the LLDD is to update the nvmefc_tgt_fcp_req
+ *           transferred_length field and may subsequently transmit the
+ *           FCP_RSP iu payload (described by rspbuf, rspdma, rsplen).
+ *           The LLDD is to await FCP_CONF reception to confirm the RSP
+ *           reception by the host. The LLDD may retramsit the FCP_RSP iu
+ *           if necessary per FC-NVME. Upon reception of FCP_CONF, or upon
+ *           FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req
+ *           fcp_error field and consider the operation complete..
+ *         NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload
+ *           (described by rspbuf, rspdma, rsplen).  The LLDD is to await
+ *           FCP_CONF reception to confirm the RSP reception by the host.
+ *           The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME.
+ *           Upon reception of FCP_CONF, or upon FCP_CONF failure, the
+ *           LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
+ *           consider the operation complete..
+ *         NVMET_FCOP_ABORT: the LLDD is to terminate the exchange
+ *           corresponding to the fcp operation. The LLDD shall send
+ *           ABTS and follow FC exchange abort-multi rules, including
+ *           ABTS retries and possible logout.
+ *       Upon completing the indicated operation, the LLDD is to set the
+ *       status fields for the operation (tranferred_length and fcp_error
+ *       status) in the request, then all the "done" routine
+ *       indicated in the fcp request.  Upon return from the "done"
+ *       routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation
+ *       the fc-nvme layer will not longer reference the fcp request,
+ *       allowing the LLDD to free/release the fcp request.
+ *       Note: when calling the done routine for READDATA or WRITEDATA
+ *       operations, the fc-nvme layer may immediate convert, in the same
+ *       thread and before returning to the LLDD, the fcp operation to
+ *       the next operation for the fcp io and call the LLDDs fcp_op
+ *       call again. If fields in the fcp request are to be accessed post
+ *       the done call, the LLDD should save their values prior to calling
+ *       the done routine, and inspect the save values after the done
+ *       routine.
+ *       Returns 0 on success, -<errno> on failure (Ex: -EIO)
+ *       Entrypoint is Mandatory.
+ *
+ * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
+ *       supports for cpu affinitization.
+ *       Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments:  indicates the maximum number of sgl segments supported
+ *       by the LLDD
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments:  indicates the maximum number of sgl segments
+ *       supported by the LLDD for DIF operations.
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary:  indicates the dma address boundary where dma mappings
+ *       will be split across.
+ *       Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ *       4Gig address boundarys
+ *
+ * @target_features: The LLDD sets bits in this field to correspond to
+ *       optional features that are supported by the LLDD.
+ *       Refer to the NVMET_FCTGTFEAT_xxx values.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @target_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a targetport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the targetport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ */
+struct nvmet_fc_target_template {
+	void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
+	int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_ls_req *tls_req);
+	int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_fcp_req *);
+
+	u32	max_hw_queues;
+	u16	max_sgl_segments;
+	u16	max_dif_sgl_segments;
+	u64	dma_boundary;
+
+	u32	target_features;
+
+	u32	target_priv_sz;
+};
+
+
+int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
+			struct nvmet_fc_target_template *template,
+			struct device *dev,
+			struct nvmet_fc_target_port **tgtport_p);
+
+int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
+
+int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_ls_req *lsreq,
+			void *lsreqbuf, u32 lsreqbuf_len);
+
+int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_fcp_req *fcpreq,
+			void *cmdiubuf, u32 cmdiubuf_len);
+
+#endif /* _NVME_FC_DRIVER_H */
-- 
cgit v1.2.3


From 7c4788950ba5922fde976d80b72baf46f14dee8d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 22 Nov 2016 10:57:15 +0100
Subject: x86/uaccess, sched/preempt: Verify access_ok() context

I recently encountered wreckage because access_ok() was used where it
should not be, add an explicit WARN when access_ok() is used wrongly.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/uaccess.h | 13 +++++++++++--
 include/linux/preempt.h        | 21 +++++++++++++--------
 2 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index faf3687f1035..ea148313570f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
 	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
 })
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define WARN_ON_IN_IRQ()	WARN_ON_ONCE(!in_task())
+#else
+# define WARN_ON_IN_IRQ()
+#endif
+
 /**
  * access_ok: - Checks if a user space pointer is valid
  * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) \
-	likely(!__range_not_ok(addr, size, user_addr_max()))
+#define access_ok(type, addr, size)					\
+({									\
+	WARN_ON_IN_IRQ();						\
+	likely(!__range_not_ok(addr, size, user_addr_max()));		\
+})
 
 /*
  * These are the main single-value transfer routines.  They automatically
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 75e4e30677f1..7eeceac52dea 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -65,19 +65,24 @@
 
 /*
  * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
+ *
+ * in_irq()       - We're in (hard) IRQ context
+ * in_softirq()   - We have BH disabled, or are processing softirqs
+ * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
+ * in_serving_softirq() - We're in softirq context
+ * in_nmi()       - We're in NMI context
+ * in_task()	  - We're in task context
+ *
+ * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really
+ *       should not be used in new code.
  */
 #define in_irq()		(hardirq_count())
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 #define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi()	(preempt_count() & NMI_MASK)
+#define in_nmi()		(preempt_count() & NMI_MASK)
+#define in_task()		(!(preempt_count() & \
+				   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
 
 /*
  * The preempt_count offset after preempt_disable();
-- 
cgit v1.2.3


From f4ec57b632fe15ed7a355099cb96ed4b647416de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 24 Nov 2016 12:46:26 +0100
Subject: locking/ww_mutex: Use relaxed atomics

The stamp is a sequence number, we don't care about memory ordering.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ww_mutex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 2bb5deb0012e..7b0066814fa0 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -120,7 +120,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
 				   struct ww_class *ww_class)
 {
 	ctx->task = current;
-	ctx->stamp = atomic_long_inc_return(&ww_class->stamp);
+	ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp);
 	ctx->acquired = 0;
 #ifdef CONFIG_DEBUG_MUTEXES
 	ctx->ww_class = ww_class;
-- 
cgit v1.2.3


From 8d7cc7adcec55bfeb28cc2dc58b996c95f180337 Mon Sep 17 00:00:00 2001
From: Martyn Welch <martyn@welchs.me.uk>
Date: Sat, 3 Dec 2016 23:45:01 +0000
Subject: VME: Remove shutdown entry from vme_driver

The vme_driver structure currently has a "shutdown" entry. This entry is
never used, it lacks the correct parameter (it should be providing a
pointer to the relevant vme_dev struct to even *look* usable), the VME
subsystem currently doesn't provide support for shutdown functions and no
in-tree drivers use it (hardly surprising, given it'd never be called).

Remove the entry from vme_driver to avoid confusion.

Signed-off-by: Martyn Welch <martyn.welch@collabora.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/vme.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vme.h b/include/linux/vme.h
index ea6095deba20..8c589176c2f8 100644
--- a/include/linux/vme.h
+++ b/include/linux/vme.h
@@ -113,7 +113,6 @@ struct vme_driver {
 	int (*match)(struct vme_dev *);
 	int (*probe)(struct vme_dev *);
 	int (*remove)(struct vme_dev *);
-	void (*shutdown)(void);
 	struct device_driver driver;
 	struct list_head devices;
 };
-- 
cgit v1.2.3


From 076802d0061520bc646ee121fe0f34fce6c55a93 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Sat, 3 Dec 2016 00:15:25 +0200
Subject: mei: bus: enable non-blocking RX

Enable non-blocking receive for drivers on mei bus, this allows checking
for data availability by mei client drivers. This is most effective for
fixed address clients, that lacks flow control.

This function adds new API function mei_cldev_recv_nonblock(), it
retuns -EGAIN if function will block.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c |  4 ++--
 drivers/misc/mei/bus.c       | 31 +++++++++++++++++++++++++++++--
 drivers/misc/mei/mei_dev.h   |  7 ++++++-
 include/linux/mei_cl_bus.h   |  6 ++++--
 4 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 7f2cef9011ae..18e05ca7584f 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -141,7 +141,7 @@ static int mei_osver(struct mei_cl_device *cldev)
 	if (ret < 0)
 		return ret;
 
-	ret = __mei_cl_recv(cldev->cl, buf, length);
+	ret = __mei_cl_recv(cldev->cl, buf, length, 0);
 	if (ret < 0)
 		return ret;
 
@@ -272,7 +272,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
 		return -ENOMEM;
 
 	ret = 0;
-	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
+	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, 0);
 	if (bytes_recv < if_version_length) {
 		dev_err(bus->dev, "Could not read IF version\n");
 		ret = -EIO;
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 2fd254ecde2f..0037153c80a6 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -98,15 +98,18 @@ out:
  * @cl: host client
  * @buf: buffer to receive
  * @length: buffer length
+ * @mode: io mode
  *
  * Return: read size in bytes of < 0 on error
  */
-ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode)
 {
 	struct mei_device *bus;
 	struct mei_cl_cb *cb;
 	size_t r_length;
 	ssize_t rets;
+	bool nonblock = !!(mode & MEI_CL_IO_RX_NONBLOCK);
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
@@ -127,6 +130,11 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 	if (rets && rets != -EBUSY)
 		goto out;
 
+	if (nonblock) {
+		rets = -EAGAIN;
+		goto out;
+	}
+
 	/* wait on event only if there is no other waiter */
 	/* synchronized under device mutex */
 	if (!waitqueue_active(&cl->rx_wait)) {
@@ -191,6 +199,25 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 }
 EXPORT_SYMBOL_GPL(mei_cldev_send);
 
+/**
+ * mei_cldev_recv_nonblock - non block client receive (read)
+ *
+ * @cldev: me client device
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes of < 0 on error
+ *         -EAGAIN if function will block.
+ */
+ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
+				size_t length)
+{
+	struct mei_cl *cl = cldev->cl;
+
+	return __mei_cl_recv(cl, buf, length, MEI_CL_IO_RX_NONBLOCK);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock);
+
 /**
  * mei_cldev_recv - client receive (read)
  *
@@ -204,7 +231,7 @@ ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
 {
 	struct mei_cl *cl = cldev->cl;
 
-	return __mei_cl_recv(cl, buf, length);
+	return __mei_cl_recv(cl, buf, length, 0);
 }
 EXPORT_SYMBOL_GPL(mei_cldev_recv);
 
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 0e94df517410..699693cd8c59 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -115,10 +115,14 @@ enum mei_cb_file_ops {
  *
  * @MEI_CL_IO_TX_BLOCKING: send is blocking
  * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW
+ *
+ * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking
  */
 enum mei_cl_io_mode {
 	MEI_CL_IO_TX_BLOCKING = BIT(0),
 	MEI_CL_IO_TX_INTERNAL = BIT(1),
+
+	MEI_CL_IO_RX_NONBLOCK = BIT(2),
 };
 
 /*
@@ -319,7 +323,8 @@ void mei_cl_bus_rescan_work(struct work_struct *work);
 void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
 ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
 		      unsigned int mode);
-ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length);
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode);
 bool mei_cl_bus_rx_event(struct mei_cl *cl);
 bool mei_cl_bus_notify_event(struct mei_cl *cl);
 void mei_cl_bus_remove_devices(struct mei_device *bus);
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 017f5232b3de..a0d274fe08f1 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -75,7 +75,7 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
 /**
  * module_mei_cl_driver - Helper macro for registering mei cl driver
  *
- * @__mei_cldrv mei_cl_driver structure
+ * @__mei_cldrv: mei_cl_driver structure
  *
  *  Helper macro for mei cl drivers which do not do anything special in module
  *  init/exit, for eliminating a boilerplate code.
@@ -86,7 +86,9 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
 		      mei_cldev_driver_unregister)
 
 ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length);
-ssize_t  mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
+ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
+				size_t length);
 
 int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb);
 int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
-- 
cgit v1.2.3


From f45be72c8ec0b85263d1fe1e6c681d8c87e198e6 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Sat, 3 Dec 2016 12:34:29 -0800
Subject: hyperv: Fix spelling of HV_UNKOWN

Changed it to HV_UNKNOWN

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 6 +++---
 include/linux/hyperv.h    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index cbb96f2f0d1a..e5d1b0e6c922 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -134,7 +134,7 @@ static const struct vmbus_device vmbus_devs[] = {
 	},
 
 	/* Unknown GUID */
-	{ .dev_type = HV_UNKOWN,
+	{ .dev_type = HV_UNKNOWN,
 	  .perf_device = false,
 	},
 };
@@ -163,9 +163,9 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 	u16 i;
 
 	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
-		return HV_UNKOWN;
+		return HV_UNKNOWN;
 
-	for (i = HV_IDE; i < HV_UNKOWN; i++) {
+	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
 		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
 			return i;
 	}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2a52d9abb305..35053f99522b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -696,7 +696,7 @@ enum vmbus_device_type {
 	HV_FCOPY,
 	HV_BACKUP,
 	HV_DM,
-	HV_UNKOWN,
+	HV_UNKNOWN,
 };
 
 struct vmbus_device {
-- 
cgit v1.2.3


From fc76936d3ea5720a6e0948a08381b803a68deb28 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <sthemmin@microsoft.com>
Date: Sat, 3 Dec 2016 12:34:39 -0800
Subject: vmbus: add support for dynamic device id's

This patch adds sysfs interface to dynamically bind new UUID values
to existing VMBus device. This is useful for generic UIO driver to
act similar to uio_pci_generic.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/vmbus_drv.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/hyperv.h |   6 ++
 2 files changed, 172 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0276d2ef06ee..230c62e7f567 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -45,6 +45,11 @@
 #include <linux/random.h>
 #include "hyperv_vmbus.h"
 
+struct vmbus_dynid {
+	struct list_head node;
+	struct hv_vmbus_device_id id;
+};
+
 static struct acpi_device  *hv_acpi_dev;
 
 static struct completion probe_event;
@@ -500,7 +505,7 @@ static ssize_t device_show(struct device *dev,
 static DEVICE_ATTR_RO(device);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
-static struct attribute *vmbus_attrs[] = {
+static struct attribute *vmbus_dev_attrs[] = {
 	&dev_attr_id.attr,
 	&dev_attr_state.attr,
 	&dev_attr_monitor_id.attr,
@@ -528,7 +533,7 @@ static struct attribute *vmbus_attrs[] = {
 	&dev_attr_device.attr,
 	NULL,
 };
-ATTRIBUTE_GROUPS(vmbus);
+ATTRIBUTE_GROUPS(vmbus_dev);
 
 /*
  * vmbus_uevent - add uevent for our device
@@ -565,10 +570,29 @@ static inline bool is_null_guid(const uuid_le *guid)
  * Return a matching hv_vmbus_device_id pointer.
  * If there is no match, return NULL.
  */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(
-					const struct hv_vmbus_device_id *id,
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 					const uuid_le *guid)
 {
+	const struct hv_vmbus_device_id *id = NULL;
+	struct vmbus_dynid *dynid;
+
+	/* Look at the dynamic ids first, before the static ones */
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry(dynid, &drv->dynids.list, node) {
+		if (!uuid_le_cmp(dynid->id.guid, *guid)) {
+			id = &dynid->id;
+			break;
+		}
+	}
+	spin_unlock(&drv->dynids.lock);
+
+	if (id)
+		return id;
+
+	id = drv->id_table;
+	if (id == NULL)
+		return NULL; /* empty device table */
+
 	for (; !is_null_guid(&id->guid); id++)
 		if (!uuid_le_cmp(id->guid, *guid))
 			return id;
@@ -576,6 +600,134 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(
 	return NULL;
 }
 
+/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
+static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
+{
+	struct vmbus_dynid *dynid;
+
+	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+	if (!dynid)
+		return -ENOMEM;
+
+	dynid->id.guid = *guid;
+
+	spin_lock(&drv->dynids.lock);
+	list_add_tail(&dynid->node, &drv->dynids.list);
+	spin_unlock(&drv->dynids.lock);
+
+	return driver_attach(&drv->driver);
+}
+
+static void vmbus_free_dynids(struct hv_driver *drv)
+{
+	struct vmbus_dynid *dynid, *n;
+
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
+		list_del(&dynid->node);
+		kfree(dynid);
+	}
+	spin_unlock(&drv->dynids.lock);
+}
+
+/* Parse string of form: 1b4e28ba-2fa1-11d2-883f-b9a761bde3f */
+static int get_uuid_le(const char *str, uuid_le *uu)
+{
+	unsigned int b[16];
+	int i;
+
+	if (strlen(str) < 37)
+		return -1;
+
+	for (i = 0; i < 36; i++) {
+		switch (i) {
+		case 8: case 13: case 18: case 23:
+			if (str[i] != '-')
+				return -1;
+			break;
+		default:
+			if (!isxdigit(str[i]))
+				return -1;
+		}
+	}
+
+	/* unparse little endian output byte order */
+	if (sscanf(str,
+		   "%2x%2x%2x%2x-%2x%2x-%2x%2x-%2x%2x-%2x%2x%2x%2x%2x%2x",
+		   &b[3], &b[2], &b[1], &b[0],
+		   &b[5], &b[4], &b[7], &b[6], &b[8], &b[9],
+		   &b[10], &b[11], &b[12], &b[13], &b[14], &b[15]) != 16)
+		return -1;
+
+	for (i = 0; i < 16; i++)
+		uu->b[i] = b[i];
+	return 0;
+}
+
+/*
+ * store_new_id - sysfs frontend to vmbus_add_dynid()
+ *
+ * Allow GUIDs to be added to an existing driver via sysfs.
+ */
+static ssize_t new_id_store(struct device_driver *driver, const char *buf,
+			    size_t count)
+{
+	struct hv_driver *drv = drv_to_hv_drv(driver);
+	uuid_le guid = NULL_UUID_LE;
+	ssize_t retval;
+
+	if (get_uuid_le(buf, &guid) != 0)
+		return -EINVAL;
+
+	if (hv_vmbus_get_id(drv, &guid))
+		return -EEXIST;
+
+	retval = vmbus_add_dynid(drv, &guid);
+	if (retval)
+		return retval;
+	return count;
+}
+static DRIVER_ATTR_WO(new_id);
+
+/*
+ * store_remove_id - remove a PCI device ID from this driver
+ *
+ * Removes a dynamic pci device ID to this driver.
+ */
+static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
+			       size_t count)
+{
+	struct hv_driver *drv = drv_to_hv_drv(driver);
+	struct vmbus_dynid *dynid, *n;
+	uuid_le guid = NULL_UUID_LE;
+	size_t retval = -ENODEV;
+
+	if (get_uuid_le(buf, &guid))
+		return -EINVAL;
+
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
+		struct hv_vmbus_device_id *id = &dynid->id;
+
+		if (!uuid_le_cmp(id->guid, guid)) {
+			list_del(&dynid->node);
+			kfree(dynid);
+			retval = count;
+			break;
+		}
+	}
+	spin_unlock(&drv->dynids.lock);
+
+	return retval;
+}
+static DRIVER_ATTR_WO(remove_id);
+
+static struct attribute *vmbus_drv_attrs[] = {
+	&driver_attr_new_id.attr,
+	&driver_attr_remove_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vmbus_drv);
 
 
 /*
@@ -590,7 +742,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
 	if (is_hvsock_channel(hv_dev->channel))
 		return drv->hvsock;
 
-	if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
+	if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
 		return 1;
 
 	return 0;
@@ -607,7 +759,7 @@ static int vmbus_probe(struct device *child_device)
 	struct hv_device *dev = device_to_hv_device(child_device);
 	const struct hv_vmbus_device_id *dev_id;
 
-	dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type);
+	dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
 	if (drv->probe) {
 		ret = drv->probe(dev, dev_id);
 		if (ret != 0)
@@ -684,7 +836,8 @@ static struct bus_type  hv_bus = {
 	.remove =		vmbus_remove,
 	.probe =		vmbus_probe,
 	.uevent =		vmbus_uevent,
-	.dev_groups =		vmbus_groups,
+	.dev_groups =		vmbus_dev_groups,
+	.drv_groups =		vmbus_drv_groups,
 };
 
 struct onmessage_work_context {
@@ -905,6 +1058,9 @@ int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, c
 	hv_driver->driver.mod_name = mod_name;
 	hv_driver->driver.bus = &hv_bus;
 
+	spin_lock_init(&hv_driver->dynids.lock);
+	INIT_LIST_HEAD(&hv_driver->dynids.list);
+
 	ret = driver_register(&hv_driver->driver);
 
 	return ret;
@@ -923,8 +1079,10 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver)
 {
 	pr_info("unregistering driver %s\n", hv_driver->name);
 
-	if (!vmbus_exists())
+	if (!vmbus_exists()) {
 		driver_unregister(&hv_driver->driver);
+		vmbus_free_dynids(hv_driver);
+	}
 }
 EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 35053f99522b..42fe43fb0c80 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1119,6 +1119,12 @@ struct hv_driver {
 
 	struct device_driver driver;
 
+	/* dynamic device GUID's */
+	struct  {
+		spinlock_t lock;
+		struct list_head list;
+	} dynids;
+
 	int (*probe)(struct hv_device *, const struct hv_vmbus_device_id *);
 	int (*remove)(struct hv_device *);
 	void (*shutdown)(struct hv_device *);
-- 
cgit v1.2.3


From 18b709beb503bfc9a96a2e4b93d3cae4f5b17df0 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 6 Dec 2016 14:20:11 +0000
Subject: ACPI/IORT: Make dma masks set-up IORT specific

The introduction of acpi_dma_configure() allows to configure DMA
and related IOMMU for any device that is DMA capable. To achieve
that goal it ensures DMA masks are set-up to sane default values
before proceeding with IOMMU and DMA ops configuration.

On x86/ia64 systems, through acpi_bind_one(), acpi_dma_configure() is
called for every device that has an ACPI companion, in that every device
is considered DMA capable on x86/ia64 systems (ie acpi_get_dma_attr() API),
which has the side effect of initializing dma masks also for
pseudo-devices (eg CPUs and memory nodes) and potentially for devices
whose dma masks were not set-up before the acpi_dma_configure() API was
introduced, which may have noxious side effects.

Therefore, in preparation for IORT firmware specific DMA masks set-up,
wrap the default DMA masks set-up in acpi_dma_configure() inside an IORT
specific wrapper that reverts to a NOP on x86/ia64 systems, restoring the
default expected behaviour on x86/ia64 systems and keeping DMA default
masks set-up on IORT based (ie ARM) arch configurations.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/arm64/iort.c | 22 ++++++++++++++++++++++
 drivers/acpi/scan.c       | 14 +-------------
 include/linux/acpi_iort.h |  2 ++
 3 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 47bace8eafb6..e0d2e6e6e40c 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -546,6 +546,28 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 	return ret ? NULL : ops;
 }
 
+/**
+ * iort_set_dma_mask - Set-up dma mask for a device.
+ *
+ * @dev: device to configure
+ */
+void iort_set_dma_mask(struct device *dev)
+{
+	/*
+	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
+	 * setup the correct supported mask.
+	 */
+	if (!dev->coherent_dma_mask)
+		dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	/*
+	 * Set it to coherent_dma_mask by default if the architecture
+	 * code has not set it.
+	 */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+}
+
 /**
  * iort_iommu_configure - Set-up IOMMU configuration for a device.
  *
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 80698d3c5feb..93b00cf4eb39 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1380,19 +1380,7 @@ void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 {
 	const struct iommu_ops *iommu;
 
-	/*
-	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
-	 * setup the correct supported mask.
-	 */
-	if (!dev->coherent_dma_mask)
-		dev->coherent_dma_mask = DMA_BIT_MASK(32);
-
-	/*
-	 * Set it to coherent_dma_mask by default if the architecture
-	 * code has not set it.
-	 */
-	if (!dev->dma_mask)
-		dev->dma_mask = &dev->coherent_dma_mask;
+	iort_set_dma_mask(dev);
 
 	iommu = iort_iommu_configure(dev);
 
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index dcb2b601e152..77e08099e554 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -35,6 +35,7 @@ bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
 /* IOMMU interface */
+void iort_set_dma_mask(struct device *dev);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
 #else
 static inline void acpi_iort_init(void) { }
@@ -45,6 +46,7 @@ static inline struct irq_domain *iort_get_device_domain(struct device *dev,
 							u32 req_id)
 { return NULL; }
 /* IOMMU interface */
+static inline void iort_set_dma_mask(struct device *dev) { }
 static inline
 const struct iommu_ops *iort_iommu_configure(struct device *dev)
 { return NULL; }
-- 
cgit v1.2.3


From 13983eb89d5afaa65acd4479fad151cbd4de5509 Mon Sep 17 00:00:00 2001
From: Tomasz Nowicki <tn@semihalf.com>
Date: Fri, 9 Sep 2016 21:24:03 +0200
Subject: PCI/ACPI: Extend pci_mcfg_lookup() to return ECAM config accessors

pci_mcfg_lookup() is the external interface to the generic MCFG code.
Previously it merely looked up the ECAM base address for a given domain and
bus range.  We want a way to add MCFG quirks, some of which may require
special config accessors and adjustments to the ECAM address range.

Extend pci_mcfg_lookup() so it can return a pointer to a pci_ecam_ops
structure and a struct resource for the ECAM address space.  For now, it
always returns &pci_generic_ecam_ops (the standard accessor) and the
resource described by the MCFG.

No functional changes intended.

[bhelgaas: changelog]
Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/arm64/kernel/pci.c  | 17 +++++------------
 drivers/acpi/pci_mcfg.c  | 28 +++++++++++++++++++++++++---
 include/linux/pci-acpi.h |  4 +++-
 3 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 266a7b2d4184..4f0e3ebfea4b 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -137,25 +137,18 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
 	struct device *dev = &root->device->dev;
 	struct resource *bus_res = &root->secondary;
 	u16 seg = root->segment;
+	struct pci_ecam_ops *ecam_ops;
 	struct resource cfgres;
 	struct acpi_device *adev;
 	struct pci_config_window *cfg;
-	unsigned int bsz;
+	int ret;
 
-	/* Use address from _CBA if present, otherwise lookup MCFG */
-	if (!root->mcfg_addr)
-		root->mcfg_addr = pci_mcfg_lookup(seg, bus_res);
-
-	if (!root->mcfg_addr) {
+	ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
+	if (ret) {
 		dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
 		return NULL;
 	}
 
-	bsz = 1 << pci_generic_ecam_ops.bus_shift;
-	cfgres.start = root->mcfg_addr + bus_res->start * bsz;
-	cfgres.end = cfgres.start + resource_size(bus_res) * bsz - 1;
-	cfgres.flags = IORESOURCE_MEM;
-
 	adev = acpi_resource_consumer(&cfgres);
 	if (adev)
 		dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
@@ -164,7 +157,7 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
 		dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
 			 &cfgres);
 
-	cfg = pci_ecam_create(dev, &cfgres, bus_res, &pci_generic_ecam_ops);
+	cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
 	if (IS_ERR(cfg)) {
 		dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
 			PTR_ERR(cfg));
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index b5b376e081f5..ffcc6513e851 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
 
 /* Structure to hold entries from the MCFG table */
 struct mcfg_entry {
@@ -35,9 +36,18 @@ struct mcfg_entry {
 /* List to save MCFG entries */
 static LIST_HEAD(pci_mcfg_list);
 
-phys_addr_t pci_mcfg_lookup(u16 seg, struct resource *bus_res)
+int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
+		    struct pci_ecam_ops **ecam_ops)
 {
+	struct pci_ecam_ops *ops = &pci_generic_ecam_ops;
+	struct resource *bus_res = &root->secondary;
+	u16 seg = root->segment;
 	struct mcfg_entry *e;
+	struct resource res;
+
+	/* Use address from _CBA if present, otherwise lookup MCFG */
+	if (root->mcfg_addr)
+		goto skip_lookup;
 
 	/*
 	 * We expect exact match, unless MCFG entry end bus covers more than
@@ -45,10 +55,22 @@ phys_addr_t pci_mcfg_lookup(u16 seg, struct resource *bus_res)
 	 */
 	list_for_each_entry(e, &pci_mcfg_list, list) {
 		if (e->segment == seg && e->bus_start == bus_res->start &&
-		    e->bus_end >= bus_res->end)
-			return e->addr;
+		    e->bus_end >= bus_res->end) {
+			root->mcfg_addr = e->addr;
+		}
+
 	}
 
+	if (!root->mcfg_addr)
+		return -ENXIO;
+
+skip_lookup:
+	memset(&res, 0, sizeof(res));
+	res.start = root->mcfg_addr + (bus_res->start << 20);
+	res.end = res.start + (resource_size(bus_res) << 20) - 1;
+	res.flags = IORESOURCE_MEM;
+	*cfgres = res;
+	*ecam_ops = ops;
 	return 0;
 }
 
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 7d63a66e8ed4..7a4e83a8c89c 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -24,7 +24,9 @@ static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
 }
 extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle);
 
-extern phys_addr_t pci_mcfg_lookup(u16 domain, struct resource *bus_res);
+struct pci_ecam_ops;
+extern int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
+			   struct pci_ecam_ops **ecam_ops);
 
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
-- 
cgit v1.2.3


From 2ca5b8ddc6f70d77a51851ba5e5cd0d39c27dd88 Mon Sep 17 00:00:00 2001
From: Christopher Covington <cov@codeaurora.org>
Date: Wed, 2 Nov 2016 11:11:27 -0500
Subject: PCI: Add MCFG quirks for Qualcomm QDF2432 host controller

The Qualcomm Technologies QDF2432 SoC does not support accesses smaller
than 32 bits to the PCI configuration space.  Register the appropriate
quirk.

[bhelgaas: add QCOM_ECAM32 macro, ifdef for ACPI and PCI_QUIRKS]
Signed-off-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_mcfg.c  | 11 +++++++++++
 drivers/pci/ecam.c       | 12 ++++++++++++
 include/linux/pci-ecam.h |  4 ++++
 3 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 1ef72857b710..cee33b078d3d 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -51,6 +51,17 @@ struct mcfg_fixup {
 
 static struct mcfg_fixup mcfg_quirks[] = {
 /*	{ OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, ops, cfgres }, */
+
+#define QCOM_ECAM32(seg) \
+	{ "QCOM  ", "QDF2432 ", 1, seg, MCFG_BUS_ANY, &pci_32b_ops }
+	QCOM_ECAM32(0),
+	QCOM_ECAM32(1),
+	QCOM_ECAM32(2),
+	QCOM_ECAM32(3),
+	QCOM_ECAM32(4),
+	QCOM_ECAM32(5),
+	QCOM_ECAM32(6),
+	QCOM_ECAM32(7),
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
index 43ed08dd8b01..2fee61bb6559 100644
--- a/drivers/pci/ecam.c
+++ b/drivers/pci/ecam.c
@@ -162,3 +162,15 @@ struct pci_ecam_ops pci_generic_ecam_ops = {
 		.write		= pci_generic_config_write,
 	}
 };
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+/* ECAM ops for 32-bit access only (non-compliant) */
+struct pci_ecam_ops pci_32b_ops = {
+	.bus_shift	= 20,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read32,
+		.write		= pci_generic_config_write32,
+	}
+};
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 7adad206b1f4..739d2330fd9e 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -59,6 +59,10 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
 /* default ECAM ops */
 extern struct pci_ecam_ops pci_generic_ecam_ops;
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+extern struct pci_ecam_ops pci_32b_ops;		/* 32-bit accesses only */
+#endif
+
 #ifdef CONFIG_PCI_HOST_GENERIC
 /* for DT-based PCI controllers that support ECAM */
 int pci_host_common_probe(struct platform_device *pdev,
-- 
cgit v1.2.3


From 5f00f1a0178cf52928366a5e1f376a65f1f3f389 Mon Sep 17 00:00:00 2001
From: Dongdong Liu <liudongdong3@huawei.com>
Date: Thu, 1 Dec 2016 00:45:35 -0600
Subject: PCI: Add MCFG quirks for HiSilicon Hip05/06/07 host controllers

The PCIe controller in Hip05/Hip06/Hip07 SoCs is not completely
ECAM-compliant.  It is non-ECAM only for the RC bus config space; for any
other bus underneath the root bus it does support ECAM access.

Add specific quirks for PCI config space accessors.  This involves:
1. New initialization call hisi_pcie_init() to obtain RC base
addresses from PNP0C02 at the root of the ACPI namespace (under \_SB).
2. New entry in common quirk array.

[bhelgaas: move to pcie-hisi.c and change Makefile/ifdefs so quirk doesn't
depend on CONFIG_PCI_HISI]
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_mcfg.c      |  12 +++++
 drivers/pci/host/Makefile    |   2 +-
 drivers/pci/host/pcie-hisi.c | 101 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci-ecam.h     |   1 +
 4 files changed, 115 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index cee33b078d3d..dd162248c3ee 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -62,6 +62,18 @@ static struct mcfg_fixup mcfg_quirks[] = {
 	QCOM_ECAM32(5),
 	QCOM_ECAM32(6),
 	QCOM_ECAM32(7),
+
+#define HISI_QUAD_DOM(table_id, seg, ops) \
+	{ "HISI  ", table_id, 0, (seg) + 0, MCFG_BUS_ANY, ops }, \
+	{ "HISI  ", table_id, 0, (seg) + 1, MCFG_BUS_ANY, ops }, \
+	{ "HISI  ", table_id, 0, (seg) + 2, MCFG_BUS_ANY, ops }, \
+	{ "HISI  ", table_id, 0, (seg) + 3, MCFG_BUS_ANY, ops }
+	HISI_QUAD_DOM("HIP05   ",  0, &hisi_pcie_ops),
+	HISI_QUAD_DOM("HIP06   ",  0, &hisi_pcie_ops),
+	HISI_QUAD_DOM("HIP07   ",  0, &hisi_pcie_ops),
+	HISI_QUAD_DOM("HIP07   ",  4, &hisi_pcie_ops),
+	HISI_QUAD_DOM("HIP07   ",  8, &hisi_pcie_ops),
+	HISI_QUAD_DOM("HIP07   ", 12, &hisi_pcie_ops),
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 084cb4983645..64845f0d6138 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_PCIE_IPROC_PLATFORM) += pcie-iproc-platform.o
 obj-$(CONFIG_PCIE_IPROC_BCMA) += pcie-iproc-bcma.o
 obj-$(CONFIG_PCIE_ALTERA) += pcie-altera.o
 obj-$(CONFIG_PCIE_ALTERA_MSI) += pcie-altera-msi.o
-obj-$(CONFIG_PCI_HISI) += pcie-hisi.o
+obj-$(CONFIG_ARM64) += pcie-hisi.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
 obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o
diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c
index 56154c25980c..9bfa1ab27365 100644
--- a/drivers/pci/host/pcie-hisi.c
+++ b/drivers/pci/host/pcie-hisi.c
@@ -18,7 +18,106 @@
 #include <linux/of_pci.h>
 #include <linux/platform_device.h>
 #include <linux/of_device.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
 #include <linux/regmap.h>
+#include "../pci.h"
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+static int hisi_pcie_acpi_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+				  int size, u32 *val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	int dev = PCI_SLOT(devfn);
+
+	if (bus->number == cfg->busr.start) {
+		/* access only one slot on each root port */
+		if (dev > 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			return pci_generic_config_read32(bus, devfn, where,
+							 size, val);
+	}
+
+	return pci_generic_config_read(bus, devfn, where, size, val);
+}
+
+static int hisi_pcie_acpi_wr_conf(struct pci_bus *bus, u32 devfn,
+				  int where, int size, u32 val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	int dev = PCI_SLOT(devfn);
+
+	if (bus->number == cfg->busr.start) {
+		/* access only one slot on each root port */
+		if (dev > 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			return pci_generic_config_write32(bus, devfn, where,
+							  size, val);
+	}
+
+	return pci_generic_config_write(bus, devfn, where, size, val);
+}
+
+static void __iomem *hisi_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+				       int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	void __iomem *reg_base = cfg->priv;
+
+	if (bus->number == cfg->busr.start)
+		return reg_base + where;
+	else
+		return pci_ecam_map_bus(bus, devfn, where);
+}
+
+static int hisi_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct resource *res;
+	void __iomem *reg_base;
+	int ret;
+
+	/*
+	 * Retrieve RC base and size from a HISI0081 device with _UID
+	 * matching our segment.
+	 */
+	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	ret = acpi_get_rc_resources(dev, "HISI0081", root->segment, res);
+	if (ret) {
+		dev_err(dev, "can't get rc base address\n");
+		return -ENOMEM;
+	}
+
+	reg_base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!reg_base)
+		return -ENOMEM;
+
+	cfg->priv = reg_base;
+	return 0;
+}
+
+struct pci_ecam_ops hisi_pcie_ops = {
+	.bus_shift    = 20,
+	.init         =  hisi_pcie_init,
+	.pci_ops      = {
+		.map_bus    = hisi_pcie_map_bus,
+		.read       = hisi_pcie_acpi_rd_conf,
+		.write      = hisi_pcie_acpi_wr_conf,
+	}
+};
+
+#endif
+
+#ifdef CONFIG_PCI_HISI
 
 #include "pcie-designware.h"
 
@@ -227,3 +326,5 @@ static struct platform_driver hisi_pcie_driver = {
 	},
 };
 builtin_platform_driver(hisi_pcie_driver);
+
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 739d2330fd9e..bdacbc883a22 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -61,6 +61,7 @@ extern struct pci_ecam_ops pci_generic_ecam_ops;
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
 extern struct pci_ecam_ops pci_32b_ops;		/* 32-bit accesses only */
+extern struct pci_ecam_ops hisi_pcie_ops;	/* HiSilicon */
 #endif
 
 #ifdef CONFIG_PCI_HOST_GENERIC
-- 
cgit v1.2.3


From 44f22bd91e88f9a1203a6e564a237e593f5f2f74 Mon Sep 17 00:00:00 2001
From: Tomasz Nowicki <tn@semihalf.com>
Date: Thu, 1 Dec 2016 00:07:56 -0600
Subject: PCI: Add MCFG quirks for Cavium ThunderX pass2.x host controller

ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully
compliant with ECAM standard. It uses non-standard configuration space
accessors (see thunder_pem_ecam_ops) and custom configuration space
granulation (see bus_shift = 24). In order to access configuration space
and probe PEM as ACPI-based PCI host controller we need to add MCFG quirk
infrastructure. This involves:
1. A new thunder_pem_acpi_init() init function to locate PEM-specific
   register ranges using ACPI.
2. Export PEM thunder_pem_ecam_ops structure so it is visible to MCFG quirk
   code.
3. New quirk entries for each PEM segment. Each contains platform IDs,
   mentioned thunder_pem_ecam_ops and CFG resources.

Quirk is considered for ThunderX silicon pass2.x only which is identified
via MCFG revision 1.

ThunderX pass 2.x requires the following accessors:

  NUMA Node 0 PCI segments  0- 3: pci_generic_ecam_ops (ECAM-compliant)
  NUMA Node 0 PCI segments  4- 9: thunder_pem_ecam_ops (MCFG quirk)
  NUMA Node 1 PCI segments 10-13: pci_generic_ecam_ops (ECAM-compliant)
  NUMA Node 1 PCI segments 14-19: thunder_pem_ecam_ops (MCFG quirk)

[bhelgaas: adapt to use acpi_get_rc_resources(), update Makefile/ifdefs so
quirk doesn't depend on CONFIG_PCI_HOST_THUNDER_PEM]
Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_mcfg.c            | 19 ++++++++++++++++
 drivers/pci/host/Kconfig           |  3 ++-
 drivers/pci/host/Makefile          |  2 +-
 drivers/pci/host/pci-thunder-pem.c | 44 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci-ecam.h           |  1 +
 5 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index dd162248c3ee..17cbb07ce16c 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -74,6 +74,25 @@ static struct mcfg_fixup mcfg_quirks[] = {
 	HISI_QUAD_DOM("HIP07   ",  4, &hisi_pcie_ops),
 	HISI_QUAD_DOM("HIP07   ",  8, &hisi_pcie_ops),
 	HISI_QUAD_DOM("HIP07   ", 12, &hisi_pcie_ops),
+
+#define THUNDER_PEM_RES(addr, node) \
+	DEFINE_RES_MEM((addr) + ((u64) (node) << 44), 0x39 * SZ_16M)
+#define THUNDER_PEM_QUIRK(rev, node) \
+	{ "CAVIUM", "THUNDERX", rev, 4 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x88001f000000UL, node) },  \
+	{ "CAVIUM", "THUNDERX", rev, 5 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x884057000000UL, node) },  \
+	{ "CAVIUM", "THUNDERX", rev, 6 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x88808f000000UL, node) },  \
+	{ "CAVIUM", "THUNDERX", rev, 7 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x89001f000000UL, node) },  \
+	{ "CAVIUM", "THUNDERX", rev, 8 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x894057000000UL, node) },  \
+	{ "CAVIUM", "THUNDERX", rev, 9 + (10 * (node)), MCFG_BUS_ANY,	    \
+	  &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x89808f000000UL, node) }
+	/* SoC pass2.x */
+	THUNDER_PEM_QUIRK(1, 0),
+	THUNDER_PEM_QUIRK(1, 1),
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index d7e7c0a827c3..1239a8e63368 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -240,7 +240,8 @@ config PCIE_QCOM
 
 config PCI_HOST_THUNDER_PEM
 	bool "Cavium Thunder PCIe controller to off-chip devices"
-	depends on OF && ARM64
+	depends on ARM64
+	depends on OF || (ACPI && PCI_QUIRKS)
 	select PCI_HOST_COMMON
 	help
 	  Say Y here if you want PCIe support for CN88XX Cavium Thunder SoCs.
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 64845f0d6138..97e6bfc55772 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -28,7 +28,7 @@ obj-$(CONFIG_PCIE_ALTERA_MSI) += pcie-altera-msi.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
-obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o
+obj-$(CONFIG_ARM64) += pci-thunder-pem.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
 obj-$(CONFIG_PCIE_ROCKCHIP) += pcie-rockchip.o
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
index c3276eede82a..af722eb0ca75 100644
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -18,8 +18,12 @@
 #include <linux/init.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
+#include "../pci.h"
+
+#if defined(CONFIG_PCI_HOST_THUNDER_PEM) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
 
 #define PEM_CFG_WR 0x28
 #define PEM_CFG_RD 0x30
@@ -313,6 +317,43 @@ static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg,
 	return 0;
 }
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+static int thunder_pem_acpi_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct resource *res_pem;
+	int ret;
+
+	res_pem = devm_kzalloc(&adev->dev, sizeof(*res_pem), GFP_KERNEL);
+	if (!res_pem)
+		return -ENOMEM;
+
+	ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem);
+	if (ret) {
+		dev_err(dev, "can't get rc base address\n");
+		return ret;
+	}
+
+	return thunder_pem_init(dev, cfg, res_pem);
+}
+
+struct pci_ecam_ops thunder_pem_ecam_ops = {
+	.bus_shift	= 24,
+	.init		= thunder_pem_acpi_init,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= thunder_pem_config_read,
+		.write		= thunder_pem_config_write,
+	}
+};
+
+#endif
+
+#ifdef CONFIG_PCI_HOST_THUNDER_PEM
+
 static int thunder_pem_platform_init(struct pci_config_window *cfg)
 {
 	struct device *dev = cfg->parent;
@@ -364,3 +405,6 @@ static struct platform_driver thunder_pem_driver = {
 	.probe = thunder_pem_probe,
 };
 builtin_platform_driver(thunder_pem_driver);
+
+#endif
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index bdacbc883a22..e88d7db74200 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -62,6 +62,7 @@ extern struct pci_ecam_ops pci_generic_ecam_ops;
 #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
 extern struct pci_ecam_ops pci_32b_ops;		/* 32-bit accesses only */
 extern struct pci_ecam_ops hisi_pcie_ops;	/* HiSilicon */
+extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 2.x */
 #endif
 
 #ifdef CONFIG_PCI_HOST_GENERIC
-- 
cgit v1.2.3


From 648d93fc77da4f655cf13108417f33c91d745e2c Mon Sep 17 00:00:00 2001
From: Tomasz Nowicki <tn@semihalf.com>
Date: Wed, 30 Nov 2016 23:16:34 -0600
Subject: PCI: Add MCFG quirks for Cavium ThunderX pass1.x host controller

ThunderX pass1.x requires to emulate the EA headers for on-chip devices
hence it has to use custom pci_thunder_ecam_ops for accessing PCI config
space (pci-thunder-ecam.c). Add new entries to MCFG quirk array where it
can be applied while probing ACPI based PCI host controller.

ThunderX pass1.x is using the same way for accessing off-chip devices
(so-called PEM) as silicon pass-2.x so we need to add PEM quirk entries
too.

Quirk is considered for ThunderX silicon pass1.x only which is identified
via MCFG revision 2.

ThunderX pass 1.x requires the following accessors:

  NUMA node 0 PCI segments  0- 3: pci_thunder_ecam_ops (MCFG quirk)
  NUMA node 0 PCI segments  4- 9: thunder_pem_ecam_ops (MCFG quirk)
  NUMA node 1 PCI segments 10-13: pci_thunder_ecam_ops (MCFG quirk)
  NUMA node 1 PCI segments 14-19: thunder_pem_ecam_ops (MCFG quirk)

[bhelgaas: change Makefile/ifdefs so quirk doesn't depend on
CONFIG_PCI_HOST_THUNDER_ECAM]
Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_mcfg.c             | 15 +++++++++++++++
 drivers/pci/host/Kconfig            |  3 ++-
 drivers/pci/host/Makefile           |  2 +-
 drivers/pci/host/pci-thunder-ecam.c |  9 ++++++++-
 include/linux/pci-ecam.h            |  3 ++-
 5 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 17cbb07ce16c..1cfe65d87adf 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -93,6 +93,21 @@ static struct mcfg_fixup mcfg_quirks[] = {
 	/* SoC pass2.x */
 	THUNDER_PEM_QUIRK(1, 0),
 	THUNDER_PEM_QUIRK(1, 1),
+
+#define THUNDER_ECAM_QUIRK(rev, seg)					\
+	{ "CAVIUM", "THUNDERX", rev, seg, MCFG_BUS_ANY,			\
+	&pci_thunder_ecam_ops }
+	/* SoC pass1.x */
+	THUNDER_PEM_QUIRK(2, 0),	/* off-chip devices */
+	THUNDER_PEM_QUIRK(2, 1),	/* off-chip devices */
+	THUNDER_ECAM_QUIRK(2,  0),
+	THUNDER_ECAM_QUIRK(2,  1),
+	THUNDER_ECAM_QUIRK(2,  2),
+	THUNDER_ECAM_QUIRK(2,  3),
+	THUNDER_ECAM_QUIRK(2, 10),
+	THUNDER_ECAM_QUIRK(2, 11),
+	THUNDER_ECAM_QUIRK(2, 12),
+	THUNDER_ECAM_QUIRK(2, 13),
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 1239a8e63368..c983892e56a0 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -248,7 +248,8 @@ config PCI_HOST_THUNDER_PEM
 
 config PCI_HOST_THUNDER_ECAM
 	bool "Cavium Thunder ECAM controller to on-chip devices on pass-1.x silicon"
-	depends on OF && ARM64
+	depends on ARM64
+	depends on OF || (ACPI && PCI_QUIRKS)
 	select PCI_HOST_COMMON
 	help
 	  Say Y here if you want ECAM support for CN88XX-Pass-1.x Cavium Thunder SoCs.
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 97e6bfc55772..639494a55c51 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_PCIE_ALTERA) += pcie-altera.o
 obj-$(CONFIG_PCIE_ALTERA_MSI) += pcie-altera-msi.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
-obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
+obj-$(CONFIG_ARM64) += pci-thunder-ecam.o
 obj-$(CONFIG_ARM64) += pci-thunder-pem.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
diff --git a/drivers/pci/host/pci-thunder-ecam.c b/drivers/pci/host/pci-thunder-ecam.c
index d50a3dc2d8db..3f54a43bbbea 100644
--- a/drivers/pci/host/pci-thunder-ecam.c
+++ b/drivers/pci/host/pci-thunder-ecam.c
@@ -14,6 +14,8 @@
 #include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
 
+#if defined(CONFIG_PCI_HOST_THUNDER_ECAM) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+
 static void set_val(u32 v, int where, int size, u32 *val)
 {
 	int shift = (where & 3) * 8;
@@ -346,7 +348,7 @@ static int thunder_ecam_config_write(struct pci_bus *bus, unsigned int devfn,
 	return pci_generic_config_write(bus, devfn, where, size, val);
 }
 
-static struct pci_ecam_ops pci_thunder_ecam_ops = {
+struct pci_ecam_ops pci_thunder_ecam_ops = {
 	.bus_shift	= 20,
 	.pci_ops	= {
 		.map_bus        = pci_ecam_map_bus,
@@ -355,6 +357,8 @@ static struct pci_ecam_ops pci_thunder_ecam_ops = {
 	}
 };
 
+#ifdef CONFIG_PCI_HOST_THUNDER_ECAM
+
 static const struct of_device_id thunder_ecam_of_match[] = {
 	{ .compatible = "cavium,pci-host-thunder-ecam" },
 	{ },
@@ -373,3 +377,6 @@ static struct platform_driver thunder_ecam_driver = {
 	.probe = thunder_ecam_probe,
 };
 builtin_platform_driver(thunder_ecam_driver);
+
+#endif
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index e88d7db74200..00eb8eb774e2 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -62,7 +62,8 @@ extern struct pci_ecam_ops pci_generic_ecam_ops;
 #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
 extern struct pci_ecam_ops pci_32b_ops;		/* 32-bit accesses only */
 extern struct pci_ecam_ops hisi_pcie_ops;	/* HiSilicon */
-extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 2.x */
+extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 1.x & 2.x */
+extern struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */
 #endif
 
 #ifdef CONFIG_PCI_HOST_GENERIC
-- 
cgit v1.2.3


From c5d4603961009c39de94725213d8b5420f110f9e Mon Sep 17 00:00:00 2001
From: Duc Dang <dhdang@apm.com>
Date: Thu, 1 Dec 2016 18:27:07 -0800
Subject: PCI: Add MCFG quirks for X-Gene host controller

PCIe controllers in X-Gene SoCs are not ECAM compliant: software needs to
configure additional controller's register to address device at
bus:dev:function.

Add a quirk to discover controller MMIO register space and configure
controller registers to select and address the target secondary device.

The quirk will only be applied for X-Gene PCIe MCFG table with
OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).

Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Duc Dang <dhdang@apm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_mcfg.c      |  25 +++++++++
 drivers/pci/host/Kconfig     |   4 +-
 drivers/pci/host/Makefile    |   2 +-
 drivers/pci/host/pci-xgene.c | 126 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/pci-ecam.h     |   2 +
 5 files changed, 149 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 1cfe65d87adf..a6a4ceaa6cc3 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -108,6 +108,31 @@ static struct mcfg_fixup mcfg_quirks[] = {
 	THUNDER_ECAM_QUIRK(2, 11),
 	THUNDER_ECAM_QUIRK(2, 12),
 	THUNDER_ECAM_QUIRK(2, 13),
+
+#define XGENE_V1_ECAM_MCFG(rev, seg) \
+	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
+		&xgene_v1_pcie_ecam_ops }
+#define XGENE_V2_ECAM_MCFG(rev, seg) \
+	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
+		&xgene_v2_pcie_ecam_ops }
+	/* X-Gene SoC with v1 PCIe controller */
+	XGENE_V1_ECAM_MCFG(1, 0),
+	XGENE_V1_ECAM_MCFG(1, 1),
+	XGENE_V1_ECAM_MCFG(1, 2),
+	XGENE_V1_ECAM_MCFG(1, 3),
+	XGENE_V1_ECAM_MCFG(1, 4),
+	XGENE_V1_ECAM_MCFG(2, 0),
+	XGENE_V1_ECAM_MCFG(2, 1),
+	XGENE_V1_ECAM_MCFG(2, 2),
+	XGENE_V1_ECAM_MCFG(2, 3),
+	XGENE_V1_ECAM_MCFG(2, 4),
+	/* X-Gene SoC with v2.1 PCIe controller */
+	XGENE_V2_ECAM_MCFG(3, 0),
+	XGENE_V2_ECAM_MCFG(3, 1),
+	/* X-Gene SoC with v2.2 PCIe controller */
+	XGENE_V2_ECAM_MCFG(4, 0),
+	XGENE_V2_ECAM_MCFG(4, 1),
+	XGENE_V2_ECAM_MCFG(4, 2),
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index c983892e56a0..1fb55182ccaf 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -133,8 +133,8 @@ config PCIE_XILINX
 
 config PCI_XGENE
 	bool "X-Gene PCIe controller"
-	depends on ARCH_XGENE
-	depends on OF
+	depends on ARM64
+	depends on OF || (ACPI && PCI_QUIRKS)
 	select PCIEPORTBUS
 	help
 	  Say Y here if you want internal PCI support on APM X-Gene SoC.
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 639494a55c51..6cc84b4f3a4d 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
 obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
 obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
 obj-$(CONFIG_PCIE_XILINX_NWL) += pcie-xilinx-nwl.o
-obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_ARM64) += pci-xgene.o
 obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
 obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
 obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 1de23d74783f..7c3b54b9eb17 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -27,6 +27,8 @@
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -64,7 +66,9 @@
 /* PCIe IP version */
 #define XGENE_PCIE_IP_VER_UNKN		0
 #define XGENE_PCIE_IP_VER_1		1
+#define XGENE_PCIE_IP_VER_2		2
 
+#if defined(CONFIG_PCI_XGENE) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
 struct xgene_pcie_port {
 	struct device_node	*node;
 	struct device		*dev;
@@ -91,13 +95,24 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
 	return (addr & PCI_BASE_ADDRESS_MEM_MASK) | flags;
 }
 
+static inline struct xgene_pcie_port *pcie_bus_to_port(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg;
+
+	if (acpi_disabled)
+		return (struct xgene_pcie_port *)(bus->sysdata);
+
+	cfg = bus->sysdata;
+	return (struct xgene_pcie_port *)(cfg->priv);
+}
+
 /*
  * When the address bit [17:16] is 2'b01, the Configuration access will be
  * treated as Type 1 and it will be forwarded to external PCIe device.
  */
 static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct xgene_pcie_port *port = pcie_bus_to_port(bus);
 
 	if (bus->number >= (bus->primary + 1))
 		return port->cfg_base + AXI_EP_CFG_ACCESS;
@@ -111,7 +126,7 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
  */
 static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct xgene_pcie_port *port = pcie_bus_to_port(bus);
 	unsigned int b, d, f;
 	u32 rtdid_val = 0;
 
@@ -158,7 +173,7 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
 static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 *val)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct xgene_pcie_port *port = pcie_bus_to_port(bus);
 
 	if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
 	    PCIBIOS_SUCCESSFUL)
@@ -182,13 +197,103 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 
 	return PCIBIOS_SUCCESSFUL;
 }
+#endif
 
-static struct pci_ops xgene_pcie_ops = {
-	.map_bus = xgene_pcie_map_bus,
-	.read = xgene_pcie_config_read32,
-	.write = pci_generic_config_write32,
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+static int xgene_get_csr_resource(struct acpi_device *adev,
+				  struct resource *res)
+{
+	struct device *dev = &adev->dev;
+	struct resource_entry *entry;
+	struct list_head list;
+	unsigned long flags;
+	int ret;
+
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_MEM;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *) flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n",
+			ret);
+		return ret;
+	}
+
+	if (ret == 0) {
+		dev_err(dev, "no IO and memory resources present in _CRS\n");
+		return -EINVAL;
+	}
+
+	entry = list_first_entry(&list, struct resource_entry, node);
+	*res = *entry->res;
+	acpi_dev_free_resource_list(&list);
+	return 0;
+}
+
+static int xgene_pcie_ecam_init(struct pci_config_window *cfg, u32 ipversion)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct xgene_pcie_port *port;
+	struct resource csr;
+	int ret;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	ret = xgene_get_csr_resource(adev, &csr);
+	if (ret) {
+		dev_err(dev, "can't get CSR resource\n");
+		kfree(port);
+		return ret;
+	}
+	port->csr_base = devm_ioremap_resource(dev, &csr);
+	if (IS_ERR(port->csr_base)) {
+		kfree(port);
+		return -ENOMEM;
+	}
+
+	port->cfg_base = cfg->win;
+	port->version = ipversion;
+
+	cfg->priv = port;
+	return 0;
+}
+
+static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_1);
+}
+
+struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
+	.bus_shift      = 16,
+	.init           = xgene_v1_pcie_ecam_init,
+	.pci_ops        = {
+		.map_bus        = xgene_pcie_map_bus,
+		.read           = xgene_pcie_config_read32,
+		.write          = pci_generic_config_write,
+	}
+};
+
+static int xgene_v2_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_2);
+}
+
+struct pci_ecam_ops xgene_v2_pcie_ecam_ops = {
+	.bus_shift      = 16,
+	.init           = xgene_v2_pcie_ecam_init,
+	.pci_ops        = {
+		.map_bus        = xgene_pcie_map_bus,
+		.read           = xgene_pcie_config_read32,
+		.write          = pci_generic_config_write,
+	}
 };
+#endif
 
+#if defined(CONFIG_PCI_XGENE)
 static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
 				  u32 flags, u64 size)
 {
@@ -521,6 +626,12 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
 	return 0;
 }
 
+static struct pci_ops xgene_pcie_ops = {
+	.map_bus = xgene_pcie_map_bus,
+	.read = xgene_pcie_config_read32,
+	.write = pci_generic_config_write32,
+};
+
 static int xgene_pcie_probe_bridge(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -591,3 +702,4 @@ static struct platform_driver xgene_pcie_driver = {
 	.probe = xgene_pcie_probe_bridge,
 };
 builtin_platform_driver(xgene_pcie_driver);
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 00eb8eb774e2..f0d2b9451270 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -64,6 +64,8 @@ extern struct pci_ecam_ops pci_32b_ops;		/* 32-bit accesses only */
 extern struct pci_ecam_ops hisi_pcie_ops;	/* HiSilicon */
 extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 1.x & 2.x */
 extern struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */
+extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */
+extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */
 #endif
 
 #ifdef CONFIG_PCI_HOST_GENERIC
-- 
cgit v1.2.3


From 0aa8c57a04907a5d02068ff9f917629be97ea78d Mon Sep 17 00:00:00 2001
From: Aaron Conole <aconole@bytheb.org>
Date: Tue, 15 Nov 2016 17:48:44 -0500
Subject: netfilter: introduce accessor functions for hook entries

This allows easier future refactoring.

Signed-off-by: Aaron Conole <aconole@bytheb.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h       | 27 +++++++++++++++++++++++++++
 net/bridge/br_netfilter_hooks.c |  2 +-
 net/netfilter/core.c            | 10 ++++------
 net/netfilter/nf_queue.c        |  5 ++---
 4 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 69230140215b..575aa198097e 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -79,6 +79,33 @@ struct nf_hook_entry {
 	const struct nf_hook_ops	*orig_ops;
 };
 
+static inline void
+nf_hook_entry_init(struct nf_hook_entry *entry,	const struct nf_hook_ops *ops)
+{
+	entry->next = NULL;
+	entry->ops = *ops;
+	entry->orig_ops = ops;
+}
+
+static inline int
+nf_hook_entry_priority(const struct nf_hook_entry *entry)
+{
+	return entry->ops.priority;
+}
+
+static inline int
+nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
+		     struct nf_hook_state *state)
+{
+	return entry->ops.hook(entry->ops.priv, skb, state);
+}
+
+static inline const struct nf_hook_ops *
+nf_hook_entry_ops(const struct nf_hook_entry *entry)
+{
+	return entry->orig_ops;
+}
+
 static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      unsigned int hook,
 				      u_int8_t pf,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 83d937f4415e..adad2eed29e6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1010,7 +1010,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
 	elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
 
-	while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+	while (elem && (nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF))
 		elem = rcu_dereference(elem->next);
 
 	if (!elem)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index de30e08d58f2..2bb46e2d8d30 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -102,15 +102,13 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->orig_ops	= reg;
-	entry->ops	= *reg;
-	entry->next	= NULL;
+	nf_hook_entry_init(entry, reg);
 
 	mutex_lock(&nf_hook_mutex);
 
 	/* Find the spot in the list */
 	while ((p = nf_entry_dereference(*pp)) != NULL) {
-		if (reg->priority < p->orig_ops->priority)
+		if (reg->priority < nf_hook_entry_priority(p))
 			break;
 		pp = &p->next;
 	}
@@ -140,7 +138,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 
 	mutex_lock(&nf_hook_mutex);
 	while ((p = nf_entry_dereference(*pp)) != NULL) {
-		if (p->orig_ops == reg) {
+		if (nf_hook_entry_ops(p) == reg) {
 			rcu_assign_pointer(*pp, p->next);
 			break;
 		}
@@ -311,7 +309,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 	int ret;
 
 	do {
-		verdict = entry->ops.hook(entry->ops.priv, skb, state);
+		verdict = nf_hook_entry_hookfn(entry, skb, state);
 		switch (verdict & NF_VERDICT_MASK) {
 		case NF_ACCEPT:
 			entry = rcu_dereference(entry->next);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 77cba9f6ccb6..4a7662486f44 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -185,7 +185,7 @@ static unsigned int nf_iterate(struct sk_buff *skb,
 
 	do {
 repeat:
-		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
+		verdict = nf_hook_entry_hookfn((*entryp), skb, state);
 		if (verdict != NF_ACCEPT) {
 			if (verdict != NF_REPEAT)
 				return verdict;
@@ -200,7 +200,6 @@ repeat:
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
 	struct nf_hook_entry *hook_entry = entry->hook;
-	struct nf_hook_ops *elem = &hook_entry->ops;
 	struct sk_buff *skb = entry->skb;
 	const struct nf_afinfo *afinfo;
 	int err;
@@ -209,7 +208,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT)
-		verdict = elem->hook(elem->priv, skb, &entry->state);
+		verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
 
 	if (verdict == NF_ACCEPT) {
 		afinfo = nf_get_afinfo(entry->state.pf);
-- 
cgit v1.2.3


From d415b9eb76fc55c03ef5451691170aa5771dcea3 Mon Sep 17 00:00:00 2001
From: Aaron Conole <aconole@redhat.com>
Date: Tue, 15 Nov 2016 17:48:45 -0500
Subject: netfilter: decouple nf_hook_entry and nf_hook_ops

During nfhook traversal we only need a very small subset of
nf_hook_ops members.

We need:
- next element
- hook function to call
- hook function priv argument

Bridge netfilter also needs 'thresh'; can be obtained via ->orig_ops.

nf_hook_entry struct is now 32 bytes on x86_64.

A followup patch will turn the run-time list into an array that only
stores hook functions plus their priv arguments, eliminating the ->next
element.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Aaron Conole <aconole@bytheb.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 575aa198097e..a4b97be30b28 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -75,7 +75,8 @@ struct nf_hook_ops {
 
 struct nf_hook_entry {
 	struct nf_hook_entry __rcu	*next;
-	struct nf_hook_ops		ops;
+	nf_hookfn			*hook;
+	void				*priv;
 	const struct nf_hook_ops	*orig_ops;
 };
 
@@ -83,21 +84,22 @@ static inline void
 nf_hook_entry_init(struct nf_hook_entry *entry,	const struct nf_hook_ops *ops)
 {
 	entry->next = NULL;
-	entry->ops = *ops;
+	entry->hook = ops->hook;
+	entry->priv = ops->priv;
 	entry->orig_ops = ops;
 }
 
 static inline int
 nf_hook_entry_priority(const struct nf_hook_entry *entry)
 {
-	return entry->ops.priority;
+	return entry->orig_ops->priority;
 }
 
 static inline int
 nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
 		     struct nf_hook_state *state)
 {
-	return entry->ops.hook(entry->ops.priv, skb, state);
+	return entry->hook(entry->priv, skb, state);
 }
 
 static inline const struct nf_hook_ops *
-- 
cgit v1.2.3


From 4d31eef5176df06f218201bc9c0ce40babb41660 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 22 Nov 2016 14:44:17 +0100
Subject: netfilter: x_tables: pass xt_counters struct instead of packet
 counter

On SMP we overload the packet counter (unsigned long) to contain
percpu offset.  Hide this from callers and pass xt_counters address
instead.

Preparation patch to allocate the percpu counters in page-sized batch
chunks.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 6 +-----
 net/ipv4/netfilter/arp_tables.c    | 4 ++--
 net/ipv4/netfilter/ip_tables.c     | 4 ++--
 net/ipv6/netfilter/ip6_tables.c    | 5 ++---
 net/netfilter/x_tables.c           | 9 +++++++++
 5 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index cd4eaf8df445..6e61edeb68e3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void)
 
 	return 0;
 }
-static inline void xt_percpu_counter_free(u64 pcnt)
-{
-	if (nr_cpu_ids > 1)
-		free_percpu((void __percpu *) (unsigned long) pcnt);
-}
+void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
 xt_get_this_cpu_counter(struct xt_counters *cnt)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 848a0704b28f..019f8e8dda6d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -439,7 +439,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
 err:
 	module_put(t->u.kernel.target->me);
 out:
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -519,7 +519,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 46815c8a60d7..acc9a0c45bdf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -582,7 +582,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 		cleanup_match(ematch, net);
 	}
 
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -670,7 +670,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6ff42b8301cc..88b56a98905b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -612,7 +612,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 		cleanup_match(ematch, net);
 	}
 
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -699,8 +699,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ad818e52859b..0580029eb0ee 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+	unsigned long pcnt = counters->pcnt;
+
+	if (nr_cpu_ids > 1)
+		free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+
 static int __net_init xt_net_init(struct net *net)
 {
 	int i;
-- 
cgit v1.2.3


From f28e15bacedd444608e25421c72eb2cf4527c9ca Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 22 Nov 2016 14:44:18 +0100
Subject: netfilter: x_tables: pass xt_counters struct to counter allocator

Keeps some noise away from a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 27 +--------------------------
 net/ipv4/netfilter/arp_tables.c    |  5 +----
 net/ipv4/netfilter/ip_tables.c     |  5 +----
 net/ipv6/netfilter/ip6_tables.c    |  5 +----
 net/netfilter/x_tables.c           | 30 ++++++++++++++++++++++++++++++
 5 files changed, 34 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6e61edeb68e3..05a94bd32c55 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -404,32 +404,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 }
 
 
-/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
- * real (percpu) counter.  On !SMP, its just the packet count,
- * so nothing needs to be done there.
- *
- * xt_percpu_counter_alloc returns the address of the percpu
- * counter, or 0 on !SMP. We force an alignment of 16 bytes
- * so that bytes/packets share a common cache line.
- *
- * Hence caller must use IS_ERR_VALUE to check for error, this
- * allows us to return 0 for single core systems without forcing
- * callers to deal with SMP vs. NONSMP issues.
- */
-static inline unsigned long xt_percpu_counter_alloc(void)
-{
-	if (nr_cpu_ids > 1) {
-		void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
-						    sizeof(struct xt_counters));
-
-		if (res == NULL)
-			return -ENOMEM;
-
-		return (__force unsigned long) res;
-	}
-
-	return 0;
-}
+bool xt_percpu_counter_alloc(struct xt_counters *counters);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 019f8e8dda6d..808deb275ceb 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -415,13 +415,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
-	unsigned long pcnt;
 	int ret;
 
-	pcnt = xt_percpu_counter_alloc();
-	if (IS_ERR_VALUE(pcnt))
+	if (!xt_percpu_counter_alloc(&e->counters))
 		return -ENOMEM;
-	e->counters.pcnt = pcnt;
 
 	t = arpt_get_target(e);
 	target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index acc9a0c45bdf..a48430d3420f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -539,12 +539,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	unsigned int j;
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
-	unsigned long pcnt;
 
-	pcnt = xt_percpu_counter_alloc();
-	if (IS_ERR_VALUE(pcnt))
+	if (!xt_percpu_counter_alloc(&e->counters))
 		return -ENOMEM;
-	e->counters.pcnt = pcnt;
 
 	j = 0;
 	mtpar.net	= net;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 88b56a98905b..a5a92083fd62 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -570,12 +570,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	unsigned int j;
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
-	unsigned long pcnt;
 
-	pcnt = xt_percpu_counter_alloc();
-	if (IS_ERR_VALUE(pcnt))
+	if (!xt_percpu_counter_alloc(&e->counters))
 		return -ENOMEM;
-	e->counters.pcnt = pcnt;
 
 	j = 0;
 	mtpar.net	= net;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0580029eb0ee..be5e83047594 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,36 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
+/**
+ * xt_percpu_counter_alloc - allocate x_tables rule counter
+ *
+ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
+ *
+ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
+ * contain the address of the real (percpu) counter.
+ *
+ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
+ * to fetch the real percpu counter.
+ *
+ * returns false on error.
+ */
+bool xt_percpu_counter_alloc(struct xt_counters *counter)
+{
+	void __percpu *res;
+
+	if (nr_cpu_ids <= 1)
+		return true;
+
+	res = __alloc_percpu(sizeof(struct xt_counters),
+			     sizeof(struct xt_counters));
+	if (!res)
+		return false;
+
+	counter->pcnt = (__force unsigned long)res;
+	return true;
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
+
 void xt_percpu_counter_free(struct xt_counters *counters)
 {
 	unsigned long pcnt = counters->pcnt;
-- 
cgit v1.2.3


From ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 22 Nov 2016 14:44:19 +0100
Subject: netfilter: x_tables: pack percpu counter allocations

instead of allocating each xt_counter individually, allocate 4k chunks
and then use these for counter allocation requests.

This should speed up rule evaluation by increasing data locality,
also speeds up ruleset loading because we reduce calls to the percpu
allocator.

As Eric points out we can't use PAGE_SIZE, page_allocator would fail on
arches with 64k page size.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  7 ++++++-
 net/ipv4/netfilter/arp_tables.c    |  9 ++++++---
 net/ipv4/netfilter/ip_tables.c     |  9 ++++++---
 net/ipv6/netfilter/ip6_tables.c    |  9 ++++++---
 net/netfilter/x_tables.c           | 33 ++++++++++++++++++++++++---------
 5 files changed, 48 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 05a94bd32c55..5117e4d2ddfa 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
+struct xt_percpu_counter_alloc_state {
+	unsigned int off;
+	const char __percpu *mem;
+};
 
-bool xt_percpu_counter_alloc(struct xt_counters *counters);
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+			     struct xt_counters *counter);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 808deb275ceb..1258a9ab62ef 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 }
 
 static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
 	int ret;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	t = arpt_get_target(e);
@@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 static int translate_table(struct xt_table_info *newinfo, void *entry0,
 			   const struct arpt_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct arpt_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, repl->name, repl->size);
+		ret = find_check_entry(iter, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a48430d3420f..308b456723f0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
-		 unsigned int size)
+		 unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
@@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	j = 0;
@@ -676,6 +677,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		const struct ipt_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct ipt_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, net, repl->name, repl->size);
+		ret = find_check_entry(iter, net, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a5a92083fd62..d56d8ac09a94 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
-		 unsigned int size)
+		 unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
@@ -571,7 +572,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	j = 0;
@@ -705,6 +706,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		const struct ip6t_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct ip6t_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -764,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, net, repl->name, repl->size);
+		ret = find_check_entry(iter, net, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index be5e83047594..f6ce4a7036e6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+#define XT_PCPU_BLOCK_SIZE 4096
 
 struct compat_delta {
 	unsigned int offset; /* offset in kernel */
@@ -1618,6 +1619,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
 /**
  * xt_percpu_counter_alloc - allocate x_tables rule counter
  *
+ * @state: pointer to xt_percpu allocation state
  * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
  *
  * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
@@ -1626,21 +1628,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
  * Rule evaluation needs to use xt_get_this_cpu_counter() helper
  * to fetch the real percpu counter.
  *
+ * To speed up allocation and improve data locality, a 4kb block is
+ * allocated.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated page and the current sub-offset.
+ *
  * returns false on error.
  */
-bool xt_percpu_counter_alloc(struct xt_counters *counter)
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+			     struct xt_counters *counter)
 {
-	void __percpu *res;
+	BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
 
 	if (nr_cpu_ids <= 1)
 		return true;
 
-	res = __alloc_percpu(sizeof(struct xt_counters),
-			     sizeof(struct xt_counters));
-	if (!res)
-		return false;
-
-	counter->pcnt = (__force unsigned long)res;
+	if (!state->mem) {
+		state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+					    XT_PCPU_BLOCK_SIZE);
+		if (!state->mem)
+			return false;
+	}
+	counter->pcnt = (__force unsigned long)(state->mem + state->off);
+	state->off += sizeof(*counter);
+	if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+		state->mem = NULL;
+		state->off = 0;
+	}
 	return true;
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
@@ -1649,7 +1664,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
 {
 	unsigned long pcnt = counters->pcnt;
 
-	if (nr_cpu_ids > 1)
+	if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
 		free_percpu((void __percpu *)pcnt);
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
-- 
cgit v1.2.3


From df122f58b834b24c27d7e2ac02a4910d3e56f6ae Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 28 Nov 2016 11:40:05 +0100
Subject: netfilter: ingress: translate 0 nf_hook_slow retval to -1

The caller assumes that < 0 means that skb was stolen (or free'd).

All other return values continue skb processing.

nf_hook_slow returns 3 different return value types:

A) a (negative) errno value: the skb was dropped (NF_DROP, e.g.
by iptables '-j DROP' rule).

B) 0. The skb was stolen by the hook or queued to userspace.

C) 1. all hooks returned NF_ACCEPT so the caller should invoke
   the okfn so packet processing can continue.

nft ingress facility currently doesn't have the 'okfn' that
the NF_HOOK() macros use; there is no nfqueue support either.

So 1 means that nf_hook_ingress() caller should go on processing the skb.

In order to allow use of NF_STOLEN from ingress we need to translate
this to an errno number, else we'd crash because we continue with
already-free'd (or about to be free-d) skb.

The errno value isn't checked, its just important that its less than 0,
so return -1.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 2dc3b49b804a..59476061de86 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 {
 	struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
 	struct nf_hook_state state;
+	int ret;
 
 	/* Must recheck the ingress hook head, in the event it became NULL
 	 * after the check in nf_hook_ingress_active evaluated to true.
@@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
-	return nf_hook_slow(skb, &state, e);
+	ret = nf_hook_slow(skb, &state, e);
+	if (ret == 0)
+		return -1;
+
+	return ret;
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
-- 
cgit v1.2.3


From 562b488443f658151abc9732e1a9762e27c694a0 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 25 Nov 2016 10:53:39 +0100
Subject: gpio: pl061: move platform data into driver

No boardfile defines any PL061 platform data anymore: the
Integrator IM/PD-1 includes the file but is not making use
of the struct. Let's delete the include and all references,
then move the platform data into the driver for later
consolidation into the driver state container.

The only resource defined by the IM/PD-1 is the IRQ which
is passed through the AMBA PrimeCell bus abstraction
struct amba_device.

Cc: arm@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-integrator/impd1.c |  1 -
 drivers/gpio/gpio-pl061.c        | 14 +++++++++++++-
 include/linux/amba/pl061.h       | 16 ----------------
 3 files changed, 13 insertions(+), 18 deletions(-)
 delete mode 100644 include/linux/amba/pl061.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index ed9a01484030..a109f6482413 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -21,7 +21,6 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
 #include <linux/amba/mmci.h>
-#include <linux/amba/pl061.h>
 #include <linux/io.h>
 #include <linux/platform_data/clk-integrator.h>
 #include <linux/slab.h>
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 47f397236417..cbcc631181e0 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -23,7 +23,6 @@
 #include <linux/gpio.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
-#include <linux/amba/pl061.h>
 #include <linux/slab.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm.h>
@@ -39,6 +38,19 @@
 
 #define PL061_GPIO_NR	8
 
+struct pl061_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	/* number of the first IRQ.
+	 * If the IRQ functionality in not desired this must be set to 0.
+	 */
+	unsigned	irq_base;
+
+	u8		directions;	/* startup directions, 1: out, 0: in */
+	u8		values;		/* startup values */
+};
+
 #ifdef CONFIG_PM
 struct pl061_context_save_regs {
 	u8 gpio_data;
diff --git a/include/linux/amba/pl061.h b/include/linux/amba/pl061.h
deleted file mode 100644
index fb83c0453489..000000000000
--- a/include/linux/amba/pl061.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <linux/types.h>
-
-/* platform data for the PL061 GPIO driver */
-
-struct pl061_platform_data {
-	/* number of the first GPIO */
-	unsigned	gpio_base;
-
-	/* number of the first IRQ.
-	 * If the IRQ functionality in not desired this must be set to 0.
-	 */
-	unsigned	irq_base;
-
-	u8		directions;	/* startup directions, 1: out, 0: in */
-	u8		values;		/* startup values */
-};
-- 
cgit v1.2.3


From 37d6a0a6f4700ad3ae7bbf8db38b4557e97b3fe4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 25 Nov 2016 11:57:09 +0100
Subject: PCI: Add pci_register_host_bridge() interface

Make the existing pci_host_bridge structure a proper device that is usable
by PCI host drivers in a more standard way. In addition to the existing
pci_scan_bus(), pci_scan_root_bus(), pci_scan_root_bus_msi(), and
pci_create_root_bus() interfaces, this unfortunately means having to add
yet another interface doing basically the same thing, and add some extra
code in the initial step.

However, this time it's more likely to be extensible enough that we won't
have to do another one again in the future, and we should be able to reduce
code much more as a result.

The main idea is to pull the allocation of 'struct pci_host_bridge' out of
the registration, and let individual host drivers and architecture code
fill the members before calling the registration function.

There are a number of things we can do based on this:

* Use a single memory allocation for the driver-specific structure
  and the generic PCI host bridge
* consolidate the contents of driver-specific structures by moving
  them into pci_host_bridge
* Add a consistent interface for removing a PCI host bridge again
  when unloading a host driver module
* Replace the architecture specific __weak pcibios_*() functions with
  callbacks in a pci_host_bridge device
* Move common boilerplate code from host drivers into the generic
  function, based on contents of the structure
* Extend pci_host_bridge with additional members when needed without
  having to add arguments to pci_scan_*().
* Move members of struct pci_bus into pci_host_bridge to avoid
  having lots of identical copies.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <helgaas@kernel.org>
---
 drivers/pci/probe.c | 238 +++++++++++++++++++++++++++++++---------------------
 include/linux/pci.h |   4 +
 2 files changed, 145 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ab002671fa60..4853c8fbd701 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -521,7 +521,7 @@ static void pci_release_host_bridge_dev(struct device *dev)
 	kfree(bridge);
 }
 
-static struct pci_host_bridge *pci_alloc_host_bridge(struct pci_bus *b)
+static struct pci_host_bridge *pci_alloc_host_bridge(void)
 {
 	struct pci_host_bridge *bridge;
 
@@ -530,7 +530,7 @@ static struct pci_host_bridge *pci_alloc_host_bridge(struct pci_bus *b)
 		return NULL;
 
 	INIT_LIST_HEAD(&bridge->windows);
-	bridge->bus = b;
+
 	return bridge;
 }
 
@@ -717,6 +717,122 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
 	dev_set_msi_domain(&bus->dev, d);
 }
 
+static int pci_register_host_bridge(struct pci_host_bridge *bridge)
+{
+	struct device *parent = bridge->dev.parent;
+	struct resource_entry *window, *n;
+	struct pci_bus *bus, *b;
+	resource_size_t offset;
+	LIST_HEAD(resources);
+	struct resource *res;
+	char addr[64], *fmt;
+	const char *name;
+	int err;
+
+	bus = pci_alloc_bus(NULL);
+	if (!bus)
+		return -ENOMEM;
+
+	bridge->bus = bus;
+
+	/* temporarily move resources off the list */
+	list_splice_init(&bridge->windows, &resources);
+	bus->sysdata = bridge->sysdata;
+	bus->msi = bridge->msi;
+	bus->ops = bridge->ops;
+	bus->number = bus->busn_res.start = bridge->busnr;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
+#endif
+
+	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
+	if (b) {
+		/* If we already got to this bus through a different bridge, ignore it */
+		dev_dbg(&b->dev, "bus already known\n");
+		err = -EEXIST;
+		goto free;
+	}
+
+	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus),
+		     bridge->busnr);
+
+	err = pcibios_root_bridge_prepare(bridge);
+	if (err)
+		goto free;
+
+	err = device_register(&bridge->dev);
+	if (err)
+		put_device(&bridge->dev);
+
+	bus->bridge = get_device(&bridge->dev);
+	device_enable_async_suspend(bus->bridge);
+	pci_set_bus_of_node(bus);
+	pci_set_bus_msi_domain(bus);
+
+	if (!parent)
+		set_dev_node(bus->bridge, pcibus_to_node(bus));
+
+	bus->dev.class = &pcibus_class;
+	bus->dev.parent = bus->bridge;
+
+	dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
+	name = dev_name(&bus->dev);
+
+	err = device_register(&bus->dev);
+	if (err)
+		goto unregister;
+
+	pcibios_add_bus(bus);
+
+	/* Create legacy_io and legacy_mem files for this bus */
+	pci_create_legacy_files(bus);
+
+	if (parent)
+		dev_info(parent, "PCI host bridge to bus %s\n", name);
+	else
+		pr_info("PCI host bridge to bus %s\n", name);
+
+	/* Add initial resources to the bus */
+	resource_list_for_each_entry_safe(window, n, &resources) {
+		list_move_tail(&window->node, &bridge->windows);
+		offset = window->offset;
+		res = window->res;
+
+		if (res->flags & IORESOURCE_BUS)
+			pci_bus_insert_busn_res(bus, bus->number, res->end);
+		else
+			pci_bus_add_resource(bus, res, 0);
+
+		if (offset) {
+			if (resource_type(res) == IORESOURCE_IO)
+				fmt = " (bus address [%#06llx-%#06llx])";
+			else
+				fmt = " (bus address [%#010llx-%#010llx])";
+
+			snprintf(addr, sizeof(addr), fmt,
+				 (unsigned long long)(res->start - offset),
+				 (unsigned long long)(res->end - offset));
+		} else
+			addr[0] = '\0';
+
+		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
+	}
+
+	down_write(&pci_bus_sem);
+	list_add_tail(&bus->node, &pci_root_buses);
+	up_write(&pci_bus_sem);
+
+	return 0;
+
+unregister:
+	put_device(&bridge->dev);
+	device_unregister(&bridge->dev);
+
+free:
+	kfree(bus);
+	return err;
+}
+
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
 {
@@ -2130,113 +2246,43 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
-struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+static struct pci_bus *pci_create_root_bus_msi(struct device *parent,
+		int bus, struct pci_ops *ops, void *sysdata,
+		struct list_head *resources, struct msi_controller *msi)
 {
 	int error;
 	struct pci_host_bridge *bridge;
-	struct pci_bus *b, *b2;
-	struct resource_entry *window, *n;
-	struct resource *res;
-	resource_size_t offset;
-	char bus_addr[64];
-	char *fmt;
-
-	b = pci_alloc_bus(NULL);
-	if (!b)
-		return NULL;
 
-	b->sysdata = sysdata;
-	b->ops = ops;
-	b->number = b->busn_res.start = bus;
-#ifdef CONFIG_PCI_DOMAINS_GENERIC
-	b->domain_nr = pci_bus_find_domain_nr(b, parent);
-#endif
-	b2 = pci_find_bus(pci_domain_nr(b), bus);
-	if (b2) {
-		/* If we already got to this bus through a different bridge, ignore it */
-		dev_dbg(&b2->dev, "bus already known\n");
-		goto err_out;
-	}
-
-	bridge = pci_alloc_host_bridge(b);
+	bridge = pci_alloc_host_bridge();
 	if (!bridge)
-		goto err_out;
+		return NULL;
 
 	bridge->dev.parent = parent;
 	bridge->dev.release = pci_release_host_bridge_dev;
-	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
-	error = pcibios_root_bridge_prepare(bridge);
-	if (error) {
-		kfree(bridge);
-		goto err_out;
-	}
-
-	error = device_register(&bridge->dev);
-	if (error) {
-		put_device(&bridge->dev);
-		goto err_out;
-	}
-	b->bridge = get_device(&bridge->dev);
-	device_enable_async_suspend(b->bridge);
-	pci_set_bus_of_node(b);
-	pci_set_bus_msi_domain(b);
 
-	if (!parent)
-		set_dev_node(b->bridge, pcibus_to_node(b));
-
-	b->dev.class = &pcibus_class;
-	b->dev.parent = b->bridge;
-	dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
-	error = device_register(&b->dev);
-	if (error)
-		goto class_dev_reg_err;
+	list_splice_init(resources, &bridge->windows);
+	bridge->sysdata = sysdata;
+	bridge->busnr = bus;
+	bridge->ops = ops;
+	bridge->msi = msi;
 
-	pcibios_add_bus(b);
-
-	/* Create legacy_io and legacy_mem files for this bus */
-	pci_create_legacy_files(b);
-
-	if (parent)
-		dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev));
-	else
-		printk(KERN_INFO "PCI host bridge to bus %s\n", dev_name(&b->dev));
-
-	/* Add initial resources to the bus */
-	resource_list_for_each_entry_safe(window, n, resources) {
-		list_move_tail(&window->node, &bridge->windows);
-		res = window->res;
-		offset = window->offset;
-		if (res->flags & IORESOURCE_BUS)
-			pci_bus_insert_busn_res(b, bus, res->end);
-		else
-			pci_bus_add_resource(b, res, 0);
-		if (offset) {
-			if (resource_type(res) == IORESOURCE_IO)
-				fmt = " (bus address [%#06llx-%#06llx])";
-			else
-				fmt = " (bus address [%#010llx-%#010llx])";
-			snprintf(bus_addr, sizeof(bus_addr), fmt,
-				 (unsigned long long) (res->start - offset),
-				 (unsigned long long) (res->end - offset));
-		} else
-			bus_addr[0] = '\0';
-		dev_info(&b->dev, "root bus resource %pR%s\n", res, bus_addr);
-	}
+	error = pci_register_host_bridge(bridge);
+	if (error < 0)
+		goto err_out;
 
-	down_write(&pci_bus_sem);
-	list_add_tail(&b->node, &pci_root_buses);
-	up_write(&pci_bus_sem);
+	return bridge->bus;
 
-	return b;
-
-class_dev_reg_err:
-	put_device(&bridge->dev);
-	device_unregister(&bridge->dev);
 err_out:
-	kfree(b);
+	kfree(bridge);
 	return NULL;
 }
+
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	return pci_create_root_bus_msi(parent, bus, ops, sysdata, resources,
+				       NULL);
+}
 EXPORT_SYMBOL_GPL(pci_create_root_bus);
 
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
@@ -2317,12 +2363,10 @@ struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
 			break;
 		}
 
-	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
+	b = pci_create_root_bus_msi(parent, bus, ops, sysdata, resources, msi);
 	if (!b)
 		return NULL;
 
-	b->msi = msi;
-
 	if (!found) {
 		dev_info(&b->dev,
 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e49f70dbd9b..f79634612fbf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -420,9 +420,13 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
 struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
+	struct pci_ops *ops;
+	void *sysdata;
+	int busnr;
 	struct list_head windows;	/* resource_entry */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
+	struct msi_controller *msi;
 	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
 	/* Resource alignment requirements */
 	resource_size_t (*align_resource)(struct pci_dev *dev,
-- 
cgit v1.2.3


From 5909406598d9fab58be860b72dff9409bff11653 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 25 Nov 2016 11:57:10 +0100
Subject: PCI: Allow driver-specific data in host bridge

Provide a way to allocate driver-specific data along with a PCI host bridge
structure. The bridge's ->private field points to this data.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <helgaas@kernel.org>
---
 drivers/pci/probe.c |  6 +++---
 include/linux/pci.h | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4853c8fbd701..cf9cb6f30782 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -521,11 +521,11 @@ static void pci_release_host_bridge_dev(struct device *dev)
 	kfree(bridge);
 }
 
-static struct pci_host_bridge *pci_alloc_host_bridge(void)
+static struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
 {
 	struct pci_host_bridge *bridge;
 
-	bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
+	bridge = kzalloc(sizeof(*bridge) + priv, GFP_KERNEL);
 	if (!bridge)
 		return NULL;
 
@@ -2253,7 +2253,7 @@ static struct pci_bus *pci_create_root_bus_msi(struct device *parent,
 	int error;
 	struct pci_host_bridge *bridge;
 
-	bridge = pci_alloc_host_bridge();
+	bridge = pci_alloc_host_bridge(0);
 	if (!bridge)
 		return NULL;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f79634612fbf..beacb17e81fb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -434,10 +434,21 @@ struct pci_host_bridge {
 			resource_size_t start,
 			resource_size_t size,
 			resource_size_t align);
+	unsigned long private[0] ____cacheline_aligned;
 };
 
 #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
 
+static inline void *pci_host_bridge_priv(struct pci_host_bridge *bridge)
+{
+	return (void *)bridge->private;
+}
+
+static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv)
+{
+	return container_of(priv, struct pci_host_bridge, private);
+}
+
 struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
 
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
-- 
cgit v1.2.3


From a52d1443bba1db98907521414727eee22ae8c380 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 25 Nov 2016 11:57:11 +0100
Subject: PCI: Export host bridge registration interface

Allow PCI host bridge drivers to use the new host bridge interfaces to
register their host bridge.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <helgaas@kernel.org>
---
 drivers/pci/probe.c | 6 ++++--
 include/linux/pci.h | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cf9cb6f30782..f85ecb595c30 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -521,7 +521,7 @@ static void pci_release_host_bridge_dev(struct device *dev)
 	kfree(bridge);
 }
 
-static struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
+struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
 {
 	struct pci_host_bridge *bridge;
 
@@ -533,6 +533,7 @@ static struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
 
 	return bridge;
 }
+EXPORT_SYMBOL(pci_alloc_host_bridge);
 
 static const unsigned char pcix_bus_speed[] = {
 	PCI_SPEED_UNKNOWN,		/* 0 */
@@ -717,7 +718,7 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
 	dev_set_msi_domain(&bus->dev, d);
 }
 
-static int pci_register_host_bridge(struct pci_host_bridge *bridge)
+int pci_register_host_bridge(struct pci_host_bridge *bridge)
 {
 	struct device *parent = bridge->dev.parent;
 	struct resource_entry *window, *n;
@@ -832,6 +833,7 @@ free:
 	kfree(bus);
 	return err;
 }
+EXPORT_SYMBOL(pci_register_host_bridge);
 
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index beacb17e81fb..aa5e8af16cfc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -449,6 +449,8 @@ static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv)
 	return container_of(priv, struct pci_host_bridge, private);
 }
 
+struct pci_host_bridge *pci_alloc_host_bridge(size_t priv);
+int pci_register_host_bridge(struct pci_host_bridge *bridge);
 struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
 
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
-- 
cgit v1.2.3


From 777c6e0daebb3fcefbbd6f620410a946b07ef6d0 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 7 Dec 2016 14:54:38 +0100
Subject: hotplug: Make register and unregister notifier API symmetric

Yu Zhao has noticed that __unregister_cpu_notifier only unregisters its
notifiers when HOTPLUG_CPU=y while the registration might succeed even
when HOTPLUG_CPU=n if MODULE is enabled. This means that e.g. zswap
might keep a stale notifier on the list on the manual clean up during
the pool tear down and thus corrupt the list. Resulting in the following

[  144.964346] BUG: unable to handle kernel paging request at ffff880658a2be78
[  144.971337] IP: [<ffffffffa290b00b>] raw_notifier_chain_register+0x1b/0x40
<snipped>
[  145.122628] Call Trace:
[  145.125086]  [<ffffffffa28e5cf8>] __register_cpu_notifier+0x18/0x20
[  145.131350]  [<ffffffffa2a5dd73>] zswap_pool_create+0x273/0x400
[  145.137268]  [<ffffffffa2a5e0fc>] __zswap_param_set+0x1fc/0x300
[  145.143188]  [<ffffffffa2944c1d>] ? trace_hardirqs_on+0xd/0x10
[  145.149018]  [<ffffffffa2908798>] ? kernel_param_lock+0x28/0x30
[  145.154940]  [<ffffffffa2a3e8cf>] ? __might_fault+0x4f/0xa0
[  145.160511]  [<ffffffffa2a5e237>] zswap_compressor_param_set+0x17/0x20
[  145.167035]  [<ffffffffa2908d3c>] param_attr_store+0x5c/0xb0
[  145.172694]  [<ffffffffa290848d>] module_attr_store+0x1d/0x30
[  145.178443]  [<ffffffffa2b2b41f>] sysfs_kf_write+0x4f/0x70
[  145.183925]  [<ffffffffa2b2a5b9>] kernfs_fop_write+0x149/0x180
[  145.189761]  [<ffffffffa2a99248>] __vfs_write+0x18/0x40
[  145.194982]  [<ffffffffa2a9a412>] vfs_write+0xb2/0x1a0
[  145.200122]  [<ffffffffa2a9a732>] SyS_write+0x52/0xa0
[  145.205177]  [<ffffffffa2ff4d97>] entry_SYSCALL_64_fastpath+0x12/0x17

This can be even triggered manually by changing
/sys/module/zswap/parameters/compressor multiple times.

Fix this issue by making unregister APIs symmetric to the register so
there are no surprises.

Fixes: 47e627bc8c9a ("[PATCH] hotplug: Allow modules to use the cpu hotplug notifiers even if !CONFIG_HOTPLUG_CPU")
Reported-and-tested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Link: http://lkml.kernel.org/r/20161207135438.4310-1-mhocko@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h | 15 ++++-----------
 kernel/cpu.c        |  2 +-
 2 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b886dc17f2f3..e571128ad99a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -93,22 +93,16 @@ extern bool cpuhp_tasks_frozen;
 		{ .notifier_call = fn, .priority = pri };	\
 	__register_cpu_notifier(&fn##_nb);			\
 }
-#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
-#define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-#define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
 
-#ifdef CONFIG_HOTPLUG_CPU
 extern int register_cpu_notifier(struct notifier_block *nb);
 extern int __register_cpu_notifier(struct notifier_block *nb);
 extern void unregister_cpu_notifier(struct notifier_block *nb);
 extern void __unregister_cpu_notifier(struct notifier_block *nb);
-#else
 
-#ifndef MODULE
-extern int register_cpu_notifier(struct notifier_block *nb);
-extern int __register_cpu_notifier(struct notifier_block *nb);
-#else
+#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
+#define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
+#define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
+
 static inline int register_cpu_notifier(struct notifier_block *nb)
 {
 	return 0;
@@ -118,7 +112,6 @@ static inline int __register_cpu_notifier(struct notifier_block *nb)
 {
 	return 0;
 }
-#endif
 
 static inline void unregister_cpu_notifier(struct notifier_block *nb)
 {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 29de1a9352c0..217fd2e7f435 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -659,7 +659,6 @@ void __init cpuhp_threads_init(void)
 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 EXPORT_SYMBOL(register_cpu_notifier);
 EXPORT_SYMBOL(__register_cpu_notifier);
 void unregister_cpu_notifier(struct notifier_block *nb)
@@ -676,6 +675,7 @@ void __unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(__unregister_cpu_notifier);
 
+#ifdef CONFIG_HOTPLUG_CPU
 /**
  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
  * @cpu: a CPU id
-- 
cgit v1.2.3


From 1da5c46fa965ff90f5ffc080b6ab3fae5e227bc3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 Nov 2016 18:50:57 +0100
Subject: kthread: Make struct kthread kmalloc'ed

commit 23196f2e5f5d "kthread: Pin the stack via try_get_task_stack() /
put_task_stack() in to_live_kthread() function" is a workaround for the
fragile design of struct kthread being allocated on the task stack.

struct kthread in its current form should be removed, but this needs
cleanups outside of kthread.c.

As a first step move struct kthread away from the task stack by making it
kmalloc'ed. This allows to access kthread.exited without the magic of
trying to pin task stack and the try logic in to_live_kthread().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chunming Zhou <David1.Zhou@amd.com>
Cc: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20161129175057.GA5330@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/kthread.h |  1 +
 kernel/fork.c           |  2 ++
 kernel/kthread.c        | 58 ++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 48 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index a6e82a69c363..c1c3e63d52c1 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 	__k;								   \
 })
 
+void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
diff --git a/kernel/fork.c b/kernel/fork.c
index 600e93b5e539..7ffa16033ded 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -354,6 +354,8 @@ void free_task(struct task_struct *tsk)
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
 	arch_release_task_struct(tsk);
+	if (tsk->flags & PF_KTHREAD)
+		free_kthread_struct(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index be2cc1f9dd57..9d64b6526d0b 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -53,14 +53,38 @@ enum KTHREAD_BITS {
 	KTHREAD_IS_PARKED,
 };
 
-#define __to_kthread(vfork)	\
-	container_of(vfork, struct kthread, exited)
+static inline void set_kthread_struct(void *kthread)
+{
+	/*
+	 * We abuse ->set_child_tid to avoid the new member and because it
+	 * can't be wrongly copied by copy_process(). We also rely on fact
+	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
+	 */
+	current->set_child_tid = (__force void __user *)kthread;
+}
 
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
-	return __to_kthread(k->vfork_done);
+	WARN_ON(!(k->flags & PF_KTHREAD));
+	return (__force void *)k->set_child_tid;
+}
+
+void free_kthread_struct(struct task_struct *k)
+{
+	/*
+	 * Can be NULL if this kthread was created by kernel_thread()
+	 * or if kmalloc() in kthread() failed.
+	 */
+	kfree(to_kthread(k));
 }
 
+#define __to_kthread(vfork)	\
+	container_of(vfork, struct kthread, exited)
+
+/*
+ * TODO: kill it and use to_kthread(). But we still need the users
+ * like kthread_stop() which has to sync with the exiting kthread.
+ */
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
 	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
@@ -181,14 +205,11 @@ static int kthread(void *_create)
 	int (*threadfn)(void *data) = create->threadfn;
 	void *data = create->data;
 	struct completion *done;
-	struct kthread self;
+	struct kthread *self;
 	int ret;
 
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
+	self = kmalloc(sizeof(*self), GFP_KERNEL);
+	set_kthread_struct(self);
 
 	/* If user was SIGKILLed, I release the structure. */
 	done = xchg(&create->done, NULL);
@@ -196,6 +217,19 @@ static int kthread(void *_create)
 		kfree(create);
 		do_exit(-EINTR);
 	}
+
+	if (!self) {
+		create->result = ERR_PTR(-ENOMEM);
+		complete(done);
+		do_exit(-ENOMEM);
+	}
+
+	self->flags = 0;
+	self->data = data;
+	init_completion(&self->exited);
+	init_completion(&self->parked);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
@@ -203,12 +237,10 @@ static int kthread(void *_create)
 	schedule();
 
 	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
 	do_exit(ret);
 }
 
-- 
cgit v1.2.3


From 89caaa2d80b7bf9bd8632cd3137254f8c685e5db Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Wed, 7 Dec 2016 15:20:07 +0100
Subject: net: stmmac: add support for independent DMA pbl for tx/rx

GMAC and newer supports independent programmable burst lengths for
DMA tx/rx. Add new optional devicetree properties representing this.

To be backwards compatible, snps,pbl will still be valid, but
snps,txpbl/snps,rxpbl will override the value in snps,pbl if set.

If the IP is synthesized to use the AXI interface, there is a register
and a matching DT property inside the optional stmmac-axi-config DT node
for controlling burst lengths, named snps,blen.
However, using this register, it is not possible to control tx and rx
independently. Also, this register is not available if the IP was
synthesized with, e.g., the AHB interface.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Acked-by: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/stmmac.txt      |  6 +++++-
 Documentation/networking/stmmac.txt                   | 19 +++++++++++++------
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c   | 12 ++++++------
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c      | 12 +++++++-----
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c |  2 ++
 include/linux/stmmac.h                                |  2 ++
 6 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index b95ff998ba73..8080038ff1b2 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -34,7 +34,11 @@ Optional properties:
   platforms.
 - tx-fifo-depth: See ethernet.txt file in the same directory
 - rx-fifo-depth: See ethernet.txt file in the same directory
-- snps,pbl		Programmable Burst Length
+- snps,pbl		Programmable Burst Length (tx and rx)
+- snps,txpbl		Tx Programmable Burst Length. Only for GMAC and newer.
+			If set, DMA tx will use this value rather than snps,pbl.
+- snps,rxpbl		Rx Programmable Burst Length. Only for GMAC and newer.
+			If set, DMA rx will use this value rather than snps,pbl.
 - snps,aal		Address-Aligned Beats
 - snps,fixed-burst	Program the DMA to use the fixed burst mode
 - snps,mixed-burst	Program the DMA to use the mixed burst mode
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 014f4f756cb7..6add57374f70 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -153,7 +153,8 @@ Where:
    o pbl: the Programmable Burst Length is maximum number of beats to
        be transferred in one DMA transaction.
        GMAC also enables the 4xPBL by default.
-   o fixed_burst/mixed_burst/burst_len
+   o txpbl/rxpbl: GMAC and newer supports independent DMA pbl for tx/rx.
+   o fixed_burst/mixed_burst/aal
  o clk_csr: fixed CSR Clock range selection.
  o has_gmac: uses the GMAC core.
  o enh_desc: if sets the MAC will use the enhanced descriptor structure.
@@ -205,16 +206,22 @@ tuned according to the HW capabilities.
 
 struct stmmac_dma_cfg {
 	int pbl;
+	int txpbl;
+	int rxpbl;
 	int fixed_burst;
-	int burst_len_supported;
+	int mixed_burst;
+	bool aal;
 };
 
 Where:
- o pbl: Programmable Burst Length
+ o pbl: Programmable Burst Length (tx and rx)
+ o txpbl: Transmit Programmable Burst Length. Only for GMAC and newer.
+	 If set, DMA tx will use this value rather than pbl.
+ o rxpbl: Receive Programmable Burst Length. Only for GMAC and newer.
+	 If set, DMA rx will use this value rather than pbl.
  o fixed_burst: program the DMA to use the fixed burst mode
- o burst_len: this is the value we put in the register
-	      supported values are provided as macros in
-	      linux/stmmac.h header file.
+ o mixed_burst: program the DMA to use the mixed burst mode
+ o aal: Address-Aligned Beats
 
 ---
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 318ae9f10104..99b8040af592 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -89,20 +89,20 @@ static void dwmac1000_dma_init(void __iomem *ioaddr,
 			       u32 dma_tx, u32 dma_rx, int atds)
 {
 	u32 value = readl(ioaddr + DMA_BUS_MODE);
+	int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+	int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
 	/*
 	 * Set the DMA PBL (Programmable Burst Length) mode.
 	 *
 	 * Note: before stmmac core 3.50 this mode bit was 4xPBL, and
 	 * post 3.5 mode bit acts as 8*PBL.
-	 *
-	 * This configuration doesn't take care about the Separate PBL
-	 * so only the bits: 13-8 are programmed with the PBL passed from the
-	 * platform.
 	 */
 	value |= DMA_BUS_MODE_MAXPBL;
-	value &= ~DMA_BUS_MODE_PBL_MASK;
-	value |= (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT);
+	value |= DMA_BUS_MODE_USP;
+	value &= ~(DMA_BUS_MODE_PBL_MASK | DMA_BUS_MODE_RPBL_MASK);
+	value |= (txpbl << DMA_BUS_MODE_PBL_SHIFT);
+	value |= (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 
 	/* Set the Fixed burst mode */
 	if (dma_cfg->fixed_burst)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 7d82a3464097..2c3b2098f350 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -71,11 +71,14 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
-static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
+static void dwmac4_dma_init_channel(void __iomem *ioaddr,
+				    struct stmmac_dma_cfg *dma_cfg,
 				    u32 dma_tx_phy, u32 dma_rx_phy,
 				    u32 channel)
 {
 	u32 value;
+	int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+	int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
 	/* set PBL for each channels. Currently we affect same configuration
 	 * on each channel
@@ -85,11 +88,11 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
 	writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
 
 	value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
-	value = value | (pbl << DMA_BUS_MODE_PBL_SHIFT);
+	value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_TX_CONTROL(channel));
 
 	value = readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
-	value = value | (pbl << DMA_BUS_MODE_RPBL_SHIFT);
+	value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(channel));
 
 	/* Mask interrupts by writing to CSR7 */
@@ -120,8 +123,7 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 
 	for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-		dwmac4_dma_init_channel(ioaddr, dma_cfg->pbl,
-					dma_tx, dma_rx, i);
+		dwmac4_dma_init_channel(ioaddr, dma_cfg, dma_tx, dma_rx, i);
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 81800f23a9c4..96afe0561c99 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -315,6 +315,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
 	if (!dma_cfg->pbl)
 		dma_cfg->pbl = DEFAULT_DMA_PBL;
+	of_property_read_u32(np, "snps,txpbl", &dma_cfg->txpbl);
+	of_property_read_u32(np, "snps,rxpbl", &dma_cfg->rxpbl);
 
 	dma_cfg->aal = of_property_read_bool(np, "snps,aal");
 	dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 3537fb33cc90..e6d7a5940819 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -88,6 +88,8 @@ struct stmmac_mdio_bus_data {
 
 struct stmmac_dma_cfg {
 	int pbl;
+	int txpbl;
+	int rxpbl;
 	int fixed_burst;
 	int mixed_burst;
 	bool aal;
-- 
cgit v1.2.3


From 4022d039a315951e59d95d22e79198d861ce4490 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Wed, 7 Dec 2016 15:20:08 +0100
Subject: net: smmac: allow configuring lower pbl values

The driver currently always sets the PBLx8/PBLx4 bit, which means that
the pbl values configured via the pbl/txpbl/rxpbl DT properties are
always multiplied by 8/4 in the hardware.

In order to allow the DT to configure lower pbl values, while at the
same time not changing behavior of any existing device trees using the
pbl/txpbl/rxpbl settings, add a property to disable the multiplication
of the pbl by 8/4 in the hardware.

Suggested-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Acked-by: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/stmmac.txt      | 2 ++
 Documentation/networking/stmmac.txt                   | 5 ++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c   | 3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c      | 3 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c      | 2 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 +
 include/linux/stmmac.h                                | 1 +
 7 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index 8080038ff1b2..128da752fec9 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -39,6 +39,8 @@ Optional properties:
 			If set, DMA tx will use this value rather than snps,pbl.
 - snps,rxpbl		Rx Programmable Burst Length. Only for GMAC and newer.
 			If set, DMA rx will use this value rather than snps,pbl.
+- snps,no-pbl-x8	Don't multiply the pbl/txpbl/rxpbl values by 8.
+			For core rev < 3.50, don't multiply the values by 4.
 - snps,aal		Address-Aligned Beats
 - snps,fixed-burst	Program the DMA to use the fixed burst mode
 - snps,mixed-burst	Program the DMA to use the mixed burst mode
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 6add57374f70..2bb07078f535 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -152,8 +152,9 @@ Where:
  o dma_cfg: internal DMA parameters
    o pbl: the Programmable Burst Length is maximum number of beats to
        be transferred in one DMA transaction.
-       GMAC also enables the 4xPBL by default.
+       GMAC also enables the 4xPBL by default. (8xPBL for GMAC 3.50 and newer)
    o txpbl/rxpbl: GMAC and newer supports independent DMA pbl for tx/rx.
+   o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
    o fixed_burst/mixed_burst/aal
  o clk_csr: fixed CSR Clock range selection.
  o has_gmac: uses the GMAC core.
@@ -208,6 +209,7 @@ struct stmmac_dma_cfg {
 	int pbl;
 	int txpbl;
 	int rxpbl;
+	bool pblx8;
 	int fixed_burst;
 	int mixed_burst;
 	bool aal;
@@ -219,6 +221,7 @@ Where:
 	 If set, DMA tx will use this value rather than pbl.
  o rxpbl: Receive Programmable Burst Length. Only for GMAC and newer.
 	 If set, DMA rx will use this value rather than pbl.
+ o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
  o fixed_burst: program the DMA to use the fixed burst mode
  o mixed_burst: program the DMA to use the mixed burst mode
  o aal: Address-Aligned Beats
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 99b8040af592..612d3aaac9a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -98,7 +98,8 @@ static void dwmac1000_dma_init(void __iomem *ioaddr,
 	 * Note: before stmmac core 3.50 this mode bit was 4xPBL, and
 	 * post 3.5 mode bit acts as 8*PBL.
 	 */
-	value |= DMA_BUS_MODE_MAXPBL;
+	if (dma_cfg->pblx8)
+		value |= DMA_BUS_MODE_MAXPBL;
 	value |= DMA_BUS_MODE_USP;
 	value &= ~(DMA_BUS_MODE_PBL_MASK | DMA_BUS_MODE_RPBL_MASK);
 	value |= (txpbl << DMA_BUS_MODE_PBL_SHIFT);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 2c3b2098f350..8196ab5fc33c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -84,7 +84,8 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr,
 	 * on each channel
 	 */
 	value = readl(ioaddr + DMA_CHAN_CONTROL(channel));
-	value = value | DMA_BUS_MODE_PBL;
+	if (dma_cfg->pblx8)
+		value = value | DMA_BUS_MODE_PBL;
 	writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
 
 	value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 56c8a2342c14..a2831773431a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -81,6 +81,7 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
 	/* TODO: AXI */
 
 	/* Set default value for multicast hash bins */
@@ -115,6 +116,7 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 16;
+	plat->dma_cfg->pblx8 = true;
 	plat->dma_cfg->fixed_burst = 1;
 	/* AXI (TODO) */
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 96afe0561c99..082cd48db6a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -317,6 +317,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		dma_cfg->pbl = DEFAULT_DMA_PBL;
 	of_property_read_u32(np, "snps,txpbl", &dma_cfg->txpbl);
 	of_property_read_u32(np, "snps,rxpbl", &dma_cfg->rxpbl);
+	dma_cfg->pblx8 = !of_property_read_bool(np, "snps,no-pbl-x8");
 
 	dma_cfg->aal = of_property_read_bool(np, "snps,aal");
 	dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index e6d7a5940819..266dab9ad782 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -90,6 +90,7 @@ struct stmmac_dma_cfg {
 	int pbl;
 	int txpbl;
 	int rxpbl;
+	bool pblx8;
 	int fixed_burst;
 	int mixed_burst;
 	bool aal;
-- 
cgit v1.2.3


From 13bfff25c081f4e060af761c4082b5a96f756810 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 7 Dec 2016 08:29:10 -0800
Subject: net: rfs: add a jump label

RFS is not commonly used, so add a jump label to avoid some conditionals
in fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  1 +
 include/net/sock.h         | 25 ++++++++++++++-----------
 net/core/dev.c             |  2 ++
 net/core/sysctl_net_core.c |  5 ++++-
 4 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1ff5ea6e1221..994f7423a74b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -192,6 +192,7 @@ struct net_device_stats {
 #ifdef CONFIG_RPS
 #include <linux/static_key.h>
 extern struct static_key rps_needed;
+extern struct static_key rfs_needed;
 #endif
 
 struct neighbour;
diff --git a/include/net/sock.h b/include/net/sock.h
index 1749e38d0301..2729e77950b7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -913,17 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
 static inline void sock_rps_record_flow(const struct sock *sk)
 {
 #ifdef CONFIG_RPS
-	/* Reading sk->sk_rxhash might incur an expensive cache line miss.
-	 *
-	 * TCP_ESTABLISHED does cover almost all states where RFS
-	 * might be useful, and is cheaper [1] than testing :
-	 *	IPv4: inet_sk(sk)->inet_daddr
-	 * 	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
-	 * OR	an additional socket flag
-	 * [1] : sk_state and sk_prot are in the same cache line.
-	 */
-	if (sk->sk_state == TCP_ESTABLISHED)
-		sock_rps_record_flow_hash(sk->sk_rxhash);
+	if (static_key_false(&rfs_needed)) {
+		/* Reading sk->sk_rxhash might incur an expensive cache line
+		 * miss.
+		 *
+		 * TCP_ESTABLISHED does cover almost all states where RFS
+		 * might be useful, and is cheaper [1] than testing :
+		 *	IPv4: inet_sk(sk)->inet_daddr
+		 * 	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+		 * OR	an additional socket flag
+		 * [1] : sk_state and sk_prot are in the same cache line.
+		 */
+		if (sk->sk_state == TCP_ESTABLISHED)
+			sock_rps_record_flow_hash(sk->sk_rxhash);
+	}
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index bffb5253e778..1d33ce03365f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3447,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);
 
 struct static_key rps_needed __read_mostly;
 EXPORT_SYMBOL(rps_needed);
+struct static_key rfs_needed __read_mostly;
+EXPORT_SYMBOL(rfs_needed);
 
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa652530..2a46e4009f62 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 
 		if (sock_table != orig_sock_table) {
 			rcu_assign_pointer(rps_sock_flow_table, sock_table);
-			if (sock_table)
+			if (sock_table) {
 				static_key_slow_inc(&rps_needed);
+				static_key_slow_inc(&rfs_needed);
+			}
 			if (orig_sock_table) {
 				static_key_slow_dec(&rps_needed);
+				static_key_slow_dec(&rfs_needed);
 				synchronize_rcu();
 				vfree(orig_sock_table);
 			}
-- 
cgit v1.2.3


From c8c8b127091b758f5768f906bcdeeb88bc9951ca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 7 Dec 2016 09:19:33 -0800
Subject: udp: under rx pressure, try to condense skbs

Under UDP flood, many softirq producers try to add packets to
UDP receive queue, and one user thread is burning one cpu trying
to dequeue packets as fast as possible.

Two parts of the per packet cost are :
- copying payload from kernel space to user space,
- freeing memory pieces associated with skb.

If socket is under pressure, softirq handler(s) can try to pull in
skb->head the payload of the packet if it fits.

Meaning the softirq handler(s) can free/reuse the page fragment
immediately, instead of letting udp_recvmsg() do this hundreds of usec
later, possibly from another node.

Additional gains :
- We reduce skb->truesize and thus can store more packets per SO_RCVBUF
- We avoid cache line misses at copyout() time and consume_skb() time,
and avoid one put_page() with potential alien freeing on NUMA hosts.

This comes at the cost of a copy, bounded to available tail room, which
is usually small. (We might have to fix GRO_MAX_HEAD which looks bigger
than necessary)

This patch gave me about 5 % increase in throughput in my tests.

skb_condense() helper could probably used in other contexts.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 28 ++++++++++++++++++++++++++++
 net/ipv4/udp.c         | 12 +++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c535fbccf2c..0cd92b0f2af5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1966,6 +1966,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
 	return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
 }
 
+void skb_condense(struct sk_buff *skb);
+
 /**
  *	skb_headroom - bytes at buffer head
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b45cd1494243..84151cf40aeb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4931,3 +4931,31 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
 	return clone;
 }
 EXPORT_SYMBOL(pskb_extract);
+
+/**
+ * skb_condense - try to get rid of fragments/frag_list if possible
+ * @skb: buffer
+ *
+ * Can be used to save memory before skb is added to a busy queue.
+ * If packet has bytes in frags and enough tail room in skb->head,
+ * pull all of them, so that we can free the frags right now and adjust
+ * truesize.
+ * Notes:
+ *	We do not reallocate skb->head thus can not fail.
+ *	Caller must re-evaluate skb->truesize if needed.
+ */
+void skb_condense(struct sk_buff *skb)
+{
+	if (!skb->data_len ||
+	    skb->data_len > skb->end - skb->tail ||
+	    skb_cloned(skb))
+		return;
+
+	/* Nice, we can free page frag(s) right now */
+	__pskb_pull_tail(skb, skb->data_len);
+
+	/* Now adjust skb->truesize, since __pskb_pull_tail() does
+	 * not do this.
+	 */
+	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 16d88ba9ff1c..f5628ada47b5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1199,7 +1199,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 	int rmem, delta, amt, err = -ENOMEM;
-	int size = skb->truesize;
+	int size;
 
 	/* try to avoid the costly atomic add/sub pair when the receive
 	 * queue is full; always allow at least a packet
@@ -1208,6 +1208,16 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 	if (rmem > sk->sk_rcvbuf)
 		goto drop;
 
+	/* Under mem pressure, it might be helpful to help udp_recvmsg()
+	 * having linear skbs :
+	 * - Reduce memory overhead and thus increase receive queue capacity
+	 * - Less cache line misses at copyout() time
+	 * - Less work at consume_skb() (less alien page frag freeing)
+	 */
+	if (rmem > (sk->sk_rcvbuf >> 1))
+		skb_condense(skb);
+	size = skb->truesize;
+
 	/* we drop only if the receive buf is full and the receive
 	 * queue contains some other skb
 	 */
-- 
cgit v1.2.3


From d2a4dd37f6b41fbcad76efbf63124eb3126c66fe Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 7 Dec 2016 10:57:59 -0800
Subject: bpf: fix state equivalence

Commmits 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers")
and 484611357c19 ("bpf: allow access into map value arrays") by themselves
are correct, but in combination they make state equivalence ignore 'id' field
of the register state which can lead to accepting invalid program.

Fixes: 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers")
Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h | 14 +++++++-------
 kernel/bpf/verifier.c        |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7453c1281531..a13b031dc6b8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -18,13 +18,6 @@
 
 struct bpf_reg_state {
 	enum bpf_reg_type type;
-	/*
-	 * Used to determine if any memory access using this register will
-	 * result in a bad access.
-	 */
-	s64 min_value;
-	u64 max_value;
-	u32 id;
 	union {
 		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
 		s64 imm;
@@ -40,6 +33,13 @@ struct bpf_reg_state {
 		 */
 		struct bpf_map *map_ptr;
 	};
+	u32 id;
+	/* Used to determine if any memory access using this register will
+	 * result in a bad access. These two fields must be last.
+	 * See states_equal()
+	 */
+	s64 min_value;
+	u64 max_value;
 };
 
 enum bpf_stack_slot_type {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index da9fb2a9b7eb..5b14f85f45c6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2528,7 +2528,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 		 * we didn't do a variable access into a map then we are a-ok.
 		 */
 		if (!varlen_map_access &&
-		    rold->type == rcur->type && rold->imm == rcur->imm)
+		    memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
 			continue;
 
 		/* If we didn't map access then again we don't care about the
-- 
cgit v1.2.3


From f38e7a32ee4fc9c8aeeac59e6e0462cd281586e3 Mon Sep 17 00:00:00 2001
From: "Woojung.Huh@microchip.com" <Woojung.Huh@microchip.com>
Date: Wed, 7 Dec 2016 20:26:07 +0000
Subject: phy: add phy fixup unregister functions

>From : Woojung Huh <woojung.huh@microchip.com>

Add functions to unregister phy fixup for modules.

int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask)
	Unregister phy fixup from phy_fixup_list per bus_id, phy_uid &
	phy_uid_mask

int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask)
	Unregister phy fixup from phy_fixup_list.
	Use it for fixup registered by phy_register_fixup_for_uid()

int phy_unregister_fixup_for_id(const char *bus_id)
	Unregister phy fixup from phy_fixup_list.
	Use it for fixup registered by phy_register_fixup_for_id()

Signed-off-by: Woojung Huh <woojung.huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.txt |  9 ++++++++
 drivers/net/phy/phy_device.c     | 47 ++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h              |  4 ++++
 3 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index e017d933d530..16f90d817224 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -407,6 +407,15 @@ Board Fixups
  The stubs set one of the two matching criteria, and set the other one to
  match anything.
 
+ When phy_register_fixup() or *_for_uid()/*_for_id() is called at module,
+ unregister fixup and free allocate memory are required.
+
+ Call one of following function before unloading module.
+
+ int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
+ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
+ int phy_register_fixup_for_id(const char *phy_id);
+
 Standards
 
  IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index aeaf1bcb12d0..32fa7c76f29c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -235,6 +235,53 @@ int phy_register_fixup_for_id(const char *bus_id,
 }
 EXPORT_SYMBOL(phy_register_fixup_for_id);
 
+/**
+ * phy_unregister_fixup - remove a phy_fixup from the list
+ * @bus_id: A string matches fixup->bus_id (or PHY_ANY_ID) in phy_fixup_list
+ * @phy_uid: A phy id matches fixup->phy_id (or PHY_ANY_UID) in phy_fixup_list
+ * @phy_uid_mask: Applied to phy_uid and fixup->phy_uid before comparison
+ */
+int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask)
+{
+	struct list_head *pos, *n;
+	struct phy_fixup *fixup;
+	int ret;
+
+	ret = -ENODEV;
+
+	mutex_lock(&phy_fixup_lock);
+	list_for_each_safe(pos, n, &phy_fixup_list) {
+		fixup = list_entry(pos, struct phy_fixup, list);
+
+		if ((!strcmp(fixup->bus_id, bus_id)) &&
+		    ((fixup->phy_uid & phy_uid_mask) ==
+		     (phy_uid & phy_uid_mask))) {
+			list_del(&fixup->list);
+			kfree(fixup);
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&phy_fixup_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(phy_unregister_fixup);
+
+/* Unregisters a fixup of any PHY with the UID in phy_uid */
+int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask)
+{
+	return phy_unregister_fixup(PHY_ANY_ID, phy_uid, phy_uid_mask);
+}
+EXPORT_SYMBOL(phy_unregister_fixup_for_uid);
+
+/* Unregisters a fixup of the PHY with id string bus_id */
+int phy_unregister_fixup_for_id(const char *bus_id)
+{
+	return phy_unregister_fixup(bus_id, PHY_ANY_UID, 0xffffffff);
+}
+EXPORT_SYMBOL(phy_unregister_fixup_for_id);
+
 /* Returns 1 if fixup matches phydev in bus_id and phy_uid.
  * Fixups can be set to match any in one or more fields.
  */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index feb8a98e8dd3..f7d95f644eed 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -860,6 +860,10 @@ int phy_register_fixup_for_id(const char *bus_id,
 int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
 			       int (*run)(struct phy_device *));
 
+int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask);
+int phy_unregister_fixup_for_id(const char *bus_id);
+int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
+
 int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
 int phy_get_eee_err(struct phy_device *phydev);
 int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data);
-- 
cgit v1.2.3


From 17bedab2723145d17b14084430743549e6943d03 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 7 Dec 2016 15:53:11 -0800
Subject: bpf: xdp: Allow head adjustment in XDP prog

This patch allows XDP prog to extend/remove the packet
data at the head (like adding or removing header).  It is
done by adding a new XDP helper bpf_xdp_adjust_head().

It also renames bpf_helper_changes_skb_data() to
bpf_helper_changes_pkt_data() to better reflect
that XDP prog does not work on skb.

This patch adds one "xdp_adjust_head" bit to bpf_prog for the
XDP-capable driver to check if the XDP prog requires
bpf_xdp_adjust_head() support.  The driver can then decide
to error out during XDP_SETUP_PROG.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/powerpc/net/bpf_jit_comp64.c                  |  4 ++--
 arch/s390/net/bpf_jit_comp.c                       |  2 +-
 arch/x86/net/bpf_jit_comp.c                        |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  5 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  5 ++++
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  4 ++++
 drivers/net/ethernet/qlogic/qede/qede_main.c       |  5 ++++
 include/linux/filter.h                             |  6 +++--
 include/uapi/linux/bpf.h                           | 11 ++++++++-
 kernel/bpf/core.c                                  |  2 +-
 kernel/bpf/syscall.c                               |  2 ++
 kernel/bpf/verifier.c                              |  2 +-
 net/core/filter.c                                  | 28 ++++++++++++++++++++--
 13 files changed, 67 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0fe98a567125..73a5cf18fd84 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -766,7 +766,7 @@ emit_clear:
 			func = (u8 *) __bpf_call_base + imm;
 
 			/* Save skb pointer if we need to re-cache skb data */
-			if (bpf_helper_changes_skb_data(func))
+			if (bpf_helper_changes_pkt_data(func))
 				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -775,7 +775,7 @@ emit_clear:
 			PPC_MR(b2p[BPF_REG_0], 3);
 
 			/* refresh skb cache */
-			if (bpf_helper_changes_skb_data(func)) {
+			if (bpf_helper_changes_pkt_data(func)) {
 				/* reload skb pointer to r3 */
 				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
 				bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bee281f3163d..167b31b186c1 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -981,7 +981,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if (bpf_helper_changes_skb_data((void *)func)) {
+		if (bpf_helper_changes_pkt_data((void *)func)) {
 			jit->seen |= SEEN_SKB_CHANGE;
 			/* lg %b1,ST_OFF_SKBP(%r15) */
 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index fe04a04dab8e..e76d1af60f7a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -853,7 +853,7 @@ xadd:			if (is_imm8(insn->off))
 			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 			if (seen_ld_abs) {
-				reload_skb_data = bpf_helper_changes_skb_data(func);
+				reload_skb_data = bpf_helper_changes_pkt_data(func);
 				if (reload_skb_data) {
 					EMIT1(0x57); /* push %rdi */
 					jmp_offset += 22; /* pop, mov, sub, mov */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 49a81f1fc1d6..f441eda63bec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2686,6 +2686,11 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	int err;
 	int i;
 
+	if (prog && prog->xdp_adjust_head) {
+		en_err(priv, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+
 	xdp_ring_num = prog ? priv->rx_ring_num : 0;
 
 	/* No need to reconfigure buffers when simply swapping the
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 07020276fe73..cbfa38fc72c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3183,6 +3183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	bool reset, was_opened;
 	int i;
 
+	if (prog && prog->xdp_adjust_head) {
+		netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+
 	mutex_lock(&priv->state_lock);
 
 	if ((netdev->features & NETIF_F_LRO) && prog) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 00d9a03be31d..e8d448109e03 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2946,6 +2946,10 @@ static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
 	};
 	int err;
 
+	if (prog && prog->xdp_adjust_head) {
+		nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
 	if (!prog && !nn->xdp_prog)
 		return 0;
 	if (prog && nn->xdp_prog) {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index cf1dd1436d93..aecdd1c5c0ea 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2507,6 +2507,11 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
 {
 	struct qede_reload_args args;
 
+	if (prog && prog->xdp_adjust_head) {
+		DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+
 	/* If we're called, there was already a bpf reference increment */
 	args.func = &qede_xdp_reload_func;
 	args.u.new_prog = prog;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f078d2b1cff6..6a1658308612 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -406,7 +406,8 @@ struct bpf_prog {
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
-				dst_needed:1;	/* Do we need dst entry? */
+				dst_needed:1,	/* Do we need dst entry? */
+				xdp_adjust_head:1; /* Adjusting pkt head? */
 	kmemcheck_bitfield_end(meta);
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
@@ -440,6 +441,7 @@ struct bpf_skb_data_end {
 struct xdp_buff {
 	void *data;
 	void *data_end;
+	void *data_hard_start;
 };
 
 /* compute the linear packet data range [data, data_end) which
@@ -595,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
-bool bpf_helper_changes_skb_data(void *func);
+bool bpf_helper_changes_pkt_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6123d9b8e828..0eb0e87dbe9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -424,6 +424,12 @@ union bpf_attr {
  *     @len: length of header to be pushed in front
  *     @flags: Flags (unused for now)
  *     Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ *     Adjust the xdp_md.data by delta
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: An positive/negative integer to be added to xdp_md.data
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -469,7 +475,8 @@ union bpf_attr {
 	FN(csum_update),		\
 	FN(set_hash_invalid),		\
 	FN(get_numa_node_id),		\
-	FN(skb_change_head),
+	FN(skb_change_head),		\
+	FN(xdp_adjust_head),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -576,6 +583,8 @@ struct bpf_sock {
 	__u32 protocol;
 };
 
+#define XDP_PACKET_HEADROOM 256
+
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
  * return codes are reserved for future use. Unknown return codes will result
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bdcc9f4ba767..83e0d153b0b4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1143,7 +1143,7 @@ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
 	return prog;
 }
 
-bool __weak bpf_helper_changes_skb_data(void *func)
+bool __weak bpf_helper_changes_pkt_data(void *func)
 {
 	return false;
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 88f609f1c0c3..4819ec9d95f6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -579,6 +579,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 				prog->dst_needed = 1;
 			if (insn->imm == BPF_FUNC_get_prandom_u32)
 				bpf_user_rnd_init_once();
+			if (insn->imm == BPF_FUNC_xdp_adjust_head)
+				prog->xdp_adjust_head = 1;
 			if (insn->imm == BPF_FUNC_tail_call) {
 				/* mark bpf_tail_call as different opcode
 				 * to avoid conditional branch in
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5b14f85f45c6..d28f9a3380a9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1216,7 +1216,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
 		return -EINVAL;
 	}
 
-	changes_data = bpf_helper_changes_skb_data(fn->func);
+	changes_data = bpf_helper_changes_pkt_data(fn->func);
 
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
diff --git a/net/core/filter.c b/net/core/filter.c
index b751202e12f8..b1461708a977 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2234,7 +2234,28 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-bool bpf_helper_changes_skb_data(void *func)
+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
+{
+	void *data = xdp->data + offset;
+
+	if (unlikely(data < xdp->data_hard_start ||
+		     data > xdp->data_end - ETH_HLEN))
+		return -EINVAL;
+
+	xdp->data = data;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+	.func		= bpf_xdp_adjust_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
 	    func == bpf_skb_vlan_pop ||
@@ -2244,7 +2265,8 @@ bool bpf_helper_changes_skb_data(void *func)
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
 	    func == bpf_l3_csum_replace ||
-	    func == bpf_l4_csum_replace)
+	    func == bpf_l4_csum_replace ||
+	    func == bpf_xdp_adjust_head)
 		return true;
 
 	return false;
@@ -2670,6 +2692,8 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_xdp_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_xdp_adjust_head:
+		return &bpf_xdp_adjust_head_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
-- 
cgit v1.2.3


From 71a2f11511b4d1dc8b8e326e10ec6533b534ddf1 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 5 Dec 2016 05:23:20 +0000
Subject: clk: add devm_get_clk_from_child() API

Some driver is using this type of DT bindings for clock (more detail,
see ${LINUX}/Documentation/devicetree/bindings/sound/simple-card.txt).

	sound_soc {
		...
		cpu {
			clocks = <&xxx>;
			...
		};
		codec {
			clocks = <&xxx>;
			...
		};
	};

Current driver in this case uses of_clk_get() for each node, but there
is no devm_of_clk_get() today.
OTOH, the problem of having devm_of_clk_get() is that it encourages the
use of of_clk_get() when clk_get() is more desirable.

Thus, this patch adds new devm_get_clk_from_chile() which explicitly
reads as get a clock from a child node of this device.
By this function, we can also use this type of DT bindings

	sound_soc {
		clocks = <&xxx>, <&xxx>;
		clock-names = "cpu", "codec";
		clock-ranges;
		...
		cpu {
			...
		};
		codec {
			...
		};
	};

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
[sboyd@codeurora.org: Rename subject to clk + add API]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-devres.c | 21 +++++++++++++++++++++
 include/linux/clk.h      | 29 +++++++++++++++++++++++++----
 2 files changed, 46 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index 8f571548870f..3a218c3a06ae 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -53,3 +53,24 @@ void devm_clk_put(struct device *dev, struct clk *clk)
 	WARN_ON(ret);
 }
 EXPORT_SYMBOL(devm_clk_put);
+
+struct clk *devm_get_clk_from_child(struct device *dev,
+				    struct device_node *np, const char *con_id)
+{
+	struct clk **ptr, *clk;
+
+	ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	clk = of_clk_get_by_name(np, con_id);
+	if (!IS_ERR(clk)) {
+		*ptr = clk;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return clk;
+}
+EXPORT_SYMBOL(devm_get_clk_from_child);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 123c02788807..e9d36b3e49de 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -17,8 +17,9 @@
 #include <linux/notifier.h>
 
 struct device;
-
 struct clk;
+struct device_node;
+struct of_phandle_args;
 
 /**
  * DOC: clk notifier callback types
@@ -248,6 +249,23 @@ struct clk *clk_get(struct device *dev, const char *id);
  */
 struct clk *devm_clk_get(struct device *dev, const char *id);
 
+/**
+ * devm_get_clk_from_child - lookup and obtain a managed reference to a
+ *			     clock producer from child node.
+ * @dev: device for clock "consumer"
+ * @np: pointer to clock consumer node
+ * @con_id: clock consumer ID
+ *
+ * This function parses the clocks, and uses them to look up the
+ * struct clk from the registered list of clock providers by using
+ * @np and @con_id
+ *
+ * The clock will automatically be freed when the device is unbound
+ * from the bus.
+ */
+struct clk *devm_get_clk_from_child(struct device *dev,
+				    struct device_node *np, const char *con_id);
+
 /**
  * clk_enable - inform the system when the clock source should be running.
  * @clk: clock source
@@ -432,6 +450,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
+static inline struct clk *devm_get_clk_from_child(struct device *dev,
+				struct device_node *np, const char *con_id)
+{
+	return NULL;
+}
+
 static inline void clk_put(struct clk *clk) {}
 
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
@@ -501,9 +525,6 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
-struct device_node;
-struct of_phandle_args;
-
 #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
 struct clk *of_clk_get(struct device_node *np, int index);
 struct clk *of_clk_get_by_name(struct device_node *np, const char *name);
-- 
cgit v1.2.3


From 8cf868affdc459beee1a941df0cfaba1673740e3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Mon, 28 Nov 2016 13:03:21 -0500
Subject: tracing: Have the reg function allow to fail

Some tracepoints have a registration function that gets enabled when the
tracepoint is enabled. There may be cases that the registraction function
must fail (for example, can't allocate enough memory). In this case, the
tracepoint should also fail to register, otherwise the user would not know
why the tracepoint is not working.

Cc: David Howells <dhowells@redhat.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/powerpc/include/asm/trace.h                  |  4 ++--
 arch/powerpc/platforms/powernv/opal-tracepoints.c |  6 ++++--
 arch/powerpc/platforms/pseries/lpar.c             |  6 ++++--
 arch/x86/include/asm/trace/exceptions.h           |  2 +-
 arch/x86/include/asm/trace/irq_vectors.h          |  2 +-
 arch/x86/kernel/tracepoint.c                      |  3 ++-
 drivers/i2c/i2c-core.c                            |  3 ++-
 include/linux/tracepoint-defs.h                   |  2 +-
 include/linux/tracepoint.h                        |  2 +-
 include/trace/events/i2c.h                        |  2 +-
 kernel/trace/trace_benchmark.c                    |  3 ++-
 kernel/trace/trace_benchmark.h                    |  2 +-
 kernel/tracepoint.c                               | 12 +++++++++---
 samples/trace_events/trace-events-sample.c        |  3 ++-
 samples/trace_events/trace-events-sample.h        |  2 +-
 15 files changed, 34 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 32e36b16773f..c05cef6ee06c 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -54,7 +54,7 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
 );
 
 #ifdef CONFIG_PPC_PSERIES
-extern void hcall_tracepoint_regfunc(void);
+extern int hcall_tracepoint_regfunc(void);
 extern void hcall_tracepoint_unregfunc(void);
 
 TRACE_EVENT_FN_COND(hcall_entry,
@@ -104,7 +104,7 @@ TRACE_EVENT_FN_COND(hcall_exit,
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-extern void opal_tracepoint_regfunc(void);
+extern int opal_tracepoint_regfunc(void);
 extern void opal_tracepoint_unregfunc(void);
 
 TRACE_EVENT_FN(opal_entry,
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
index 1e496b780efd..3c447002edff 100644
--- a/arch/powerpc/platforms/powernv/opal-tracepoints.c
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -6,9 +6,10 @@
 #ifdef HAVE_JUMP_LABEL
 struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
 
-void opal_tracepoint_regfunc(void)
+int opal_tracepoint_regfunc(void)
 {
 	static_key_slow_inc(&opal_tracepoint_key);
+	return 0;
 }
 
 void opal_tracepoint_unregfunc(void)
@@ -25,9 +26,10 @@ void opal_tracepoint_unregfunc(void)
 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 extern long opal_tracepoint_refcount;
 
-void opal_tracepoint_regfunc(void)
+int opal_tracepoint_regfunc(void)
 {
 	opal_tracepoint_refcount++;
+	return 0;
 }
 
 void opal_tracepoint_unregfunc(void)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index aa35245d8d6d..c0423ce3955c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -661,9 +661,10 @@ EXPORT_SYMBOL(arch_free_page);
 #ifdef HAVE_JUMP_LABEL
 struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
 
-void hcall_tracepoint_regfunc(void)
+int hcall_tracepoint_regfunc(void)
 {
 	static_key_slow_inc(&hcall_tracepoint_key);
+	return 0;
 }
 
 void hcall_tracepoint_unregfunc(void)
@@ -680,9 +681,10 @@ void hcall_tracepoint_unregfunc(void)
 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 extern long hcall_tracepoint_refcount;
 
-void hcall_tracepoint_regfunc(void)
+int hcall_tracepoint_regfunc(void)
 {
 	hcall_tracepoint_refcount++;
+	return 0;
 }
 
 void hcall_tracepoint_unregfunc(void)
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2fbc66c7885b..2422b14c50a7 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -6,7 +6,7 @@
 
 #include <linux/tracepoint.h>
 
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
 extern void trace_irq_vector_unregfunc(void);
 
 DECLARE_EVENT_CLASS(x86_exceptions,
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 38a09a13a9bc..32dd6a9e343c 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,7 +6,7 @@
 
 #include <linux/tracepoint.h>
 
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
 extern void trace_irq_vector_unregfunc(void);
 
 DECLARE_EVENT_CLASS(x86_irq_vector,
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 1c113db9ed57..15515132bf0d 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -34,7 +34,7 @@ static void switch_idt(void *arg)
 	local_irq_restore(flags);
 }
 
-void trace_irq_vector_regfunc(void)
+int trace_irq_vector_regfunc(void)
 {
 	mutex_lock(&irq_vector_mutex);
 	if (!trace_irq_vector_refcount) {
@@ -44,6 +44,7 @@ void trace_irq_vector_regfunc(void)
 	}
 	trace_irq_vector_refcount++;
 	mutex_unlock(&irq_vector_mutex);
+	return 0;
 }
 
 void trace_irq_vector_unregfunc(void)
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index b432b64e307a..6a2b995d7fc4 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -77,9 +77,10 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 static struct static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE;
 static bool is_registered;
 
-void i2c_transfer_trace_reg(void)
+int i2c_transfer_trace_reg(void)
 {
 	static_key_slow_inc(&i2c_trace_msg);
+	return 0;
 }
 
 void i2c_transfer_trace_unreg(void)
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 4ac89acb6136..a03192052066 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -29,7 +29,7 @@ struct tracepoint_func {
 struct tracepoint {
 	const char *name;		/* Tracepoint name */
 	struct static_key key;
-	void (*regfunc)(void);
+	int (*regfunc)(void);
 	void (*unregfunc)(void);
 	struct tracepoint_func __rcu *funcs;
 };
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index be586c632a0c..f72fcfe0e66a 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -81,7 +81,7 @@ static inline void tracepoint_synchronize_unregister(void)
 }
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
-extern void syscall_regfunc(void);
+extern int syscall_regfunc(void);
 extern void syscall_unregfunc(void);
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
diff --git a/include/trace/events/i2c.h b/include/trace/events/i2c.h
index fe17187df65d..4abb8eab34d3 100644
--- a/include/trace/events/i2c.h
+++ b/include/trace/events/i2c.h
@@ -20,7 +20,7 @@
 /*
  * drivers/i2c/i2c-core.c
  */
-extern void i2c_transfer_trace_reg(void);
+extern int i2c_transfer_trace_reg(void);
 extern void i2c_transfer_trace_unreg(void);
 
 /*
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 0f109c4130d3..f76d0416dd83 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -164,11 +164,12 @@ static int benchmark_event_kthread(void *arg)
  * When the benchmark tracepoint is enabled, it calls this
  * function and the thread that calls the tracepoint is created.
  */
-void trace_benchmark_reg(void)
+int trace_benchmark_reg(void)
 {
 	bm_event_thread = kthread_run(benchmark_event_kthread,
 				      NULL, "event_benchmark");
 	WARN_ON(!bm_event_thread);
+	return 0;
 }
 
 /*
diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h
index 3c1df1df4e29..ebdbfc2f2a64 100644
--- a/kernel/trace/trace_benchmark.h
+++ b/kernel/trace/trace_benchmark.h
@@ -6,7 +6,7 @@
 
 #include <linux/tracepoint.h>
 
-extern void trace_benchmark_reg(void);
+extern int trace_benchmark_reg(void);
 extern void trace_benchmark_unreg(void);
 
 #define BENCHMARK_EVENT_STRLEN		128
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index d0639d917899..1f9a31f934a4 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -194,9 +194,13 @@ static int tracepoint_add_func(struct tracepoint *tp,
 			       struct tracepoint_func *func, int prio)
 {
 	struct tracepoint_func *old, *tp_funcs;
+	int ret;
 
-	if (tp->regfunc && !static_key_enabled(&tp->key))
-		tp->regfunc();
+	if (tp->regfunc && !static_key_enabled(&tp->key)) {
+		ret = tp->regfunc();
+		if (ret < 0)
+			return ret;
+	}
 
 	tp_funcs = rcu_dereference_protected(tp->funcs,
 			lockdep_is_held(&tracepoints_mutex));
@@ -529,7 +533,7 @@ EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint);
 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 static int sys_tracepoint_refcount;
 
-void syscall_regfunc(void)
+int syscall_regfunc(void)
 {
 	struct task_struct *p, *t;
 
@@ -541,6 +545,8 @@ void syscall_regfunc(void)
 		read_unlock(&tasklist_lock);
 	}
 	sys_tracepoint_refcount++;
+
+	return 0;
 }
 
 void syscall_unregfunc(void)
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 880a7d1d27d2..30e282d33d4d 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -79,7 +79,7 @@ static int simple_thread_fn(void *arg)
 
 static DEFINE_MUTEX(thread_mutex);
 
-void foo_bar_reg(void)
+int foo_bar_reg(void)
 {
 	pr_info("Starting thread for foo_bar_fn\n");
 	/*
@@ -90,6 +90,7 @@ void foo_bar_reg(void)
 	mutex_lock(&thread_mutex);
 	simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn");
 	mutex_unlock(&thread_mutex);
+	return 0;
 }
 
 void foo_bar_unreg(void)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index d6b75bb495b3..76a75ab7a608 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -354,7 +354,7 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond,
 	TP_printk("foo %s %d", __get_str(foo), __entry->bar)
 );
 
-void foo_bar_reg(void);
+int foo_bar_reg(void);
 void foo_bar_unreg(void);
 
 /*
-- 
cgit v1.2.3


From f9d03f96b988002027d4b28ea1b7a24729a4c9b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Dec 2016 15:20:32 -0700
Subject: block: improve handling of the magic discard payload

Instead of allocating a single unused biovec for discard requests, send
them down without any payload.  Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading
the number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block
   layer is significantly reduced
 - using this same scheme for other request types is trivial,
   which will be important for implementing the new WRITE_ZEROES
   op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as
   we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the
   future by merging non-contiguous discard bios into a single
   request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranges discards and to
remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES,
so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c                | 10 +--------
 block/blk-core.c           | 34 ++---------------------------
 block/blk-lib.c            |  2 +-
 block/blk-merge.c          | 53 +++++++++++++++-------------------------------
 drivers/nvme/host/core.c   | 17 ++++-----------
 drivers/nvme/host/nvme.h   |  6 ++++--
 drivers/nvme/host/pci.c    | 27 +++++++++++------------
 drivers/nvme/host/rdma.c   | 13 +++++-------
 drivers/nvme/target/loop.c |  4 ++--
 drivers/scsi/scsi_lib.c    |  6 +++---
 drivers/scsi/sd.c          | 24 ++++++++-------------
 include/linux/bio.h        |  3 ++-
 include/linux/blkdev.h     | 15 ++++++++++---
 13 files changed, 76 insertions(+), 138 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 83db1f37fd0b..2b375020fc49 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1840,15 +1840,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	BUG_ON(sectors <= 0);
 	BUG_ON(sectors >= bio_sectors(bio));
 
-	/*
-	 * Discards need a mutable bio_vec to accommodate the payload
-	 * required by the DSM TRIM and UNMAP commands.
-	 */
-	if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
-		split = bio_clone_bioset(bio, gfp, bs);
-	else
-		split = bio_clone_fast(bio, gfp, bs);
-
+	split = bio_clone_fast(bio, gfp, bs);
 	if (!split)
 		return NULL;
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 4b7ec5958055..bd642a43b98b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1475,38 +1475,6 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);
 
-/**
- * blk_add_request_payload - add a payload to a request
- * @rq: request to update
- * @page: page backing the payload
- * @offset: offset in page
- * @len: length of the payload.
- *
- * This allows to later add a payload to an already submitted request by
- * a block driver.  The driver needs to take care of freeing the payload
- * itself.
- *
- * Note that this is a quite horrible hack and nothing but handling of
- * discard requests should ever use it.
- */
-void blk_add_request_payload(struct request *rq, struct page *page,
-		int offset, unsigned int len)
-{
-	struct bio *bio = rq->bio;
-
-	bio->bi_io_vec->bv_page = page;
-	bio->bi_io_vec->bv_offset = offset;
-	bio->bi_io_vec->bv_len = len;
-
-	bio->bi_iter.bi_size = len;
-	bio->bi_vcnt = 1;
-	bio->bi_phys_segments = 1;
-
-	rq->__data_len = rq->resid_len = len;
-	rq->nr_phys_segments = 1;
-}
-EXPORT_SYMBOL_GPL(blk_add_request_payload);
-
 bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 			    struct bio *bio)
 {
@@ -2642,6 +2610,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
+	WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
+
 	req->__data_len -= total_bytes;
 
 	/* update sector only for requests with clear definition of sector */
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 510a6fb15318..ed89c8f4b2a0 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -80,7 +80,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			req_sects = end_sect - sector;
 		}
 
-		bio = next_bio(bio, 1, gfp_mask);
+		bio = next_bio(bio, 0, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev = bdev;
 		bio_set_op_attrs(bio, op, 0);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1002afdfee99..182398cb1524 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -241,18 +241,13 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	if (!bio)
 		return 0;
 
-	/*
-	 * This should probably be returning 0, but blk_add_request_payload()
-	 * (Christoph!!!!)
-	 */
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_SAME:
 	case REQ_OP_WRITE_ZEROES:
+		return 0;
+	case REQ_OP_WRITE_SAME:
 		return 1;
-	default:
-		break;
 	}
 
 	fbio = bio;
@@ -410,39 +405,21 @@ new_segment:
 	*bvprv = *bvec;
 }
 
+static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv,
+		struct scatterlist *sglist, struct scatterlist **sg)
+{
+	*sg = sglist;
+	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
+	return 1;
+}
+
 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
 			     struct scatterlist *sglist,
 			     struct scatterlist **sg)
 {
 	struct bio_vec bvec, bvprv = { NULL };
 	struct bvec_iter iter;
-	int nsegs, cluster;
-
-	nsegs = 0;
-	cluster = blk_queue_cluster(q);
-
-	switch (bio_op(bio)) {
-	case REQ_OP_DISCARD:
-	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_ZEROES:
-		/*
-		 * This is a hack - drivers should be neither modifying the
-		 * biovec, nor relying on bi_vcnt - but because of
-		 * blk_add_request_payload(), a discard bio may or may not have
-		 * a payload we need to set up here (thank you Christoph) and
-		 * bi_vcnt is really the only way of telling if we need to.
-		 */
-		if (!bio->bi_vcnt)
-			return 0;
-		/* Fall through */
-	case REQ_OP_WRITE_SAME:
-		*sg = sglist;
-		bvec = bio_iovec(bio);
-		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
-		return 1;
-	default:
-		break;
-	}
+	int cluster = blk_queue_cluster(q), nsegs = 0;
 
 	for_each_bio(bio)
 		bio_for_each_segment(bvec, bio, iter)
@@ -462,7 +439,11 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	struct scatterlist *sg = NULL;
 	int nsegs = 0;
 
-	if (rq->bio)
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
+	else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
+		nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
+	else if (rq->bio)
 		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
 	if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
@@ -495,7 +476,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	 * Something must have been wrong if the figured number of
 	 * segment is bigger than number of req's physical segments
 	 */
-	WARN_ON(nsegs > rq->nr_phys_segments);
+	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
 
 	return nsegs;
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1b48514fbe99..3b1d6478dcfb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_dsm_range *range;
-	struct page *page;
-	int offset;
 	unsigned int nr_bytes = blk_rq_bytes(req);
 
 	range = kmalloc(sizeof(*range), GFP_ATOMIC);
@@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	cmnd->dsm.nr = 0;
 	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	req->completion_data = range;
-	page = virt_to_page(range);
-	offset = offset_in_page(range);
-	blk_add_request_payload(req, page, offset, sizeof(*range));
-
-	/*
-	 * we set __data_len back to the size of the area to be discarded
-	 * on disk. This allows us to report completion on the full amount
-	 * of blocks described by the request.
-	 */
-	req->__data_len = nr_bytes;
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range);
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
 	return BLK_MQ_RQ_QUEUE_OK;
 }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a3d6ffd874af..bd5321441d12 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -236,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq)
 
 static inline void nvme_cleanup_cmd(struct request *req)
 {
-	if (req_op(req) == REQ_OP_DISCARD)
-		kfree(req->completion_data);
+	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		kfree(page_address(req->special_vec.bv_page) +
+		      req->special_vec.bv_offset);
+	}
 }
 
 static inline int nvme_error_status(u16 status)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 82b9b3f1f21d..717d6ea47ee4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -302,14 +302,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
 static __le64 **iod_list(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	return (__le64 **)(iod->sg + req->nr_phys_segments);
+	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
 static int nvme_init_iod(struct request *rq, unsigned size,
 		struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
-	int nseg = rq->nr_phys_segments;
+	int nseg = blk_rq_nr_phys_segments(rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -339,8 +339,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 	__le64 **list = iod_list(req);
 	dma_addr_t prp_dma = iod->first_dma;
 
-	nvme_cleanup_cmd(req);
-
 	if (iod->npages == 0)
 		dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
 	for (i = 0; i < iod->npages; i++) {
@@ -510,7 +508,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	int ret = BLK_MQ_RQ_QUEUE_ERROR;
 
-	sg_init_table(iod->sg, req->nr_phys_segments);
+	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
@@ -566,6 +564,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 		}
 	}
 
+	nvme_cleanup_cmd(req);
 	nvme_free_iod(dev, req);
 }
 
@@ -596,20 +595,20 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	map_len = nvme_map_len(req);
-	ret = nvme_init_iod(req, map_len, dev);
+	ret = nvme_setup_cmd(ns, req, &cmnd);
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		return ret;
 
-	ret = nvme_setup_cmd(ns, req, &cmnd);
+	map_len = nvme_map_len(req);
+	ret = nvme_init_iod(req, map_len, dev);
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
-		goto out;
+		goto out_free_cmd;
 
-	if (req->nr_phys_segments)
+	if (blk_rq_nr_phys_segments(req))
 		ret = nvme_map_data(dev, req, map_len, &cmnd);
 
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
-		goto out;
+		goto out_cleanup_iod;
 
 	blk_mq_start_request(req);
 
@@ -620,14 +619,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		else
 			ret = BLK_MQ_RQ_QUEUE_ERROR;
 		spin_unlock_irq(&nvmeq->q_lock);
-		goto out;
+		goto out_cleanup_iod;
 	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
 	return BLK_MQ_RQ_QUEUE_OK;
-out:
+out_cleanup_iod:
 	nvme_free_iod(dev, req);
+out_free_cmd:
+	nvme_cleanup_cmd(req);
 	return ret;
 }
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b037d0cb2a7e..251101bf982f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -952,8 +952,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
-	int nents, count;
-	int ret;
+	int count, ret;
 
 	req->num_sge = 1;
 	req->inline_data = false;
@@ -965,16 +964,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		return nvme_rdma_set_sg_null(c);
 
 	req->sg_table.sgl = req->first_sgl;
-	ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments,
-				req->sg_table.sgl);
+	ret = sg_alloc_table_chained(&req->sg_table,
+			blk_rq_nr_phys_segments(rq), req->sg_table.sgl);
 	if (ret)
 		return -ENOMEM;
 
-	nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
-	BUG_ON(nents > rq->nr_phys_segments);
-	req->nents = nents;
+	req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
 
-	count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents,
+	count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
 		    rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (unlikely(count <= 0)) {
 		sg_free_table_chained(&req->sg_table, true);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 57ded6b3ed8a..9aaa70071ae5 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -185,13 +185,13 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (blk_rq_bytes(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
 		ret = sg_alloc_table_chained(&iod->sg_table,
-			req->nr_phys_segments, iod->sg_table.sgl);
+				blk_rq_nr_phys_segments(req),
+				iod->sg_table.sgl);
 		if (ret)
 			return BLK_MQ_RQ_QUEUE_BUSY;
 
 		iod->req.sg = iod->sg_table.sgl;
 		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
-		BUG_ON(iod->req.sg_cnt > req->nr_phys_segments);
 	}
 
 	blk_mq_start_request(req);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 47a5c8783b89..9a8ccff1121f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1007,8 +1007,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
 	/*
 	 * If sg table allocation fails, requeue request later.
 	 */
-	if (unlikely(sg_alloc_table_chained(&sdb->table, req->nr_phys_segments,
-					sdb->table.sgl)))
+	if (unlikely(sg_alloc_table_chained(&sdb->table,
+			blk_rq_nr_phys_segments(req), sdb->table.sgl)))
 		return BLKPREP_DEFER;
 
 	/* 
@@ -1040,7 +1040,7 @@ int scsi_init_io(struct scsi_cmnd *cmd)
 	bool is_mq = (rq->mq_ctx != NULL);
 	int error;
 
-	BUG_ON(!rq->nr_phys_segments);
+	BUG_ON(!blk_rq_nr_phys_segments(rq));
 
 	error = scsi_init_sgtable(rq, &cmd->sdb);
 	if (error)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 65738b0aad36..079c2d9759fb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -716,7 +716,6 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	sector_t sector = blk_rq_pos(rq);
 	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int nr_bytes = blk_rq_bytes(rq);
 	unsigned int len;
 	int ret;
 	char *buf;
@@ -772,24 +771,19 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 		goto out;
 	}
 
-	rq->completion_data = page;
 	rq->timeout = SD_TIMEOUT;
 
 	cmd->transfersize = len;
 	cmd->allowed = SD_MAX_RETRIES;
 
-	/*
-	 * Initially __data_len is set to the amount of data that needs to be
-	 * transferred to the target. This amount depends on whether WRITE SAME
-	 * or UNMAP is being used. After the scatterlist has been mapped by
-	 * scsi_init_io() we set __data_len to the size of the area to be
-	 * discarded on disk. This allows us to report completion on the full
-	 * amount of blocks described by the request.
-	 */
-	blk_add_request_payload(rq, page, 0, len);
-	ret = scsi_init_io(cmd);
-	rq->__data_len = nr_bytes;
+	rq->special_vec.bv_page = page;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = len;
+
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+	rq->resid_len = len;
 
+	ret = scsi_init_io(cmd);
 out:
 	if (ret != BLKPREP_OK)
 		__free_page(page);
@@ -1182,8 +1176,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
 	struct request *rq = SCpnt->request;
 
-	if (req_op(rq) == REQ_OP_DISCARD)
-		__free_page(rq->completion_data);
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		__free_page(rq->special_vec.bv_page);
 
 	if (SCpnt->cmnd != rq->cmd) {
 		mempool_free(SCpnt->cmnd, sd_cdb_pool);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b15323934a29..7cf8a6c70a3f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -197,8 +197,9 @@ static inline unsigned bio_segments(struct bio *bio)
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_SAME:
 	case REQ_OP_WRITE_ZEROES:
+		return 0;
+	case REQ_OP_WRITE_SAME:
 		return 1;
 	default:
 		break;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebeef2b79c5a..c5393766909d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,10 +120,13 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
 /* IO stats tracking on */
 #define RQF_STATS		((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+   bio chain. */
+#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ)
+	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 #define BLK_MAX_CDB	16
 
@@ -175,6 +178,7 @@ struct request {
 	 */
 	union {
 		struct rb_node rb_node;	/* sort/lookup */
+		struct bio_vec special_vec;
 		void *completion_data;
 	};
 
@@ -909,8 +913,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
-extern void blk_add_request_payload(struct request *rq, struct page *page,
-		int offset, unsigned int len);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
@@ -1153,6 +1155,13 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return 1;
+	return rq->nr_phys_segments;
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
-- 
cgit v1.2.3


From fd4a0edf2a3d781c6ae07d2810776ce22302ee1c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Dec 2016 16:45:04 +0100
Subject: vfs: replace calling i_op->readlink with vfs_readlink()

Also check d_is_symlink() in callers instead of inode->i_op->readlink
because following patches will allow NULL ->readlink for symlinks.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/namei.c         | 21 +++++++++++++++++++++
 fs/nfsd/nfs4xdr.c  |  8 ++++----
 fs/nfsd/vfs.c      |  6 ++----
 fs/stat.c          |  8 +++++---
 fs/xfs/xfs_ioctl.c |  4 ++--
 include/linux/fs.h |  1 +
 6 files changed, 35 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 5b4eed221530..12a4159de72a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4668,6 +4668,27 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 }
 EXPORT_SYMBOL(generic_readlink);
 
+/**
+ * vfs_readlink - copy symlink body into userspace buffer
+ * @dentry: dentry on which to get symbolic link
+ * @buffer: user memory pointer
+ * @buflen: size of buffer
+ *
+ * Does not touch atime.  That's up to the caller if necessary
+ *
+ * Does not call security hook.
+ */
+int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (!inode->i_op->readlink)
+		return -EINVAL;
+
+	return inode->i_op->readlink(dentry, buffer, buflen);
+}
+EXPORT_SYMBOL(vfs_readlink);
+
 /**
  * vfs_get_link - get symlink body
  * @dentry: dentry on which to get symbolic link
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c2d2895a1ec1..645e1e3c3110 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3576,10 +3576,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 	if (!p)
 		return nfserr_resource;
 	/*
-	 * XXX: By default, the ->readlink() VFS op will truncate symlinks
-	 * if they would overflow the buffer.  Is this kosher in NFSv4?  If
-	 * not, one easy fix is: if ->readlink() precisely fills the buffer,
-	 * assume that truncation occurred, and return NFS4ERR_RESOURCE.
+	 * XXX: By default, vfs_readlink() will truncate symlinks if they
+	 * would overflow the buffer.  Is this kosher in NFSv4?  If not, one
+	 * easy fix is: if vfs_readlink() precisely fills the buffer, assume
+	 * that truncation occurred, and return NFS4ERR_RESOURCE.
 	 */
 	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
 						(char *)p, &maxcount);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ca642fe9b21..b854f02c1c36 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1451,7 +1451,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32
 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 {
-	struct inode	*inode;
 	mm_segment_t	oldfs;
 	__be32		err;
 	int		host_err;
@@ -1463,10 +1462,9 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 
 	path.mnt = fhp->fh_export->ex_path.mnt;
 	path.dentry = fhp->fh_dentry;
-	inode = d_inode(path.dentry);
 
 	err = nfserr_inval;
-	if (!inode->i_op->readlink)
+	if (!d_is_symlink(path.dentry))
 		goto out;
 
 	touch_atime(&path);
@@ -1475,7 +1473,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 	 */
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
-	host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp);
+	host_err = vfs_readlink(path.dentry, (char __user *)buf, *lenp);
 	set_fs(oldfs);
 
 	if (host_err < 0)
diff --git a/fs/stat.c b/fs/stat.c
index bc045c7994e1..0b210c3ead5c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -329,12 +329,14 @@ retry:
 		struct inode *inode = d_backing_inode(path.dentry);
 
 		error = empty ? -ENOENT : -EINVAL;
-		if (inode->i_op->readlink) {
+		/*
+		 * AFS mountpoints allow readlink(2) but are not symlinks
+		 */
+		if (d_is_symlink(path.dentry) || inode->i_op->readlink) {
 			error = security_inode_readlink(path.dentry);
 			if (!error) {
 				touch_atime(&path);
-				error = inode->i_op->readlink(path.dentry,
-							      buf, bufsiz);
+				error = vfs_readlink(path.dentry, buf, bufsiz);
 			}
 		}
 		path_put(&path);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c245bed3249b..9b12f7c993e9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -287,7 +287,7 @@ xfs_readlink_by_handle(
 		return PTR_ERR(dentry);
 
 	/* Restrict this handle operation to symlinks only. */
-	if (!d_inode(dentry)->i_op->readlink) {
+	if (!d_is_symlink(dentry)) {
 		error = -EINVAL;
 		goto out_dput;
 	}
@@ -297,7 +297,7 @@ xfs_readlink_by_handle(
 		goto out_dput;
 	}
 
-	error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen);
+	error = vfs_readlink(dentry, hreq->ohandle, olen);
 
  out_dput:
 	dput(dentry);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..eba20d1c068d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2935,6 +2935,7 @@ extern int vfs_lstat(const char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
 extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
+extern int vfs_readlink(struct dentry *, char __user *, int);
 
 extern int __generic_block_fiemap(struct inode *inode,
 				  struct fiemap_extent_info *fieinfo,
-- 
cgit v1.2.3


From 76fca90e9f3abc82114d9d02d8e14e0324a18ca2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Dec 2016 16:45:04 +0100
Subject: vfs: default to generic_readlink()

If i_op->readlink is NULL, but i_op->get_link is set then vfs_readlink()
defaults to calling generic_readlink().

The IOP_DEFAULT_READLINK flag indicates that the above conditions are met
and the default action can be taken.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/porting |  4 ++++
 Documentation/filesystems/vfs.txt |  9 ++++++---
 fs/namei.c                        | 15 ++++++++++++---
 include/linux/fs.h                |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index bdd025ceb763..95280079c0b3 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -596,3 +596,7 @@ in your dentry operations instead.
 [mandatory]
 	->rename() has an added flags argument.  Any flags not handled by the
         filesystem should result in EINVAL being returned.
+--
+[recommended]
+	->readlink is optional for symlinks.  Don't set, unless filesystem needs
+	to fake something for readlink(2).
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b5039a00caaf..038241123ca5 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -451,9 +451,6 @@ otherwise noted.
 	exist; this is checked by the VFS.  Unlike plain rename,
 	source and target may be of different type.
 
-  readlink: called by the readlink(2) system call. Only required if
-	you want to support reading symbolic links
-
   get_link: called by the VFS to follow a symbolic link to the
 	inode it points to.  Only required if you want to support
 	symbolic links.  This method returns the symlink body
@@ -468,6 +465,12 @@ otherwise noted.
 	argument.  If request can't be handled without leaving RCU mode,
 	have it return ERR_PTR(-ECHILD).
 
+  readlink: this is now just an override for use by readlink(2) for the
+	cases when ->get_link uses nd_jump_link() or object is not in
+	fact a symlink.  Normally filesystems should only implement
+	->get_link for symlinks and readlink(2) will automatically use
+	that.
+
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
diff --git a/fs/namei.c b/fs/namei.c
index 12a4159de72a..b87465d67c60 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4682,10 +4682,19 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
 	struct inode *inode = d_inode(dentry);
 
-	if (!inode->i_op->readlink)
-		return -EINVAL;
+	if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
+		if (unlikely(inode->i_op->readlink))
+			return inode->i_op->readlink(dentry, buffer, buflen);
+
+		if (!d_is_symlink(dentry))
+			return -EINVAL;
+
+		spin_lock(&inode->i_lock);
+		inode->i_opflags |= IOP_DEFAULT_READLINK;
+		spin_unlock(&inode->i_lock);
+	}
 
-	return inode->i_op->readlink(dentry, buffer, buflen);
+	return generic_readlink(dentry, buffer, buflen);
 }
 EXPORT_SYMBOL(vfs_readlink);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eba20d1c068d..f6c206eae6ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -595,6 +595,7 @@ is_uncached_acl(struct posix_acl *acl)
 #define IOP_LOOKUP	0x0002
 #define IOP_NOFOLLOW	0x0004
 #define IOP_XATTR	0x0008
+#define IOP_DEFAULT_READLINK	0x0010
 
 /*
  * Keep mostly read-only and often accessed (especially for
-- 
cgit v1.2.3


From d16744ec8ad011793144bb932ce822cc0c1733cb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Dec 2016 16:45:04 +0100
Subject: vfs: make generic_readlink() static

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/namei.c         | 4 ++--
 include/linux/fs.h | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index c9baf3b3ffd7..c248a9e1edd2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4650,7 +4650,8 @@ out:
  * have ->get_link() not calling nd_jump_link().  Using (or not using) it
  * for any given inode is up to filesystem.
  */
-int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+static int generic_readlink(struct dentry *dentry, char __user *buffer,
+			    int buflen)
 {
 	DEFINE_DELAYED_CALL(done);
 	struct inode *inode = d_inode(dentry);
@@ -4666,7 +4667,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 	do_delayed_call(&done);
 	return res;
 }
-EXPORT_SYMBOL(generic_readlink);
 
 /**
  * vfs_readlink - copy symlink body into userspace buffer
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f6c206eae6ac..e343d784651a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2915,7 +2915,6 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
-extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
 extern int vfs_getattr(struct path *, struct kstat *);
-- 
cgit v1.2.3


From ae911c5e796d51cb2d1ed3a55e73b9cc88d176cf Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 8 Dec 2016 13:19:30 -0700
Subject: blk-mq: add blk_mq_start_stopped_hw_queue()

We have a variant for all hardware queues, but not one for a single
hardware queue.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-mq.c         | 19 ++++++++++++-------
 include/linux/blk-mq.h |  1 +
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 90db5b490df9..c993476b630f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1064,18 +1064,23 @@ void blk_mq_start_hw_queues(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queues);
 
+void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+{
+	if (!blk_mq_hctx_stopped(hctx))
+		return;
+
+	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+	blk_mq_run_hw_queue(hctx, async);
+}
+EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
+
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (!blk_mq_hctx_stopped(hctx))
-			continue;
-
-		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-		blk_mq_run_hw_queue(hctx, async);
-	}
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_start_stopped_hw_queue(hctx, async);
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 35a0af5ede6d..87e404aae267 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -231,6 +231,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-- 
cgit v1.2.3


From 70b3ea056f3074be6d9256c312b64c0d90a4a683 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 7 Dec 2016 08:43:31 -0700
Subject: elevator: make the rqhash helpers exported

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/elevator.c         | 8 ++++----
 include/linux/elevator.h | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index a18a5db274e4..40f0c04e5ad3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -248,13 +248,13 @@ static inline void __elv_rqhash_del(struct request *rq)
 	rq->rq_flags &= ~RQF_HASHED;
 }
 
-static void elv_rqhash_del(struct request_queue *q, struct request *rq)
+void elv_rqhash_del(struct request_queue *q, struct request *rq)
 {
 	if (ELV_ON_HASH(rq))
 		__elv_rqhash_del(rq);
 }
 
-static void elv_rqhash_add(struct request_queue *q, struct request *rq)
+void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
@@ -263,13 +263,13 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 	rq->rq_flags |= RQF_HASHED;
 }
 
-static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 {
 	__elv_rqhash_del(rq);
 	elv_rqhash_add(q, rq);
 }
 
-static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
 	struct elevator_queue *e = q->elevator;
 	struct hlist_node *next;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index f219c9aed360..b276e9ef0e0b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -108,6 +108,11 @@ struct elevator_type
 
 #define ELV_HASH_BITS 6
 
+void elv_rqhash_del(struct request_queue *q, struct request *rq);
+void elv_rqhash_add(struct request_queue *q, struct request *rq);
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
+
 /*
  * each queue has an elevator_queue associated with it
  */
-- 
cgit v1.2.3


From a6ab53742a52786ca594438eff2c80c3a242aaf3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Mon, 14 Nov 2016 15:53:05 -0800
Subject: configfs: Minimize #include directives

Only include the header files that are needed by configfs.h itself.
Add #include <linux/stat.h>.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
---
 include/linux/configfs.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index d9d6a9d77489..9fc9843c0300 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -35,14 +35,11 @@
 #ifndef _CONFIGFS_H_
 #define _CONFIGFS_H_
 
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/kref.h>
-#include <linux/mutex.h>
-#include <linux/err.h>
-
-#include <linux/atomic.h>
+#include <linux/stat.h>   /* S_IRUGO */
+#include <linux/types.h>  /* ssize_t */
+#include <linux/list.h>   /* struct list_head */
+#include <linux/kref.h>   /* struct kref */
+#include <linux/mutex.h>  /* struct mutex */
 
 #define CONFIGFS_ITEM_NAME_LEN	20
 
-- 
cgit v1.2.3


From 876bec6f9bbfcb394916d17e35226b086c04dc45 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 9 Dec 2016 16:18:30 -0800
Subject: vfs: refactor clone/dedupe_file_range common functions

Hoist both the XFS reflink inode state and preparation code and the XFS
file blocks compare functions into the VFS so that ocfs2 can take
advantage of it for reflink and dedupe.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c      | 204 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.c | 213 +++------------------------------------------------
 include/linux/fs.h   |   6 ++
 3 files changed, 219 insertions(+), 204 deletions(-)

(limited to 'include/linux')

diff --git a/fs/read_write.c b/fs/read_write.c
index 6674a4b83c54..dbf3f7ffdf3f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1667,6 +1667,114 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
 }
 
+/*
+ * Check that the two inodes are eligible for cloning, the ranges make
+ * sense, and then flush all dirty data.  Caller must ensure that the
+ * inodes have been locked against any other modifications.
+ */
+int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+			       struct inode *inode_out, loff_t pos_out,
+			       u64 *len, bool is_dedupe)
+{
+	loff_t bs = inode_out->i_sb->s_blocksize;
+	loff_t blen;
+	loff_t isize;
+	bool same_inode = (inode_in == inode_out);
+	int ret;
+
+	/* Don't touch certain kinds of inodes */
+	if (IS_IMMUTABLE(inode_out))
+		return -EPERM;
+
+	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+		return -ETXTBSY;
+
+	/* Don't reflink dirs, pipes, sockets... */
+	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+		return -EISDIR;
+	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+		return -EINVAL;
+
+	/* Are we going all the way to the end? */
+	isize = i_size_read(inode_in);
+	if (isize == 0) {
+		*len = 0;
+		return 0;
+	}
+
+	/* Zero length dedupe exits immediately; reflink goes to EOF. */
+	if (*len == 0) {
+		if (is_dedupe) {
+			*len = 0;
+			return 0;
+		}
+		*len = isize - pos_in;
+	}
+
+	/* Ensure offsets don't wrap and the input is inside i_size */
+	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
+	    pos_in + *len > isize)
+		return -EINVAL;
+
+	/* Don't allow dedupe past EOF in the dest file */
+	if (is_dedupe) {
+		loff_t	disize;
+
+		disize = i_size_read(inode_out);
+		if (pos_out >= disize || pos_out + *len > disize)
+			return -EINVAL;
+	}
+
+	/* If we're linking to EOF, continue to the block boundary. */
+	if (pos_in + *len == isize)
+		blen = ALIGN(isize, bs) - pos_in;
+	else
+		blen = *len;
+
+	/* Only reflink if we're aligned to block boundaries */
+	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+		return -EINVAL;
+
+	/* Don't allow overlapped reflink within the same file */
+	if (same_inode) {
+		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+			return -EINVAL;
+	}
+
+	/* Wait for the completion of any pending IOs on both files */
+	inode_dio_wait(inode_in);
+	if (!same_inode)
+		inode_dio_wait(inode_out);
+
+	ret = filemap_write_and_wait_range(inode_in->i_mapping,
+			pos_in, pos_in + *len - 1);
+	if (ret)
+		return ret;
+
+	ret = filemap_write_and_wait_range(inode_out->i_mapping,
+			pos_out, pos_out + *len - 1);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check that the extents are the same.
+	 */
+	if (is_dedupe) {
+		bool		is_same = false;
+
+		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
+				inode_out, pos_out, *len, &is_same);
+		if (ret)
+			return ret;
+		if (!is_same)
+			return -EBADE;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+
 int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len)
 {
@@ -1718,6 +1826,102 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
 
+/*
+ * Read a page's worth of file data into the page cache.  Return the page
+ * locked.
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+	struct address_space *mapping;
+	struct page *page;
+	pgoff_t n;
+
+	n = offset >> PAGE_SHIFT;
+	mapping = inode->i_mapping;
+	page = read_mapping_page(mapping, n, NULL);
+	if (IS_ERR(page))
+		return page;
+	if (!PageUptodate(page)) {
+		put_page(page);
+		return ERR_PTR(-EIO);
+	}
+	lock_page(page);
+	return page;
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+				  struct inode *dest, loff_t destoff,
+				  loff_t len, bool *is_same)
+{
+	loff_t src_poff;
+	loff_t dest_poff;
+	void *src_addr;
+	void *dest_addr;
+	struct page *src_page;
+	struct page *dest_page;
+	loff_t cmp_len;
+	bool same;
+	int error;
+
+	error = -EINVAL;
+	same = true;
+	while (len) {
+		src_poff = srcoff & (PAGE_SIZE - 1);
+		dest_poff = destoff & (PAGE_SIZE - 1);
+		cmp_len = min(PAGE_SIZE - src_poff,
+			      PAGE_SIZE - dest_poff);
+		cmp_len = min(cmp_len, len);
+		if (cmp_len <= 0)
+			goto out_error;
+
+		src_page = vfs_dedupe_get_page(src, srcoff);
+		if (IS_ERR(src_page)) {
+			error = PTR_ERR(src_page);
+			goto out_error;
+		}
+		dest_page = vfs_dedupe_get_page(dest, destoff);
+		if (IS_ERR(dest_page)) {
+			error = PTR_ERR(dest_page);
+			unlock_page(src_page);
+			put_page(src_page);
+			goto out_error;
+		}
+		src_addr = kmap_atomic(src_page);
+		dest_addr = kmap_atomic(dest_page);
+
+		flush_dcache_page(src_page);
+		flush_dcache_page(dest_page);
+
+		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+			same = false;
+
+		kunmap_atomic(dest_addr);
+		kunmap_atomic(src_addr);
+		unlock_page(dest_page);
+		unlock_page(src_page);
+		put_page(dest_page);
+		put_page(src_page);
+
+		if (!same)
+			break;
+
+		srcoff += cmp_len;
+		destoff += cmp_len;
+		len -= cmp_len;
+	}
+
+	*is_same = same;
+	return 0;
+
+out_error:
+	return error;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
+
 int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 {
 	struct file_dedupe_range_info *info;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index a279b4e7f5fe..95d6828967f0 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1164,111 +1164,6 @@ err:
 	return error;
 }
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
-static struct page *
-xfs_get_page(
-	struct inode	*inode,
-	xfs_off_t	offset)
-{
-	struct address_space	*mapping;
-	struct page		*page;
-	pgoff_t			n;
-
-	n = offset >> PAGE_SHIFT;
-	mapping = inode->i_mapping;
-	page = read_mapping_page(mapping, n, NULL);
-	if (IS_ERR(page))
-		return page;
-	if (!PageUptodate(page)) {
-		put_page(page);
-		return ERR_PTR(-EIO);
-	}
-	lock_page(page);
-	return page;
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- */
-static int
-xfs_compare_extents(
-	struct inode	*src,
-	xfs_off_t	srcoff,
-	struct inode	*dest,
-	xfs_off_t	destoff,
-	xfs_off_t	len,
-	bool		*is_same)
-{
-	xfs_off_t	src_poff;
-	xfs_off_t	dest_poff;
-	void		*src_addr;
-	void		*dest_addr;
-	struct page	*src_page;
-	struct page	*dest_page;
-	xfs_off_t	cmp_len;
-	bool		same;
-	int		error;
-
-	error = -EINVAL;
-	same = true;
-	while (len) {
-		src_poff = srcoff & (PAGE_SIZE - 1);
-		dest_poff = destoff & (PAGE_SIZE - 1);
-		cmp_len = min(PAGE_SIZE - src_poff,
-			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
-		ASSERT(cmp_len > 0);
-
-		trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
-				XFS_I(dest), destoff);
-
-		src_page = xfs_get_page(src, srcoff);
-		if (IS_ERR(src_page)) {
-			error = PTR_ERR(src_page);
-			goto out_error;
-		}
-		dest_page = xfs_get_page(dest, destoff);
-		if (IS_ERR(dest_page)) {
-			error = PTR_ERR(dest_page);
-			unlock_page(src_page);
-			put_page(src_page);
-			goto out_error;
-		}
-		src_addr = kmap_atomic(src_page);
-		dest_addr = kmap_atomic(dest_page);
-
-		flush_dcache_page(src_page);
-		flush_dcache_page(dest_page);
-
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
-			same = false;
-
-		kunmap_atomic(dest_addr);
-		kunmap_atomic(src_addr);
-		unlock_page(dest_page);
-		unlock_page(src_page);
-		put_page(dest_page);
-		put_page(src_page);
-
-		if (!same)
-			break;
-
-		srcoff += cmp_len;
-		destoff += cmp_len;
-		len -= cmp_len;
-	}
-
-	*is_same = same;
-	return 0;
-
-out_error:
-	trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
-	return error;
-}
-
 /*
  * Link a range of blocks from one file to another.
  */
@@ -1286,14 +1181,11 @@ xfs_reflink_remap_range(
 	struct inode		*inode_out = file_inode(file_out);
 	struct xfs_inode	*dest = XFS_I(inode_out);
 	struct xfs_mount	*mp = src->i_mount;
-	loff_t			bs = inode_out->i_sb->s_blocksize;
 	bool			same_inode = (inode_in == inode_out);
 	xfs_fileoff_t		sfsbno, dfsbno;
 	xfs_filblks_t		fsblen;
 	xfs_extlen_t		cowextsize;
-	loff_t			isize;
 	ssize_t			ret;
-	loff_t			blen;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1310,26 +1202,8 @@ xfs_reflink_remap_range(
 		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
 	}
 
-	/* Don't touch certain kinds of inodes */
-	ret = -EPERM;
-	if (IS_IMMUTABLE(inode_out))
-		goto out_unlock;
-
-	ret = -ETXTBSY;
-	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
-		goto out_unlock;
-
-
-	/* Don't reflink dirs, pipes, sockets... */
-	ret = -EISDIR;
-	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-		goto out_unlock;
+	/* Check file eligibility and prepare for block sharing. */
 	ret = -EINVAL;
-	if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
-		goto out_unlock;
-	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-		goto out_unlock;
-
 	/* Don't reflink realtime inodes */
 	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
 		goto out_unlock;
@@ -1338,91 +1212,18 @@ xfs_reflink_remap_range(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	/* Are we going all the way to the end? */
-	isize = i_size_read(inode_in);
-	if (isize == 0) {
-		ret = 0;
-		goto out_unlock;
-	}
-
-	if (len == 0)
-		len = isize - pos_in;
-
-	/* Ensure offsets don't wrap and the input is inside i_size */
-	if (pos_in + len < pos_in || pos_out + len < pos_out ||
-	    pos_in + len > isize)
-		goto out_unlock;
-
-	/* Don't allow dedupe past EOF in the dest file */
-	if (is_dedupe) {
-		loff_t	disize;
-
-		disize = i_size_read(inode_out);
-		if (pos_out >= disize || pos_out + len > disize)
-			goto out_unlock;
-	}
-
-	/* If we're linking to EOF, continue to the block boundary. */
-	if (pos_in + len == isize)
-		blen = ALIGN(isize, bs) - pos_in;
-	else
-		blen = len;
-
-	/* Only reflink if we're aligned to block boundaries */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-		goto out_unlock;
-
-	/* Don't allow overlapped reflink within the same file */
-	if (same_inode) {
-		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-			goto out_unlock;
-	}
-
-	/* Wait for the completion of any pending IOs on both files */
-	inode_dio_wait(inode_in);
-	if (!same_inode)
-		inode_dio_wait(inode_out);
-
-	ret = filemap_write_and_wait_range(inode_in->i_mapping,
-			pos_in, pos_in + len - 1);
-	if (ret)
-		goto out_unlock;
-
-	ret = filemap_write_and_wait_range(inode_out->i_mapping,
-			pos_out, pos_out + len - 1);
-	if (ret)
+	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+			&len, is_dedupe);
+	if (ret || len == 0)
 		goto out_unlock;
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
-	/*
-	 * Check that the extents are the same.
-	 */
-	if (is_dedupe) {
-		bool		is_same = false;
-
-		ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
-				len, &is_same);
-		if (ret)
-			goto out_unlock;
-		if (!is_same) {
-			ret = -EBADE;
-			goto out_unlock;
-		}
-	}
-
+	/* Set flags and remap blocks. */
 	ret = xfs_reflink_set_inode_flag(src, dest);
 	if (ret)
 		goto out_unlock;
 
-	/*
-	 * Invalidate the page cache so that we can clear any CoW mappings
-	 * in the destination file.
-	 */
-	truncate_inode_pages_range(&inode_out->i_data, pos_out,
-				   PAGE_ALIGN(pos_out + len) - 1);
-
 	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
 	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
 	fsblen = XFS_B_TO_FSB(mp, len);
@@ -1431,6 +1232,10 @@ xfs_reflink_remap_range(
 	if (ret)
 		goto out_unlock;
 
+	/* Zap any page cache for the destination file's range. */
+	truncate_inode_pages_range(&inode_out->i_data, pos_out,
+				   PAGE_ALIGN(pos_out + len) - 1);
+
 	/*
 	 * Carry the cowextsize hint from src to dest if we're sharing the
 	 * entire source file to the entire destination file, the source file
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..caea736fa09c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1778,8 +1778,14 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, int);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
+extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+				      struct inode *inode_out, loff_t pos_out,
+				      u64 *len, bool is_dedupe);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len);
+extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+					 struct inode *dest, loff_t destoff,
+					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 
-- 
cgit v1.2.3


From c84d949057cab262b4d3110ead9a42a58c2958f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 8 Dec 2016 11:41:55 -0800
Subject: udp: copy skb->truesize in the first cache line

In UDP RX handler, we currently clear skb->dev before skb
is added to receive queue, because device pointer is no longer
available once we exit from RCU section.

Since this first cache line is always hot, lets reuse this space
to store skb->truesize and thus avoid a cache line miss at
udp_recvmsg()/udp_skb_destructor time while receive queue
spinlock is held.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  9 ++++++++-
 net/ipv4/udp.c         | 13 ++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0cd92b0f2af5..332e76756f54 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -645,8 +645,15 @@ struct sk_buff {
 		struct rb_node	rbnode; /* used in netem & tcp stack */
 	};
 	struct sock		*sk;
-	struct net_device	*dev;
 
+	union {
+		struct net_device	*dev;
+		/* Some protocols might use this space to store information,
+		 * while device pointer would be NULL.
+		 * UDP receive path is one user.
+		 */
+		unsigned long		dev_scratch;
+	};
 	/*
 	 * This is the control buffer. It is free to use for every
 	 * layer. Please put your private variables there. If you
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6a68d66f3b2..c608334d99aa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1188,10 +1188,14 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
 		__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 }
 
-/* Note: called with sk_receive_queue.lock held */
+/* Note: called with sk_receive_queue.lock held.
+ * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
+ * This avoids a cache line miss while receive_queue lock is held.
+ * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
+ */
 void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 {
-	udp_rmem_release(sk, skb->truesize, 1);
+	udp_rmem_release(sk, skb->dev_scratch, 1);
 }
 EXPORT_SYMBOL(udp_skb_destructor);
 
@@ -1246,6 +1250,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 		busy = busylock_acquire(sk);
 	}
 	size = skb->truesize;
+	/* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss
+	 * in udp_skb_destructor()
+	 */
+	skb->dev_scratch = size;
 
 	/* we drop only if the receive buf is full and the receive
 	 * queue contains some other skb
@@ -1272,7 +1280,6 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 	/* no need to setup a destructor, we will explicitly release the
 	 * forward allocated memory on dequeue
 	 */
-	skb->dev = NULL;
 	sock_skb_set_dropcount(sk, skb);
 
 	__skb_queue_tail(list, skb);
-- 
cgit v1.2.3


From 6b229cf77d683f634f0edd876c6d1015402303ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 8 Dec 2016 11:41:56 -0800
Subject: udp: add batching to udp_rmem_release()

If udp_recvmsg() constantly releases sk_rmem_alloc
for every read packet, it gives opportunity for
producers to immediately grab spinlocks and desperatly
try adding another packet, causing false sharing.

We can add a simple heuristic to give the signal
by batches of ~25 % of the queue capacity.

This patch considerably increases performance under
flood by about 50 %, since the thread draining the queue
is no longer slowed by false sharing.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  3 +++
 net/ipv4/udp.c      | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index d1fd8cd39478..c0f530809d1f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -79,6 +79,9 @@ struct udp_sock {
 	int			(*gro_complete)(struct sock *sk,
 						struct sk_buff *skb,
 						int nhoff);
+
+	/* This field is dirtied by udp_recvmsg() */
+	int		forward_deficit;
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c608334d99aa..5a38faa12cde 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1177,8 +1177,20 @@ out:
 /* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial)
 {
+	struct udp_sock *up = udp_sk(sk);
 	int amt;
 
+	if (likely(partial)) {
+		up->forward_deficit += size;
+		size = up->forward_deficit;
+		if (size < (sk->sk_rcvbuf >> 2) &&
+		    !skb_queue_empty(&sk->sk_receive_queue))
+			return;
+	} else {
+		size += up->forward_deficit;
+	}
+	up->forward_deficit = 0;
+
 	atomic_sub(size, &sk->sk_rmem_alloc);
 	sk->sk_forward_alloc += size;
 	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
-- 
cgit v1.2.3


From dff25ddb48086afcb434770caa3d6849a4489b85 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 2 Dec 2016 22:53:30 -0500
Subject: nfs: add support for the umask attribute

Clients can set the umask attribute when creating files to cause the
server to apply it always except when inheriting permissions from the
parent directory.  That way, the new files will end up with the same
permissions as files created locally.

See https://tools.ietf.org/html/draft-ietf-nfsv4-umask-02 for more details.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c            |  7 ++++++-
 fs/nfs/nfs4proc.c       | 16 ++++++++++++----
 fs/nfs/nfs4xdr.c        | 36 ++++++++++++++++++++++++------------
 include/linux/nfs4.h    |  1 +
 include/linux/nfs_xdr.h |  2 ++
 5 files changed, 45 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7483722162fa..cb22a9f9ae7e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1529,8 +1529,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		return -ENAMETOOLONG;
 
 	if (open_flags & O_CREAT) {
+		struct nfs_server *server = NFS_SERVER(dir);
+
+		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
+			mode &= ~current_umask();
+
 		attr.ia_valid |= ATTR_MODE;
-		attr.ia_mode = mode & ~current_umask();
+		attr.ia_mode = mode;
 	}
 	if (open_flags & O_TRUNC) {
 		attr.ia_valid |= ATTR_SIZE;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c5a508669655..d33242c8d95d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1224,6 +1224,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	atomic_inc(&sp->so_count);
 	p->o_arg.open_flags = flags;
 	p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
+	p->o_arg.umask = current_umask();
 	p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
 	p->o_arg.share_access = nfs4_map_atomic_open_share(server,
 			fmode, flags);
@@ -3337,7 +3338,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 
 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
 #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
@@ -4010,6 +4011,7 @@ static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		 int flags)
 {
+	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_label l, *ilabel = NULL;
 	struct nfs_open_context *ctx;
 	struct nfs4_state *state;
@@ -4021,7 +4023,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	ilabel = nfs4_label_init_security(dir, dentry, sattr, &l);
 
-	sattr->ia_mode &= ~current_umask();
+	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
+		sattr->ia_mode &= ~current_umask();
 	state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, NULL);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -4229,6 +4232,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 		data->arg.attrs = sattr;
 		data->arg.ftype = ftype;
 		data->arg.bitmask = nfs4_bitmask(server, data->label);
+		data->arg.umask = current_umask();
 		data->res.server = server;
 		data->res.fh = &data->fh;
 		data->res.fattr = &data->fattr;
@@ -4326,13 +4330,15 @@ out:
 static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 		struct iattr *sattr)
 {
+	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_exception exception = { };
 	struct nfs4_label l, *label = NULL;
 	int err;
 
 	label = nfs4_label_init_security(dir, dentry, sattr, &l);
 
-	sattr->ia_mode &= ~current_umask();
+	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
+		sattr->ia_mode &= ~current_umask();
 	do {
 		err = _nfs4_proc_mkdir(dir, dentry, sattr, label);
 		trace_nfs4_mkdir(dir, &dentry->d_name, err);
@@ -4435,13 +4441,15 @@ out:
 static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 		struct iattr *sattr, dev_t rdev)
 {
+	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_exception exception = { };
 	struct nfs4_label l, *label = NULL;
 	int err;
 
 	label = nfs4_label_init_security(dir, dentry, sattr, &l);
 
-	sattr->ia_mode &= ~current_umask();
+	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
+		sattr->ia_mode &= ~current_umask();
 	do {
 		err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev);
 		trace_nfs4_mknod(dir, &dentry->d_name, err);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1c1768a8fcd1..1af6268a7d8c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/fs_struct.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
@@ -1008,7 +1009,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 				const struct nfs4_label *label,
 				const struct nfs_server *server,
-				bool excl_check)
+				bool excl_check, const umode_t *umask)
 {
 	char owner_name[IDMAP_NAMESZ];
 	char owner_group[IDMAP_NAMESZ];
@@ -1022,18 +1023,21 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 
 	/*
 	 * We reserve enough space to write the entire attribute buffer at once.
-	 * In the worst-case, this would be
-	 * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
-	 * = 40 bytes, plus any contribution from variable-length fields
-	 *            such as owner/group.
 	 */
 	if (iap->ia_valid & ATTR_SIZE) {
 		bmval[0] |= FATTR4_WORD0_SIZE;
 		len += 8;
 	}
+	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
+		umask = NULL;
 	if (iap->ia_valid & ATTR_MODE) {
-		bmval[1] |= FATTR4_WORD1_MODE;
-		len += 4;
+		if (umask) {
+			bmval[2] |= FATTR4_WORD2_MODE_UMASK;
+			len += 8;
+		} else {
+			bmval[1] |= FATTR4_WORD1_MODE;
+			len += 4;
+		}
 	}
 	if (iap->ia_valid & ATTR_UID) {
 		owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
@@ -1134,6 +1138,10 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		*p++ = cpu_to_be32(label->len);
 		p = xdr_encode_opaque_fixed(p, label->label, label->len);
 	}
+	if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
+		*p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
+		*p++ = cpu_to_be32(*umask);
+	}
 
 /* out: */
 }
@@ -1188,7 +1196,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
 	}
 
 	encode_string(xdr, create->name->len, create->name->name);
-	encode_attrs(xdr, create->attrs, create->label, create->server, false);
+	encode_attrs(xdr, create->attrs, create->label, create->server, false,
+		     &create->umask);
 }
 
 static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1408,11 +1417,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	switch(arg->createmode) {
 	case NFS4_CREATE_UNCHECKED:
 		*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false);
+		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
+			     &arg->umask);
 		break;
 	case NFS4_CREATE_GUARDED:
 		*p = cpu_to_be32(NFS4_CREATE_GUARDED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false);
+		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
+			     &arg->umask);
 		break;
 	case NFS4_CREATE_EXCLUSIVE:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
@@ -1421,7 +1432,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	case NFS4_CREATE_EXCLUSIVE4_1:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
 		encode_nfs4_verifier(xdr, &arg->u.verifier);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true);
+		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true,
+			     &arg->umask);
 	}
 }
 
@@ -1677,7 +1689,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
 {
 	encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
 	encode_nfs4_stateid(xdr, &arg->stateid);
-	encode_attrs(xdr, arg->iap, arg->label, server, false);
+	encode_attrs(xdr, arg->iap, arg->label, server, false, NULL);
 }
 
 static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 9094faf0699d..bca536341d1a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -440,6 +440,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
+#define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 617cfaa20ffc..348f7c158084 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -433,6 +433,7 @@ struct nfs_openargs {
 	enum open_claim_type4	claim;
 	enum createmode4	createmode;
 	const struct nfs4_label *label;
+	umode_t			umask;
 };
 
 struct nfs_openres {
@@ -958,6 +959,7 @@ struct nfs4_create_arg {
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
 	const struct nfs4_label		*label;
+	umode_t				umask;
 };
 
 struct nfs4_create_res {
-- 
cgit v1.2.3


From f680b2845694a197e7bfc9f56b241f9a3f17296e Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 20 Nov 2016 02:55:45 -0800
Subject: hwmon: (core) Clarify when read and write callbacks are mandatory

The callback descrption in hwmon.h was misleading and stated that read and
write callbacks would be optional. More accurate is is that the callbacks
are mandatory if readable / writeable attributes are present.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 9d2f8bde7d12..b6a86aa4a9e2 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -298,8 +298,7 @@ enum hwmon_pwm_attributes {
  *			Channel number
  *		The function returns the file permissions.
  *		If the return value is 0, no attribute will be created.
- * @read:       Read callback. Optional. If not provided, attributes
- *		will not be readable.
+ * @read:	Read callback. Mandatory if readable attributes are present.
  *		Parameters are:
  *		@dev:	Pointer to hardware monitoring device
  *		@type:	Sensor type
@@ -308,8 +307,7 @@ enum hwmon_pwm_attributes {
  *			Channel number
  *		@val:	Pointer to returned value
  *		The function returns 0 on success or a negative error number.
- * @write:	Write callback. Optional. If not provided, attributes
- *		will not be writable.
+ * @write:	Write callback. Mandatory if writeable attributes are present.
  *		Parameters are:
  *		@dev:	Pointer to hardware monitoring device
  *		@type:	Sensor type
-- 
cgit v1.2.3


From e159ab5cb1afb519601a961405933c61cdd5a56a Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 7 Aug 2016 20:51:25 -0700
Subject: hwmon: (core) Add support for string attributes to new API

The new API is so far only suited for data attributes and does not work
well for string attributes, specifically for the 'label' attributes.
Provide a separate callback function for those.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 33 +++++++++++++++++++++++++++++++--
 include/linux/hwmon.h | 19 +++++++++++++++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index a74c075a30ec..491231fa0580 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -178,6 +178,22 @@ static ssize_t hwmon_attr_show(struct device *dev,
 	return sprintf(buf, "%ld\n", val);
 }
 
+static ssize_t hwmon_attr_show_string(struct device *dev,
+				      struct device_attribute *devattr,
+				      char *buf)
+{
+	struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr);
+	char *s;
+	int ret;
+
+	ret = hattr->ops->read_string(dev, hattr->type, hattr->attr,
+				      hattr->index, &s);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%s\n", s);
+}
+
 static ssize_t hwmon_attr_store(struct device *dev,
 				struct device_attribute *devattr,
 				const char *buf, size_t count)
@@ -205,6 +221,17 @@ static int hwmon_attr_base(enum hwmon_sensor_types type)
 	return 1;
 }
 
+static bool is_string_attr(enum hwmon_sensor_types type, u32 attr)
+{
+	return (type == hwmon_temp && attr == hwmon_temp_label) ||
+	       (type == hwmon_in && attr == hwmon_in_label) ||
+	       (type == hwmon_curr && attr == hwmon_curr_label) ||
+	       (type == hwmon_power && attr == hwmon_power_label) ||
+	       (type == hwmon_energy && attr == hwmon_energy_label) ||
+	       (type == hwmon_humidity && attr == hwmon_humidity_label) ||
+	       (type == hwmon_fan && attr == hwmon_fan_label);
+}
+
 static struct attribute *hwmon_genattr(struct device *dev,
 				       const void *drvdata,
 				       enum hwmon_sensor_types type,
@@ -218,6 +245,7 @@ static struct attribute *hwmon_genattr(struct device *dev,
 	struct attribute *a;
 	umode_t mode;
 	char *name;
+	bool is_string = is_string_attr(type, attr);
 
 	/* The attribute is invisible if there is no template string */
 	if (!template)
@@ -227,7 +255,8 @@ static struct attribute *hwmon_genattr(struct device *dev,
 	if (!mode)
 		return ERR_PTR(-ENOENT);
 
-	if ((mode & S_IRUGO) && !ops->read)
+	if ((mode & S_IRUGO) && ((is_string && !ops->read_string) ||
+				 (!is_string && !ops->read)))
 		return ERR_PTR(-EINVAL);
 	if ((mode & S_IWUGO) && !ops->write)
 		return ERR_PTR(-EINVAL);
@@ -252,7 +281,7 @@ static struct attribute *hwmon_genattr(struct device *dev,
 	hattr->ops = ops;
 
 	dattr = &hattr->dev_attr;
-	dattr->show = hwmon_attr_show;
+	dattr->show = is_string ? hwmon_attr_show_string : hwmon_attr_show;
 	dattr->store = hwmon_attr_store;
 
 	a = &dattr->attr;
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index b6a86aa4a9e2..e68334aede4c 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -298,7 +298,8 @@ enum hwmon_pwm_attributes {
  *			Channel number
  *		The function returns the file permissions.
  *		If the return value is 0, no attribute will be created.
- * @read:	Read callback. Mandatory if readable attributes are present.
+ * @read:	Read callback for data attributes. Mandatory if readable
+ *		data attributes are present.
  *		Parameters are:
  *		@dev:	Pointer to hardware monitoring device
  *		@type:	Sensor type
@@ -307,7 +308,19 @@ enum hwmon_pwm_attributes {
  *			Channel number
  *		@val:	Pointer to returned value
  *		The function returns 0 on success or a negative error number.
- * @write:	Write callback. Mandatory if writeable attributes are present.
+ * @read_string:
+ *		Read callback for string attributes. Mandatory if string
+ *		attributes are present.
+ *		Parameters are:
+ *		@dev:	Pointer to hardware monitoring device
+ *		@type:	Sensor type
+ *		@attr:	Sensor attribute
+ *		@channel:
+ *			Channel number
+ *		@str:	Pointer to returned string
+ *		The function returns 0 on success or a negative error number.
+ * @write:	Write callback for data attributes. Mandatory if writeable
+ *		data attributes are present.
  *		Parameters are:
  *		@dev:	Pointer to hardware monitoring device
  *		@type:	Sensor type
@@ -322,6 +335,8 @@ struct hwmon_ops {
 			      u32 attr, int channel);
 	int (*read)(struct device *dev, enum hwmon_sensor_types type,
 		    u32 attr, int channel, long *val);
+	int (*read_string)(struct device *dev, enum hwmon_sensor_types type,
+		    u32 attr, int channel, char **str);
 	int (*write)(struct device *dev, enum hwmon_sensor_types type,
 		     u32 attr, int channel, long val);
 };
-- 
cgit v1.2.3


From af1bd36c06b5fad33baa7ee16820226efbd96cd9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 16 Oct 2016 11:31:08 -0700
Subject: hwmon: (core) Deprecate hwmon_device_register()

Inform the user that hwmon_device_register() is deprecated,
and suggest conversion to the newest API. Also remove
hwmon_device_register() from the kernel API documentation.

Note that hwmon_device_register() is not marked as __deprecated()
since doing so might result in build errors.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/hwmon-kernel-api.txt | 34 +++++++++++++-------------------
 drivers/hwmon/hwmon.c                    |  3 +++
 include/linux/hwmon.h                    |  2 ++
 3 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
index 562ef44adb5e..633fa90909eb 100644
--- a/Documentation/hwmon/hwmon-kernel-api.txt
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -23,7 +23,6 @@ Each hardware monitoring driver must #include <linux/hwmon.h> and, in most
 cases, <linux/hwmon-sysfs.h>. linux/hwmon.h declares the following
 register/unregister functions:
 
-struct device *hwmon_device_register(struct device *dev);
 struct device *
 hwmon_device_register_with_groups(struct device *dev, const char *name,
 				  void *drvdata,
@@ -50,24 +49,19 @@ devm_hwmon_device_register_with_info(struct device *dev,
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
 
-hwmon_device_register registers a hardware monitoring device. The parameter
-of this function is a pointer to the parent device.
-This function returns a pointer to the newly created hardware monitoring device
-or PTR_ERR for failure. If this registration function is used, hardware
-monitoring sysfs attributes are expected to have been created and attached to
-the parent device prior to calling hwmon_device_register. A name attribute must
-have been created by the caller.
-
-hwmon_device_register_with_groups is similar to hwmon_device_register. However,
-it has additional parameters. The name parameter is a pointer to the hwmon
-device name. The registration function wil create a name sysfs attribute
-pointing to this name. The drvdata parameter is the pointer to the local
-driver data.  hwmon_device_register_with_groups will attach this pointer
-to the newly allocated hwmon device. The pointer can be retrieved by the driver
-using dev_get_drvdata() on the hwmon device pointer. The groups parameter is
+hwmon_device_register_with_groups registers a hardware monitoring device.
+The first parameter of this function is a pointer to the parent device.
+The name parameter is a pointer to the hwmon device name. The registration
+function wil create a name sysfs attribute pointing to this name.
+The drvdata parameter is the pointer to the local driver data.
+hwmon_device_register_with_groups will attach this pointer to the newly
+allocated hwmon device. The pointer can be retrieved by the driver using
+dev_get_drvdata() on the hwmon device pointer. The groups parameter is
 a pointer to a list of sysfs attribute groups. The list must be NULL terminated.
 hwmon_device_register_with_groups creates the hwmon device with name attribute
 as well as all sysfs attributes attached to the hwmon device.
+This function returns a pointer to the newly created hardware monitoring device
+or PTR_ERR for failure.
 
 devm_hwmon_device_register_with_groups is similar to
 hwmon_device_register_with_groups. However, it is device managed, meaning the
@@ -87,13 +81,13 @@ hwmon_device_unregister deregisters a registered hardware monitoring device.
 The parameter of this function is the pointer to the registered hardware
 monitoring device structure. This function must be called from the driver
 remove function if the hardware monitoring device was registered with
-hwmon_device_register, hwmon_device_register_with_groups, or
-hwmon_device_register_with_info.
+hwmon_device_register_with_groups or hwmon_device_register_with_info.
 
 devm_hwmon_device_unregister does not normally have to be called. It is only
 needed for error handling, and only needed if the driver probe fails after
-the call to devm_hwmon_device_register_with_groups and if the automatic
-(device managed) removal would be too late.
+the call to devm_hwmon_device_register_with_groups or
+hwmon_device_register_with_info and if the automatic (device managed)
+removal would be too late.
 
 Using devm_hwmon_device_register_with_info()
 --------------------------------------------
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 112aae60f51f..971c9eb78e6a 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -691,6 +691,9 @@ EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
  */
 struct device *hwmon_device_register(struct device *dev)
 {
+	dev_warn(dev,
+		 "hwmon_device_register() is deprecated. Please convert the driver to use hwmon_device_register_with_info().\n");
+
 	return hwmon_device_register_with_groups(dev, NULL, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register);
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index e68334aede4c..2588e6ee7660 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -362,7 +362,9 @@ struct hwmon_chip_info {
 	const struct hwmon_channel_info **info;
 };
 
+/* hwmon_device_register() is deprecated */
 struct device *hwmon_device_register(struct device *dev);
+
 struct device *
 hwmon_device_register_with_groups(struct device *dev, const char *name,
 				  void *drvdata,
-- 
cgit v1.2.3


From 848ba0a2f20dc121a3ef5272a24641d2bd963d8b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 16 Oct 2016 17:20:43 -0700
Subject: hwmon: (core) Rename groups parameter in API to extra_groups

The 'groups' parameter of hwmon_device_register_with_info() and
devm_hwmon_device_register_with_info() is only necessary if extra
non-standard attributes need to be provided. Rename the parameter
to extra_groups and clarify the documentation.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/hwmon-kernel-api.txt | 22 +++++++++++-----------
 drivers/hwmon/hwmon.c                    |  8 ++++----
 include/linux/hwmon.h                    |  8 ++++----
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
index 633fa90909eb..2505ae67e2b6 100644
--- a/Documentation/hwmon/hwmon-kernel-api.txt
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -37,14 +37,14 @@ struct device *
 hwmon_device_register_with_info(struct device *dev,
 				const char *name, void *drvdata,
 				const struct hwmon_chip_info *info,
-				const struct attribute_group **groups);
+				const struct attribute_group **extra_groups);
 
 struct device *
 devm_hwmon_device_register_with_info(struct device *dev,
-				     const char *name,
-				     void *drvdata,
-				     const struct hwmon_chip_info *info,
-				     const struct attribute_group **groups);
+				const char *name,
+				void *drvdata,
+				const struct hwmon_chip_info *info,
+				const struct attribute_group **extra_groups);
 
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
@@ -100,9 +100,9 @@ const char *name	Device name
 void *drvdata		Driver private data
 const struct hwmon_chip_info *info
 			Pointer to chip description.
-const struct attribute_group **groups
-			Null-terminated list of additional sysfs attribute
-			groups.
+const struct attribute_group **extra_groups
+			Null-terminated list of additional non-standard
+			sysfs attribute groups.
 
 This function returns a pointer to the created hardware monitoring device
 on success and a negative error code for failure.
@@ -287,9 +287,9 @@ Driver-provided sysfs attributes
 
 If the hardware monitoring device is registered with
 hwmon_device_register_with_info or devm_hwmon_device_register_with_info,
-it is most likely not necessary to provide sysfs attributes. Only non-standard
-sysfs attributes need to be provided when one of those registration functions
-is used.
+it is most likely not necessary to provide sysfs attributes. Only additional
+non-standard sysfs attributes need to be provided when one of those registration
+functions is used.
 
 The header file linux/hwmon-sysfs.h provides a number of useful macros to
 declare and use hardware monitoring sysfs attributes.
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 8dc0466a9307..58c328f4508d 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -659,8 +659,8 @@ EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups);
  * @dev: the parent device
  * @name: hwmon name attribute
  * @drvdata: driver data to attach to created device
- * @info: Pointer to hwmon chip information
- * @groups - pointer to list of driver specific attribute groups
+ * @info: pointer to hwmon chip information
+ * @extra_groups: pointer to list of additional non-standard attribute groups
  *
  * hwmon_device_unregister() must be called when the device is no
  * longer needed.
@@ -671,12 +671,12 @@ struct device *
 hwmon_device_register_with_info(struct device *dev, const char *name,
 				void *drvdata,
 				const struct hwmon_chip_info *chip,
-				const struct attribute_group **groups)
+				const struct attribute_group **extra_groups)
 {
 	if (chip && (!chip->ops || !chip->ops->is_visible || !chip->info))
 		return ERR_PTR(-EINVAL);
 
-	return __hwmon_device_register(dev, name, drvdata, chip, groups);
+	return __hwmon_device_register(dev, name, drvdata, chip, extra_groups);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
 
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 2588e6ee7660..78d59dba563e 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -377,12 +377,12 @@ struct device *
 hwmon_device_register_with_info(struct device *dev,
 				const char *name, void *drvdata,
 				const struct hwmon_chip_info *info,
-				const struct attribute_group **groups);
+				const struct attribute_group **extra_groups);
 struct device *
 devm_hwmon_device_register_with_info(struct device *dev,
-				     const char *name, void *drvdata,
-				     const struct hwmon_chip_info *info,
-				     const struct attribute_group **groups);
+				const char *name, void *drvdata,
+				const struct hwmon_chip_info *info,
+				const struct attribute_group **extra_groups);
 
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
-- 
cgit v1.2.3


From db717d8e26c2d1b0dba3e08668a1e6a7f665adde Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 26 Nov 2016 19:07:49 -0500
Subject: fscrypto: move ioctl processing more fully into common code

Multiple bugs were recently fixed in the "set encryption policy" ioctl.
To make it clear that fscrypt_process_policy() and fscrypt_get_policy()
implement ioctls and therefore their implementations must take standard
security and correctness precautions, rename them to
fscrypt_ioctl_set_policy() and fscrypt_ioctl_get_policy().  Make the
latter take in a struct file * to make it consistent with the former.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/policy.c       | 34 +++++++++++++++++++++-------------
 fs/ext4/ext4.h           |  4 ++--
 fs/ext4/ioctl.c          | 34 +++++-----------------------------
 fs/f2fs/f2fs.h           |  4 ++--
 fs/f2fs/file.c           | 19 ++-----------------
 include/linux/fscrypto.h | 12 ++++++------
 6 files changed, 38 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 6865663aac69..b96a10e3cf78 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -93,16 +93,19 @@ static int create_encryption_context_from_policy(struct inode *inode,
 	return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
 }
 
-int fscrypt_process_policy(struct file *filp,
-				const struct fscrypt_policy *policy)
+int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
 {
+	struct fscrypt_policy policy;
 	struct inode *inode = file_inode(filp);
 	int ret;
 
+	if (copy_from_user(&policy, arg, sizeof(policy)))
+		return -EFAULT;
+
 	if (!inode_owner_or_capable(inode))
 		return -EACCES;
 
-	if (policy->version != 0)
+	if (policy.version != 0)
 		return -EINVAL;
 
 	ret = mnt_want_write_file(filp);
@@ -120,9 +123,9 @@ int fscrypt_process_policy(struct file *filp,
 			ret = -ENOTEMPTY;
 		else
 			ret = create_encryption_context_from_policy(inode,
-								    policy);
+								    &policy);
 	} else if (!is_encryption_context_consistent_with_policy(inode,
-								 policy)) {
+								 &policy)) {
 		printk(KERN_WARNING
 		       "%s: Policy inconsistent with encryption context\n",
 		       __func__);
@@ -134,11 +137,13 @@ int fscrypt_process_policy(struct file *filp,
 	mnt_drop_write_file(filp);
 	return ret;
 }
-EXPORT_SYMBOL(fscrypt_process_policy);
+EXPORT_SYMBOL(fscrypt_ioctl_set_policy);
 
-int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
+int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
 {
+	struct inode *inode = file_inode(filp);
 	struct fscrypt_context ctx;
+	struct fscrypt_policy policy;
 	int res;
 
 	if (!inode->i_sb->s_cop->get_context ||
@@ -151,15 +156,18 @@ int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
 	if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
 		return -EINVAL;
 
-	policy->version = 0;
-	policy->contents_encryption_mode = ctx.contents_encryption_mode;
-	policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
-	policy->flags = ctx.flags;
-	memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
+	policy.version = 0;
+	policy.contents_encryption_mode = ctx.contents_encryption_mode;
+	policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
+	policy.flags = ctx.flags;
+	memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
 				FS_KEY_DESCRIPTOR_SIZE);
+
+	if (copy_to_user(arg, &policy, sizeof(policy)))
+		return -EFAULT;
 	return 0;
 }
-EXPORT_SYMBOL(fscrypt_get_policy);
+EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
 
 int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 282a51b07c57..bd8bc3be93a5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2338,8 +2338,8 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
 #define fscrypt_pullback_bio_page	fscrypt_notsupp_pullback_bio_page
 #define fscrypt_restore_control_page	fscrypt_notsupp_restore_control_page
 #define fscrypt_zeroout_range		fscrypt_notsupp_zeroout_range
-#define fscrypt_process_policy		fscrypt_notsupp_process_policy
-#define fscrypt_get_policy		fscrypt_notsupp_get_policy
+#define fscrypt_ioctl_set_policy	fscrypt_notsupp_ioctl_set_policy
+#define fscrypt_ioctl_get_policy	fscrypt_notsupp_ioctl_get_policy
 #define fscrypt_has_permitted_context	fscrypt_notsupp_has_permitted_context
 #define fscrypt_inherit_context		fscrypt_notsupp_inherit_context
 #define fscrypt_get_encryption_info	fscrypt_notsupp_get_encryption_info
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae8ebbc97..70083863ce51 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -765,22 +765,12 @@ resizefs_out:
 	}
 	case EXT4_IOC_PRECACHE_EXTENTS:
 		return ext4_ext_precache(inode);
-	case EXT4_IOC_SET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-		struct fscrypt_policy policy;
 
+	case EXT4_IOC_SET_ENCRYPTION_POLICY:
 		if (!ext4_has_feature_encrypt(sb))
 			return -EOPNOTSUPP;
+		return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
 
-		if (copy_from_user(&policy,
-				   (struct fscrypt_policy __user *)arg,
-				   sizeof(policy)))
-			return -EFAULT;
-		return fscrypt_process_policy(filp, &policy);
-#else
-		return -EOPNOTSUPP;
-#endif
-	}
 	case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
 		int err, err2;
 		struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -817,23 +807,9 @@ resizefs_out:
 			return -EFAULT;
 		return 0;
 	}
-	case EXT4_IOC_GET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-		struct fscrypt_policy policy;
-		int err = 0;
-
-		if (!ext4_encrypted_inode(inode))
-			return -ENOENT;
-		err = fscrypt_get_policy(inode, &policy);
-		if (err)
-			return err;
-		if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
-			return -EFAULT;
-		return 0;
-#else
-		return -EOPNOTSUPP;
-#endif
-	}
+	case EXT4_IOC_GET_ENCRYPTION_POLICY:
+		return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
+
 	case EXT4_IOC_FSGETXATTR:
 	{
 		struct fsxattr fa;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9e8de18a168a..8e94b7bda42b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2453,8 +2453,8 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
 #define fscrypt_pullback_bio_page	fscrypt_notsupp_pullback_bio_page
 #define fscrypt_restore_control_page	fscrypt_notsupp_restore_control_page
 #define fscrypt_zeroout_range		fscrypt_notsupp_zeroout_range
-#define fscrypt_process_policy		fscrypt_notsupp_process_policy
-#define fscrypt_get_policy		fscrypt_notsupp_get_policy
+#define fscrypt_ioctl_set_policy	fscrypt_notsupp_ioctl_set_policy
+#define fscrypt_ioctl_get_policy	fscrypt_notsupp_ioctl_get_policy
 #define fscrypt_has_permitted_context	fscrypt_notsupp_has_permitted_context
 #define fscrypt_inherit_context		fscrypt_notsupp_inherit_context
 #define fscrypt_get_encryption_info	fscrypt_notsupp_get_encryption_info
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c7865073cd26..f0c83f74557d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1752,31 +1752,16 @@ static bool uuid_is_nonzero(__u8 u[16])
 
 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
 {
-	struct fscrypt_policy policy;
 	struct inode *inode = file_inode(filp);
 
-	if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
-							sizeof(policy)))
-		return -EFAULT;
-
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 
-	return fscrypt_process_policy(filp, &policy);
+	return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
 }
 
 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
 {
-	struct fscrypt_policy policy;
-	struct inode *inode = file_inode(filp);
-	int err;
-
-	err = fscrypt_get_policy(inode, &policy);
-	if (err)
-		return err;
-
-	if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy)))
-		return -EFAULT;
-	return 0;
+	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
 }
 
 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 98c71e973a96..be94684dc05f 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -259,8 +259,8 @@ extern void fscrypt_restore_control_page(struct page *);
 extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
 						unsigned int);
 /* policy.c */
-extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
-extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
+extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
+extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
 					void *, bool);
@@ -334,14 +334,14 @@ static inline int fscrypt_notsupp_zeroout_range(const struct inode *i, pgoff_t p
 }
 
 /* policy.c */
-static inline int fscrypt_notsupp_process_policy(struct file *f,
-				const struct fscrypt_policy *p)
+static inline int fscrypt_notsupp_ioctl_set_policy(struct file *f,
+				const void __user *arg)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline int fscrypt_notsupp_get_policy(struct inode *i,
-				struct fscrypt_policy *p)
+static inline int fscrypt_notsupp_ioctl_get_policy(struct file *f,
+				void __user *arg)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 3325bea5b26ac67e2521383f10e5ea0156c9a4b6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Nov 2016 20:32:46 -0500
Subject: fscrypt: rename get_crypt_info() to fscrypt_get_crypt_info()

To avoid namespace collisions, rename get_crypt_info() to
fscrypt_get_crypt_info().  The function is only used inside the
fs/crypto directory, so declare it in the new header file,
fscrypt_private.h.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fname.c           |  4 ++--
 fs/crypto/fscrypt_private.h | 19 +++++++++++++++++++
 fs/crypto/keyinfo.c         |  6 +++---
 include/linux/fscrypto.h    |  1 -
 4 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 fs/crypto/fscrypt_private.h

(limited to 'include/linux')

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 6b45d9caeeb0..56ad9d195f18 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -12,7 +12,7 @@
 
 #include <linux/scatterlist.h>
 #include <linux/ratelimit.h>
-#include <linux/fscrypto.h>
+#include "fscrypt_private.h"
 
 /**
  * fname_crypt_complete() - completion callback for filename crypto
@@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
 		fname->disk_name.len = iname->len;
 		return 0;
 	}
-	ret = get_crypt_info(dir);
+	ret = fscrypt_get_crypt_info(dir);
 	if (ret && ret != -EOPNOTSUPP)
 		return ret;
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
new file mode 100644
index 000000000000..7c31108728e4
--- /dev/null
+++ b/fs/crypto/fscrypt_private.h
@@ -0,0 +1,19 @@
+/*
+ * fscrypt_private.h
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption key functions.
+ *
+ * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
+ */
+
+#ifndef _FSCRYPT_PRIVATE_H
+#define _FSCRYPT_PRIVATE_H
+
+#include <linux/fscrypto.h>
+
+/* keyinfo.c */
+extern int fscrypt_get_crypt_info(struct inode *);
+
+#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 67fb6d8876d0..35d3317a27b3 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,7 +10,7 @@
 
 #include <keys/user-type.h>
 #include <linux/scatterlist.h>
-#include <linux/fscrypto.h>
+#include "fscrypt_private.h"
 
 static void derive_crypt_complete(struct crypto_async_request *req, int rc)
 {
@@ -178,7 +178,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
 	kmem_cache_free(fscrypt_info_cachep, ci);
 }
 
-int get_crypt_info(struct inode *inode)
+int fscrypt_get_crypt_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
@@ -327,7 +327,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 		 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
 					       (1 << KEY_FLAG_REVOKED) |
 					       (1 << KEY_FLAG_DEAD)))))
-		return get_crypt_info(inode);
+		return fscrypt_get_crypt_info(inode);
 	return 0;
 }
 EXPORT_SYMBOL(fscrypt_get_encryption_info);
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index be94684dc05f..2f8894f0696c 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -265,7 +265,6 @@ extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
 					void *, bool);
 /* keyinfo.c */
-extern int get_crypt_info(struct inode *);
 extern int fscrypt_get_encryption_info(struct inode *);
 extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
 
-- 
cgit v1.2.3


From b98701df349b7003efd52d9330acbb7be5a255c6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Nov 2016 20:43:09 -0500
Subject: fscrypt: unexport fscrypt_initialize()

The fscrypt_initalize() function isn't used outside fs/crypto, so
there's no point making it be an exported symbol.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/crypto.c          | 1 -
 fs/crypto/fscrypt_private.h | 3 +++
 include/linux/fscrypto.h    | 1 -
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index b6029785714c..56f98f45cece 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -540,7 +540,6 @@ fail:
 	mutex_unlock(&fscrypt_init_mutex);
 	return res;
 }
-EXPORT_SYMBOL(fscrypt_initialize);
 
 /**
  * fscrypt_init() - Set up for fs encryption.
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7c31108728e4..bb92f0c0961b 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -13,6 +13,9 @@
 
 #include <linux/fscrypto.h>
 
+/* crypto.c */
+int fscrypt_initialize(void);
+
 /* keyinfo.c */
 extern int fscrypt_get_crypt_info(struct inode *);
 
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 2f8894f0696c..ce2ebdee6a89 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -244,7 +244,6 @@ static inline void fscrypt_set_d_op(struct dentry *dentry)
 #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
 /* crypto.c */
 extern struct kmem_cache *fscrypt_info_cachep;
-int fscrypt_initialize(void);
 
 extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-- 
cgit v1.2.3


From cc4e0df038ddb73510c01712abf302b3f0130147 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Nov 2016 22:05:18 -0500
Subject: fscrypt: move non-public structures and constants to
 fscrypt_private.h

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/crypto.c          |  2 +-
 fs/crypto/fscrypt_private.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
 fs/crypto/policy.c          |  2 +-
 include/linux/fscrypto.h    | 68 ++-----------------------------------------
 4 files changed, 76 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 56f98f45cece..4d9d221b1d60 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -27,7 +27,7 @@
 #include <linux/bio.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
-#include <linux/fscrypto.h>
+#include "fscrypt_private.h"
 
 static unsigned int num_prealloc_crypto_pages = 32;
 static unsigned int num_prealloc_crypto_ctxs = 128;
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index bb92f0c0961b..c98b2a7fb6d3 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -13,6 +13,77 @@
 
 #include <linux/fscrypto.h>
 
+#define FS_FNAME_CRYPTO_DIGEST_SIZE	32
+
+/* Encryption parameters */
+#define FS_XTS_TWEAK_SIZE		16
+#define FS_AES_128_ECB_KEY_SIZE		16
+#define FS_AES_256_GCM_KEY_SIZE		32
+#define FS_AES_256_CBC_KEY_SIZE		32
+#define FS_AES_256_CTS_KEY_SIZE		32
+#define FS_AES_256_XTS_KEY_SIZE		64
+#define FS_MAX_KEY_SIZE			64
+
+#define FS_KEY_DESC_PREFIX		"fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE		8
+
+#define FS_KEY_DERIVATION_NONCE_SIZE		16
+
+/**
+ * Encryption context for inode
+ *
+ * Protector format:
+ *  1 byte: Protector format (1 = this version)
+ *  1 byte: File contents encryption mode
+ *  1 byte: File names encryption mode
+ *  1 byte: Flags
+ *  8 bytes: Master Key descriptor
+ *  16 bytes: Encryption Key derivation nonce
+ */
+struct fscrypt_context {
+	u8 format;
+	u8 contents_encryption_mode;
+	u8 filenames_encryption_mode;
+	u8 flags;
+	u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+} __packed;
+
+#define FS_ENCRYPTION_CONTEXT_FORMAT_V1		1
+
+/* This is passed in from userspace into the kernel keyring */
+struct fscrypt_key {
+	u32 mode;
+	u8 raw[FS_MAX_KEY_SIZE];
+	u32 size;
+} __packed;
+
+/*
+ * A pointer to this structure is stored in the file system's in-core
+ * representation of an inode.
+ */
+struct fscrypt_info {
+	u8 ci_data_mode;
+	u8 ci_filename_mode;
+	u8 ci_flags;
+	struct crypto_skcipher *ci_ctfm;
+	struct key *ci_keyring_key;
+	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL		0x00000001
+#define FS_WRITE_PATH_FL			0x00000002
+
+struct fscrypt_completion_result {
+	struct completion completion;
+	int res;
+};
+
+#define DECLARE_FS_COMPLETION_RESULT(ecr) \
+	struct fscrypt_completion_result ecr = { \
+		COMPLETION_INITIALIZER((ecr).completion), 0 }
+
+
 /* crypto.c */
 int fscrypt_initialize(void);
 
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index b96a10e3cf78..6ed7c2eebeec 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -10,8 +10,8 @@
 
 #include <linux/random.h>
 #include <linux/string.h>
-#include <linux/fscrypto.h>
 #include <linux/mount.h>
+#include "fscrypt_private.h"
 
 static int inode_has_encryption_context(struct inode *inode)
 {
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index ce2ebdee6a89..71e8a20711ec 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -18,9 +18,6 @@
 #include <crypto/skcipher.h>
 #include <uapi/linux/fs.h>
 
-#define FS_KEY_DERIVATION_NONCE_SIZE		16
-#define FS_ENCRYPTION_CONTEXT_FORMAT_V1		1
-
 #define FS_POLICY_FLAGS_PAD_4		0x00
 #define FS_POLICY_FLAGS_PAD_8		0x01
 #define FS_POLICY_FLAGS_PAD_16		0x02
@@ -35,56 +32,10 @@
 #define FS_ENCRYPTION_MODE_AES_256_CBC		3
 #define FS_ENCRYPTION_MODE_AES_256_CTS		4
 
-/**
- * Encryption context for inode
- *
- * Protector format:
- *  1 byte: Protector format (1 = this version)
- *  1 byte: File contents encryption mode
- *  1 byte: File names encryption mode
- *  1 byte: Flags
- *  8 bytes: Master Key descriptor
- *  16 bytes: Encryption Key derivation nonce
- */
-struct fscrypt_context {
-	u8 format;
-	u8 contents_encryption_mode;
-	u8 filenames_encryption_mode;
-	u8 flags;
-	u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
-	u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
-} __packed;
-
-/* Encryption parameters */
-#define FS_XTS_TWEAK_SIZE		16
-#define FS_AES_128_ECB_KEY_SIZE		16
-#define FS_AES_256_GCM_KEY_SIZE		32
-#define FS_AES_256_CBC_KEY_SIZE		32
-#define FS_AES_256_CTS_KEY_SIZE		32
-#define FS_AES_256_XTS_KEY_SIZE		64
-#define FS_MAX_KEY_SIZE			64
-
-#define FS_KEY_DESC_PREFIX		"fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE		8
-
-/* This is passed in from userspace into the kernel keyring */
-struct fscrypt_key {
-	u32 mode;
-	u8 raw[FS_MAX_KEY_SIZE];
-	u32 size;
-} __packed;
-
-struct fscrypt_info {
-	u8 ci_data_mode;
-	u8 ci_filename_mode;
-	u8 ci_flags;
-	struct crypto_skcipher *ci_ctfm;
-	struct key *ci_keyring_key;
-	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
-};
+#define FS_CRYPTO_BLOCK_SIZE		16
 
-#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL		0x00000001
-#define FS_WRITE_PATH_FL			0x00000002
+struct fscrypt_info;
+struct fscrypt_ctx;
 
 struct fscrypt_ctx {
 	union {
@@ -102,19 +53,6 @@ struct fscrypt_ctx {
 	u8 mode;				/* Encryption mode for tfm */
 };
 
-struct fscrypt_completion_result {
-	struct completion completion;
-	int res;
-};
-
-#define DECLARE_FS_COMPLETION_RESULT(ecr) \
-	struct fscrypt_completion_result ecr = { \
-		COMPLETION_INITIALIZER((ecr).completion), 0 }
-
-#define FS_FNAME_NUM_SCATTER_ENTRIES	4
-#define FS_CRYPTO_BLOCK_SIZE		16
-#define FS_FNAME_CRYPTO_DIGEST_SIZE	32
-
 /**
  * For encrypted symlinks, the ciphertext length is stored at the beginning
  * of the string in little-endian format.
-- 
cgit v1.2.3


From 41d5319af3368127b55f6587f1c747dd6a7b9b04 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Nov 2016 22:18:59 -0500
Subject: fscrypt: move the policy flags and encryption mode definitions to
 uapi header

These constants are part of the UAPI, so they belong in
include/uapi/linux/fs.h instead of include/linux/fscrypto.h

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/fscrypto.h | 14 --------------
 include/uapi/linux/fs.h  | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 71e8a20711ec..42ef82d60790 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -18,20 +18,6 @@
 #include <crypto/skcipher.h>
 #include <uapi/linux/fs.h>
 
-#define FS_POLICY_FLAGS_PAD_4		0x00
-#define FS_POLICY_FLAGS_PAD_8		0x01
-#define FS_POLICY_FLAGS_PAD_16		0x02
-#define FS_POLICY_FLAGS_PAD_32		0x03
-#define FS_POLICY_FLAGS_PAD_MASK	0x03
-#define FS_POLICY_FLAGS_VALID		0x03
-
-/* Encryption algorithms */
-#define FS_ENCRYPTION_MODE_INVALID		0
-#define FS_ENCRYPTION_MODE_AES_256_XTS		1
-#define FS_ENCRYPTION_MODE_AES_256_GCM		2
-#define FS_ENCRYPTION_MODE_AES_256_CBC		3
-#define FS_ENCRYPTION_MODE_AES_256_CTS		4
-
 #define FS_CRYPTO_BLOCK_SIZE		16
 
 struct fscrypt_info;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index acb2b6152ba0..0496d37abe28 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -254,6 +254,20 @@ struct fsxattr {
 /* Policy provided via an ioctl on the topmost directory */
 #define FS_KEY_DESCRIPTOR_SIZE	8
 
+#define FS_POLICY_FLAGS_PAD_4		0x00
+#define FS_POLICY_FLAGS_PAD_8		0x01
+#define FS_POLICY_FLAGS_PAD_16		0x02
+#define FS_POLICY_FLAGS_PAD_32		0x03
+#define FS_POLICY_FLAGS_PAD_MASK	0x03
+#define FS_POLICY_FLAGS_VALID		0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID		0
+#define FS_ENCRYPTION_MODE_AES_256_XTS		1
+#define FS_ENCRYPTION_MODE_AES_256_GCM		2
+#define FS_ENCRYPTION_MODE_AES_256_CBC		3
+#define FS_ENCRYPTION_MODE_AES_256_CTS		4
+
 struct fscrypt_policy {
 	__u8 version;
 	__u8 contents_encryption_mode;
-- 
cgit v1.2.3


From 1400451f04f2ff28b658b92557495e5090914aee Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Tue, 6 Dec 2016 23:53:55 +0100
Subject: fscrypt: Cleanup fscrypt_{decrypt,encrypt}_page()

- Improve documentation
- Add BUG_ON(len == 0) to avoid accidental switch of offs and len
parameters
- Improve variable names for readability

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 93 +++++++++++++++++++++++++++---------------------
 include/linux/fscrypto.h |  8 ++---
 2 files changed, 56 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index f287f76cc906..8c104e712bb2 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -147,9 +147,9 @@ typedef enum {
 } fscrypt_direction_t;
 
 static int do_page_crypto(const struct inode *inode,
-			fscrypt_direction_t rw, pgoff_t index,
+			fscrypt_direction_t rw, u64 lblk_num,
 			struct page *src_page, struct page *dest_page,
-			unsigned int src_len, unsigned int src_offset,
+			unsigned int len, unsigned int offs,
 			gfp_t gfp_flags)
 {
 	struct {
@@ -163,6 +163,8 @@ static int do_page_crypto(const struct inode *inode,
 	struct crypto_skcipher *tfm = ci->ci_ctfm;
 	int res = 0;
 
+	BUG_ON(len == 0);
+
 	req = skcipher_request_alloc(tfm, gfp_flags);
 	if (!req) {
 		printk_ratelimited(KERN_ERR
@@ -176,14 +178,14 @@ static int do_page_crypto(const struct inode *inode,
 		page_crypt_complete, &ecr);
 
 	BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
-	xts_tweak.index = cpu_to_le64(index);
+	xts_tweak.index = cpu_to_le64(lblk_num);
 	memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
 
 	sg_init_table(&dst, 1);
-	sg_set_page(&dst, dest_page, src_len, src_offset);
+	sg_set_page(&dst, dest_page, len, offs);
 	sg_init_table(&src, 1);
-	sg_set_page(&src, src_page, src_len, src_offset);
-	skcipher_request_set_crypt(req, &src, &dst, src_len, &xts_tweak);
+	sg_set_page(&src, src_page, len, offs);
+	skcipher_request_set_crypt(req, &src, &dst, len, &xts_tweak);
 	if (rw == FS_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
 	else
@@ -214,44 +216,53 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
 
 /**
  * fscypt_encrypt_page() - Encrypts a page
- * @inode:            The inode for which the encryption should take place
- * @plaintext_page:   The page to encrypt. Must be locked.
- * @plaintext_len:    Length of plaintext within page
- * @plaintext_offset: Offset of plaintext within page
- * @index:            Index for encryption. This is mainly the page index, but
- *                    but might be different for multiple calls on same page.
- * @gfp_flags:        The gfp flag for memory allocation
+ * @inode:     The inode for which the encryption should take place
+ * @page:      The page to encrypt. Must be locked for bounce-page
+ *             encryption.
+ * @len:       Length of data to encrypt in @page and encrypted
+ *             data in returned page.
+ * @offs:      Offset of data within @page and returned
+ *             page holding encrypted data.
+ * @lblk_num:  Logical block number. This must be unique for multiple
+ *             calls with same inode, except when overwriting
+ *             previously written data.
+ * @gfp_flags: The gfp flag for memory allocation
  *
- * Encrypts plaintext_page using the ctx encryption context. If
- * the filesystem supports it, encryption is performed in-place, otherwise a
- * new ciphertext_page is allocated and returned.
+ * Encrypts @page using the ctx encryption context. Performs encryption
+ * either in-place or into a newly allocated bounce page.
+ * Called on the page write path.
  *
- * Called on the page write path.  The caller must call
+ * Bounce page allocation is the default.
+ * In this case, the contents of @page are encrypted and stored in an
+ * allocated bounce page. @page has to be locked and the caller must call
  * fscrypt_restore_control_page() on the returned ciphertext page to
  * release the bounce buffer and the encryption context.
  *
- * Return: An allocated page with the encrypted content on success. Else, an
+ * In-place encryption is used by setting the FS_CFLG_INPLACE_ENCRYPTION flag in
+ * fscrypt_operations. Here, the input-page is returned with its content
+ * encrypted.
+ *
+ * Return: A page with the encrypted content on success. Else, an
  * error value or NULL.
  */
 struct page *fscrypt_encrypt_page(const struct inode *inode,
-				struct page *plaintext_page,
-				unsigned int plaintext_len,
-				unsigned int plaintext_offset,
-				pgoff_t index, gfp_t gfp_flags)
+				struct page *page,
+				unsigned int len,
+				unsigned int offs,
+				u64 lblk_num, gfp_t gfp_flags)
 
 {
 	struct fscrypt_ctx *ctx;
-	struct page *ciphertext_page = plaintext_page;
+	struct page *ciphertext_page = page;
 	int err;
 
-	BUG_ON(plaintext_len % FS_CRYPTO_BLOCK_SIZE != 0);
+	BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
 
 	if (inode->i_sb->s_cop->flags & FS_CFLG_INPLACE_ENCRYPTION) {
 		/* with inplace-encryption we just encrypt the page */
-		err = do_page_crypto(inode, FS_ENCRYPT, index,
-					plaintext_page, ciphertext_page,
-					plaintext_len, plaintext_offset,
-					gfp_flags);
+		err = do_page_crypto(inode, FS_ENCRYPT, lblk_num,
+					page, ciphertext_page,
+					len, offs, gfp_flags);
 		if (err)
 			return ERR_PTR(err);
 
@@ -267,11 +278,10 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
 	if (IS_ERR(ciphertext_page))
 		goto errout;
 
-	ctx->w.control_page = plaintext_page;
-	err = do_page_crypto(inode, FS_ENCRYPT, index,
-					plaintext_page, ciphertext_page,
-					plaintext_len, plaintext_offset,
-					gfp_flags);
+	ctx->w.control_page = page;
+	err = do_page_crypto(inode, FS_ENCRYPT, lblk_num,
+					page, ciphertext_page,
+					len, offs, gfp_flags);
 	if (err) {
 		ciphertext_page = ERR_PTR(err);
 		goto errout;
@@ -289,11 +299,12 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
 
 /**
  * fscrypt_decrypt_page() - Decrypts a page in-place
- * @inode: Encrypted inode to decrypt.
- * @page:  The page to decrypt. Must be locked.
- * @len:   Number of bytes in @page to be decrypted.
- * @offs:  Start of data in @page.
- * @index: Index for encryption.
+ * @inode:     The corresponding inode for the page to decrypt.
+ * @page:      The page to decrypt. Must be locked in case
+ *             it is a writeback page.
+ * @len:       Number of bytes in @page to be decrypted.
+ * @offs:      Start of data in @page.
+ * @lblk_num:  Logical block number.
  *
  * Decrypts page in-place using the ctx encryption context.
  *
@@ -302,10 +313,10 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  * Return: Zero on success, non-zero otherwise.
  */
 int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
-			unsigned int len, unsigned int offs, pgoff_t index)
+			unsigned int len, unsigned int offs, u64 lblk_num)
 {
-	return do_page_crypto(inode, FS_DECRYPT, index, page, page, len, offs,
-			GFP_NOFS);
+	return do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, len,
+			offs, GFP_NOFS);
 }
 EXPORT_SYMBOL(fscrypt_decrypt_page);
 
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 42ef82d60790..2d9abfa22b94 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -173,9 +173,9 @@ extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
 extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
 						unsigned int, unsigned int,
-						pgoff_t, gfp_t);
+						u64, gfp_t);
 extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
-				unsigned int, pgoff_t);
+				unsigned int, u64);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
@@ -221,14 +221,14 @@ static inline struct page *fscrypt_notsupp_encrypt_page(const struct inode *i,
 						struct page *p,
 						unsigned int len,
 						unsigned int offs,
-						pgoff_t index, gfp_t f)
+						u64 lblk_num, gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline int fscrypt_notsupp_decrypt_page(const struct inode *i, struct page *p,
 						unsigned int len, unsigned int offs,
-						pgoff_t index)
+						u64 lblk_num)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From bd7b8290388dd58a8c0a3710b171e58ef952ca4d Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Tue, 6 Dec 2016 23:53:56 +0100
Subject: fscrypt: Cleanup page locking requirements for
 fscrypt_{decrypt,encrypt}_page()

Rename the FS_CFLG_INPLACE_ENCRYPTION flag to FS_CFLG_OWN_PAGES which,
when set, indicates that the fs uses pages under its own control as
opposed to writeback pages which require locking and a bounce buffer for
encryption.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c       | 11 ++++++++---
 fs/ext4/inode.c          |  1 -
 fs/f2fs/data.c           |  1 -
 include/linux/fscrypto.h |  2 +-
 4 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 8c104e712bb2..5ffc59436397 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -238,7 +238,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
  * fscrypt_restore_control_page() on the returned ciphertext page to
  * release the bounce buffer and the encryption context.
  *
- * In-place encryption is used by setting the FS_CFLG_INPLACE_ENCRYPTION flag in
+ * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
  * fscrypt_operations. Here, the input-page is returned with its content
  * encrypted.
  *
@@ -258,7 +258,7 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
 
 	BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
 
-	if (inode->i_sb->s_cop->flags & FS_CFLG_INPLACE_ENCRYPTION) {
+	if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
 		/* with inplace-encryption we just encrypt the page */
 		err = do_page_crypto(inode, FS_ENCRYPT, lblk_num,
 					page, ciphertext_page,
@@ -269,6 +269,8 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
 		return ciphertext_page;
 	}
 
+	BUG_ON(!PageLocked(page));
+
 	ctx = fscrypt_get_ctx(inode, gfp_flags);
 	if (IS_ERR(ctx))
 		return (struct page *)ctx;
@@ -301,7 +303,7 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
  * fscrypt_decrypt_page() - Decrypts a page in-place
  * @inode:     The corresponding inode for the page to decrypt.
  * @page:      The page to decrypt. Must be locked in case
- *             it is a writeback page.
+ *             it is a writeback page (FS_CFLG_OWN_PAGES unset).
  * @len:       Number of bytes in @page to be decrypted.
  * @offs:      Start of data in @page.
  * @lblk_num:  Logical block number.
@@ -315,6 +317,9 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
 int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
 			unsigned int len, unsigned int offs, u64 lblk_num)
 {
+	if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
+		BUG_ON(!PageLocked(page));
+
 	return do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, len,
 			offs, GFP_NOFS);
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1485ac273bfb..fb2b514f675b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3744,7 +3744,6 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 			/* We expect the key to be set. */
 			BUG_ON(!fscrypt_has_encryption_key(inode));
 			BUG_ON(blocksize != PAGE_SIZE);
-			BUG_ON(!PageLocked(page));
 			WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
 						page, PAGE_SIZE, 0, page->index));
 		}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 435590c4b341..9f0ba90b92e4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1194,7 +1194,6 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
 							fio->old_blkaddr);
 retry_encrypt:
-		BUG_ON(!PageLocked(fio->page));
 		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
 							PAGE_SIZE, 0,
 							fio->page->index,
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 2d9abfa22b94..188b4fa59cbf 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -80,7 +80,7 @@ struct fscrypt_name {
 /*
  * fscrypt superblock flags
  */
-#define FS_CFLG_INPLACE_ENCRYPTION (1U << 1)
+#define FS_CFLG_OWN_PAGES (1U << 1)
 
 /*
  * crypto opertions for filesystems
-- 
cgit v1.2.3


From f32d7ac20a5864483c1f96e4970daa083e18bfd1 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Tue, 6 Dec 2016 23:53:57 +0100
Subject: fscrypt: Delay bounce page pool allocation until needed

Since fscrypt users can now indicated if fscrypt_encrypt_page() should
use a bounce page, we can delay the bounce page pool initialization util
it is really needed. That is until fscrypt_operations has no
FS_CFLG_OWN_PAGES flag set.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c          | 9 +++++++--
 fs/crypto/fscrypt_private.h | 2 +-
 fs/crypto/keyinfo.c         | 2 +-
 include/linux/fscrypto.h    | 1 -
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 5ffc59436397..bc1d4781b9ec 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -525,17 +525,22 @@ static void fscrypt_destroy(void)
 
 /**
  * fscrypt_initialize() - allocate major buffers for fs encryption.
+ * @cop_flags:  fscrypt operations flags
  *
  * We only call this when we start accessing encrypted files, since it
  * results in memory getting allocated that wouldn't otherwise be used.
  *
  * Return: Zero on success, non-zero otherwise.
  */
-int fscrypt_initialize(void)
+int fscrypt_initialize(unsigned int cop_flags)
 {
 	int i, res = -ENOMEM;
 
-	if (fscrypt_bounce_page_pool)
+	/*
+	 * No need to allocate a bounce page pool if there already is one or
+	 * this FS won't use it.
+	 */
+	if (cop_flags & FS_CFLG_OWN_PAGES || fscrypt_bounce_page_pool)
 		return 0;
 
 	mutex_lock(&fscrypt_init_mutex);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index c98b2a7fb6d3..7ba10cd45a2e 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -85,7 +85,7 @@ struct fscrypt_completion_result {
 
 
 /* crypto.c */
-int fscrypt_initialize(void);
+int fscrypt_initialize(unsigned int cop_flags);
 
 /* keyinfo.c */
 extern int fscrypt_get_crypt_info(struct inode *);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 35d3317a27b3..6eeea1dcba41 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -188,7 +188,7 @@ int fscrypt_get_crypt_info(struct inode *inode)
 	u8 *raw_key = NULL;
 	int res;
 
-	res = fscrypt_initialize();
+	res = fscrypt_initialize(inode->i_sb->s_cop->flags);
 	if (res)
 		return res;
 
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 188b4fa59cbf..1adc1c758d31 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -168,7 +168,6 @@ static inline void fscrypt_set_d_op(struct dentry *dentry)
 #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
 /* crypto.c */
 extern struct kmem_cache *fscrypt_info_cachep;
-
 extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
 extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
-- 
cgit v1.2.3


From 6a34e4d2be07255cc59e6ccaf820669cfd7f815c Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Tue, 6 Dec 2016 23:53:58 +0100
Subject: fscrypt: Rename FS_WRITE_PATH_FL to FS_CTX_HAS_BOUNCE_BUFFER_FL

... to better explain its purpose after introducing in-place encryption
without bounce buffer.

Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/crypto.c          | 6 +++---
 fs/crypto/fscrypt_private.h | 2 +-
 include/linux/fscrypto.h    | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index bc1d4781b9ec..ac8e4f6a3773 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -63,7 +63,7 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
 {
 	unsigned long flags;
 
-	if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) {
+	if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
 		mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
 		ctx->w.bounce_page = NULL;
 	}
@@ -121,7 +121,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
 	} else {
 		ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
 	}
-	ctx->flags &= ~FS_WRITE_PATH_FL;
+	ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
 	return ctx;
 }
 EXPORT_SYMBOL(fscrypt_get_ctx);
@@ -210,7 +210,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
 	ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
 	if (ctx->w.bounce_page == NULL)
 		return ERR_PTR(-ENOMEM);
-	ctx->flags |= FS_WRITE_PATH_FL;
+	ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
 	return ctx->w.bounce_page;
 }
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7ba10cd45a2e..aeab032d7d35 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -72,7 +72,7 @@ struct fscrypt_info {
 };
 
 #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL		0x00000001
-#define FS_WRITE_PATH_FL			0x00000002
+#define FS_CTX_HAS_BOUNCE_BUFFER_FL		0x00000002
 
 struct fscrypt_completion_result {
 	struct completion completion;
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 1adc1c758d31..c074b670aa99 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -21,7 +21,6 @@
 #define FS_CRYPTO_BLOCK_SIZE		16
 
 struct fscrypt_info;
-struct fscrypt_ctx;
 
 struct fscrypt_ctx {
 	union {
-- 
cgit v1.2.3


From e8f4aa6087fa80732382881ef7c0c96733bb1984 Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Thu, 13 Oct 2016 10:06:13 +0530
Subject: sparc64:Support User Probes for sparc

Signed-off-by: Eric Saint Etienne <eric.saint.etienne@oracle.com>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/Kconfig                            |   1 +
 arch/sparc/Kconfig                      |   3 +
 arch/sparc/configs/sparc64_defconfig    |   1 +
 arch/sparc/include/asm/kdebug_64.h      |   2 +
 arch/sparc/include/asm/ptrace.h         |  35 +++-
 arch/sparc/include/asm/thread_info_64.h |   6 +-
 arch/sparc/include/asm/ttable.h         |   6 +
 arch/sparc/include/asm/uprobes.h        |  59 ++++++
 arch/sparc/kernel/Makefile              |   1 +
 arch/sparc/kernel/ptrace_64.c           |  54 ++++++
 arch/sparc/kernel/signal_64.c           |   2 +
 arch/sparc/kernel/ttable_64.S           |   2 +-
 arch/sparc/kernel/uprobes.c             | 331 ++++++++++++++++++++++++++++++++
 include/linux/uprobes.h                 |   1 +
 14 files changed, 500 insertions(+), 4 deletions(-)
 create mode 100644 arch/sparc/include/asm/uprobes.h
 create mode 100644 arch/sparc/kernel/uprobes.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 659bdd079277..44a44b49eb3a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -96,6 +96,7 @@ config KPROBES_ON_FTRACE
 
 config UPROBES
 	def_bool n
+	depends on ARCH_SUPPORTS_UPROBES
 	help
 	  Uprobes is the user-space counterpart to kprobes: they
 	  enable instrumentation applications (such as 'perf probe')
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 165ecdd24d22..cf4034c66362 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -155,6 +155,9 @@ config PGTABLE_LEVELS
 	default 4 if 64BIT
 	default 3
 
+config ARCH_SUPPORTS_UPROBES
+	def_bool y if SPARC64
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 3583d676a916..b2e650d1764f 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -213,6 +213,7 @@ CONFIG_SCHEDSTATS=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_KEYS=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
diff --git a/arch/sparc/include/asm/kdebug_64.h b/arch/sparc/include/asm/kdebug_64.h
index 04465de8f3b5..867286bf7b1a 100644
--- a/arch/sparc/include/asm/kdebug_64.h
+++ b/arch/sparc/include/asm/kdebug_64.h
@@ -10,6 +10,8 @@ enum die_val {
 	DIE_OOPS = 1,
 	DIE_DEBUG,	/* ta 0x70 */
 	DIE_DEBUG_2,	/* ta 0x71 */
+	DIE_BPT,	/* ta 0x73 */
+	DIE_SSTEP,	/* ta 0x74 */
 	DIE_DIE,
 	DIE_TRAP,
 	DIE_TRAP_TL1,
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index bac6a946ee00..ca57f08bd3db 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -61,7 +61,10 @@ extern union global_cpu_snapshot global_cpu_snapshot[NR_CPUS];
 #define force_successful_syscall_return() set_thread_noerror(1)
 #define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
 #define instruction_pointer(regs) ((regs)->tpc)
-#define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
+#define instruction_pointer_set(regs, val) do { \
+		(regs)->tpc = (val); \
+		(regs)->tnpc = (val)+4; \
+	} while (0)
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
 static inline int is_syscall_success(struct pt_regs *regs)
 {
@@ -77,6 +80,36 @@ unsigned long profile_pc(struct pt_regs *);
 #else
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
+
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, magic))
+
+extern int regs_query_register_offset(const char *name);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:	pt_regs from which register value is gotten
+ * @offset:	offset number of the register.
+ *
+ * regs_get_register returns the value of a register whose
+ * offset from @regs. The @offset is the offset of the register
+ * in struct pt_regs. If @offset is bigger than MAX_REG_OFFSET,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+					     unsigned long offset)
+{
+	if (unlikely(offset >= MAX_REG_OFFSET))
+		return 0;
+	if (offset == PT_V9_Y)
+		return *(unsigned int *)((unsigned long)regs + offset);
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I6];
+}
 #else /* __ASSEMBLY__ */
 #endif /* __ASSEMBLY__ */
 #else /* (defined(__sparc__) && defined(__arch64__)) */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 3d7b925f6516..38a24f257b85 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -180,7 +180,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 /* flag bit 4 is available */
 #define TIF_UNALIGNED		5	/* allowed to do unaligned accesses */
-/* flag bit 6 is available */
+#define TIF_UPROBE		6	/* breakpointed or singlestepped */
 #define TIF_32BIT		7	/* 32-bit binary */
 #define TIF_NOHZ		8	/* in adaptive nohz mode */
 #define TIF_SECCOMP		9	/* secure computing */
@@ -199,6 +199,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_UNALIGNED		(1<<TIF_UNALIGNED)
+#define _TIF_UPROBE		(1<<TIF_UPROBE)
 #define _TIF_32BIT		(1<<TIF_32BIT)
 #define _TIF_NOHZ		(1<<TIF_NOHZ)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
@@ -209,7 +210,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 _TIF_DO_NOTIFY_RESUME_MASK | \
 				 _TIF_NEED_RESCHED)
-#define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
+#define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
+					 _TIF_SIGPENDING | _TIF_UPROBE)
 
 #define is_32bit_task()	(test_thread_flag(TIF_32BIT))
 
diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h
index 781b9f1dbdc2..82e7df296abc 100644
--- a/arch/sparc/include/asm/ttable.h
+++ b/arch/sparc/include/asm/ttable.h
@@ -186,6 +186,12 @@
 #define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
 #endif
 
+#ifdef CONFIG_UPROBES
+#define UPROBES_TRAP(lvl) TRAP_ARG(uprobe_trap, lvl)
+#else
+#define UPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
+#endif
+
 #ifdef CONFIG_KGDB
 #define KGDB_TRAP(lvl) TRAP_IRQ(kgdb_trap, lvl)
 #else
diff --git a/arch/sparc/include/asm/uprobes.h b/arch/sparc/include/asm/uprobes.h
new file mode 100644
index 000000000000..f87aae5a908e
--- /dev/null
+++ b/arch/sparc/include/asm/uprobes.h
@@ -0,0 +1,59 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for sparc
+ *
+ * Copyright (C) 2013 Oracle, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *     Jose E. Marchesi <jose.marchesi@oracle.com>
+ *	Eric Saint Etienne <eric.saint.etienne@oracle.com>
+ */
+
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES		4
+#define UPROBE_XOL_SLOT_BYTES	(MAX_UINSN_BYTES * 2)
+
+#define UPROBE_SWBP_INSN_SIZE	4
+#define UPROBE_SWBP_INSN	0x91d02073 /* ta 0x73 */
+#define UPROBE_STP_INSN		0x91d02074 /* ta 0x74 */
+
+#define ANNUL_BIT (1 << 29)
+
+struct arch_uprobe {
+	union {
+		u8  insn[MAX_UINSN_BYTES];
+		u32 ixol;
+	};
+};
+
+struct arch_uprobe_task {
+	u32 saved_tpc;
+	u32 saved_tnpc;
+};
+
+struct task_struct;
+struct notifier_block;
+
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+
+#endif	/* _ASM_UPROBES_H */
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index fa3c02d41138..aac609889ee4 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -116,4 +116,5 @@ obj-$(CONFIG_COMPAT)    += $(audit--y)
 pc--$(CONFIG_PERF_EVENTS) := perf_event.o
 obj-$(CONFIG_SPARC64)	+= $(pc--y)
 
+obj-$(CONFIG_UPROBES)	+= uprobes.o
 obj-$(CONFIG_SPARC64)	+= jump_label.o
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index ac082dd8c67d..96494b2ef41f 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -46,6 +46,43 @@
 
 /* #define ALLOW_INIT_TRACING */
 
+struct pt_regs_offset {
+	const char *name;
+	int offset;
+};
+
+#define REG_OFFSET_NAME(n, r) \
+	{.name = n, .offset = (PT_V9_##r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+	REG_OFFSET_NAME("g0", G0),
+	REG_OFFSET_NAME("g1", G1),
+	REG_OFFSET_NAME("g2", G2),
+	REG_OFFSET_NAME("g3", G3),
+	REG_OFFSET_NAME("g4", G4),
+	REG_OFFSET_NAME("g5", G5),
+	REG_OFFSET_NAME("g6", G6),
+	REG_OFFSET_NAME("g7", G7),
+
+	REG_OFFSET_NAME("i0", I0),
+	REG_OFFSET_NAME("i1", I1),
+	REG_OFFSET_NAME("i2", I2),
+	REG_OFFSET_NAME("i3", I3),
+	REG_OFFSET_NAME("i4", I4),
+	REG_OFFSET_NAME("i5", I5),
+	REG_OFFSET_NAME("i6", I6),
+	REG_OFFSET_NAME("i7", I7),
+
+	REG_OFFSET_NAME("tstate", TSTATE),
+	REG_OFFSET_NAME("pc", TPC),
+	REG_OFFSET_NAME("npc", TNPC),
+	REG_OFFSET_NAME("y", Y),
+	REG_OFFSET_NAME("lr", I7),
+
+	REG_OFFSET_END,
+};
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -1107,3 +1144,20 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 	if (test_thread_flag(TIF_NOHZ))
 		user_enter();
 }
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *roff;
+
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->offset;
+	return -EINVAL;
+}
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 5ee930c48f4c..c782c9b716db 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -545,6 +545,8 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
 {
 	user_exit();
+	if (thread_info_flags & _TIF_UPROBE)
+		uprobe_notify_resume(regs);
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs, orig_i0);
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S
index c6dfdaa29e20..7bd8f6556352 100644
--- a/arch/sparc/kernel/ttable_64.S
+++ b/arch/sparc/kernel/ttable_64.S
@@ -165,7 +165,7 @@ tl0_resv169:	BTRAP(0x169) BTRAP(0x16a) BTRAP(0x16b) BTRAP(0x16c)
 tl0_linux64:	LINUX_64BIT_SYSCALL_TRAP
 tl0_gsctx:	TRAP(sparc64_get_context) TRAP(sparc64_set_context)
 tl0_resv170:	KPROBES_TRAP(0x170) KPROBES_TRAP(0x171) KGDB_TRAP(0x172)
-tl0_resv173:	BTRAP(0x173) BTRAP(0x174) BTRAP(0x175) BTRAP(0x176) BTRAP(0x177)
+tl0_resv173:	UPROBES_TRAP(0x173) UPROBES_TRAP(0x174) BTRAP(0x175) BTRAP(0x176) BTRAP(0x177)
 tl0_resv178:	BTRAP(0x178) BTRAP(0x179) BTRAP(0x17a) BTRAP(0x17b) BTRAP(0x17c)
 tl0_resv17d:	BTRAP(0x17d) BTRAP(0x17e) BTRAP(0x17f)
 #define BTRAPS(x) BTRAP(x) BTRAP(x+1) BTRAP(x+2) BTRAP(x+3) BTRAP(x+4) BTRAP(x+5) BTRAP(x+6) BTRAP(x+7)
diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c
new file mode 100644
index 000000000000..b68314050602
--- /dev/null
+++ b/arch/sparc/kernel/uprobes.c
@@ -0,0 +1,331 @@
+/*
+ * User-space Probes (UProbes) for sparc
+ *
+ * Copyright (C) 2013 Oracle Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *	Jose E. Marchesi <jose.marchesi@oracle.com>
+ *	Eric Saint Etienne <eric.saint.etienne@oracle.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/uprobes.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h> /* For struct task_struct */
+#include <linux/kdebug.h>
+
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+
+/* Compute the address of the breakpoint instruction and return it.
+ *
+ * Note that uprobe_get_swbp_addr is defined as a weak symbol in
+ * kernel/events/uprobe.c.
+ */
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return instruction_pointer(regs);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr,
+			 const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
+	kunmap_atomic(kaddr);
+}
+
+/* Fill in the xol area with the probed instruction followed by the
+ * single-step trap.  Some fixups in the copied instruction are
+ * performed at this point.
+ *
+ * Note that uprobe_xol_copy is defined as a weak symbol in
+ * kernel/events/uprobe.c.
+ */
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+			   void *src, unsigned long len)
+{
+	const u32 stp_insn = UPROBE_STP_INSN;
+	u32 insn = *(u32 *) src;
+
+	/* Branches annulling their delay slot must be fixed to not do
+	 * so.  Clearing the annul bit on these instructions we can be
+	 * sure the single-step breakpoint in the XOL slot will be
+	 * executed.
+	 */
+
+	u32 op = (insn >> 30) & 0x3;
+	u32 op2 = (insn >> 22) & 0x7;
+
+	if (op == 0 &&
+	    (op2 == 1 || op2 == 2 || op2 == 3 || op2 == 5 || op2 == 6) &&
+	    (insn & ANNUL_BIT) == ANNUL_BIT)
+		insn &= ~ANNUL_BIT;
+
+	copy_to_page(page, vaddr, &insn, len);
+	copy_to_page(page, vaddr+len, &stp_insn, 4);
+}
+
+
+/* Instruction analysis/validity.
+ *
+ * This function returns 0 on success or a -ve number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
+			     struct mm_struct *mm, unsigned long addr)
+{
+	/* Any unsupported instruction?  Then return -EINVAL  */
+	return 0;
+}
+
+/* If INSN is a relative control transfer instruction, return the
+ * corrected branch destination value.
+ *
+ * Note that regs->tpc and regs->tnpc still hold the values of the
+ * program counters at the time of the single-step trap due to the
+ * execution of the UPROBE_STP_INSN at utask->xol_vaddr + 4.
+ *
+ */
+static unsigned long relbranch_fixup(u32 insn, struct uprobe_task *utask,
+				     struct pt_regs *regs)
+{
+	/* Branch not taken, no mods necessary.  */
+	if (regs->tnpc == regs->tpc + 0x4UL)
+		return utask->autask.saved_tnpc + 0x4UL;
+
+	/* The three cases are call, branch w/prediction,
+	 * and traditional branch.
+	 */
+	if ((insn & 0xc0000000) == 0x40000000 ||
+	    (insn & 0xc1c00000) == 0x00400000 ||
+	    (insn & 0xc1c00000) == 0x00800000) {
+		unsigned long real_pc = (unsigned long) utask->vaddr;
+		unsigned long ixol_addr = utask->xol_vaddr;
+
+		/* The instruction did all the work for us
+		 * already, just apply the offset to the correct
+		 * instruction location.
+		 */
+		return (real_pc + (regs->tnpc - ixol_addr));
+	}
+
+	/* It is jmpl or some other absolute PC modification instruction,
+	 * leave NPC as-is.
+	 */
+	return regs->tnpc;
+}
+
+/* If INSN is an instruction which writes its PC location
+ * into a destination register, fix that up.
+ */
+static int retpc_fixup(struct pt_regs *regs, u32 insn,
+		       unsigned long real_pc)
+{
+	unsigned long *slot = NULL;
+	int rc = 0;
+
+	/* Simplest case is 'call', which always uses %o7 */
+	if ((insn & 0xc0000000) == 0x40000000)
+		slot = &regs->u_regs[UREG_I7];
+
+	/* 'jmpl' encodes the register inside of the opcode */
+	if ((insn & 0xc1f80000) == 0x81c00000) {
+		unsigned long rd = ((insn >> 25) & 0x1f);
+
+		if (rd <= 15) {
+			slot = &regs->u_regs[rd];
+		} else {
+			unsigned long fp = regs->u_regs[UREG_FP];
+			/* Hard case, it goes onto the stack. */
+			flushw_all();
+
+			rd -= 16;
+			if (test_thread_64bit_stack(fp)) {
+				unsigned long __user *uslot =
+			(unsigned long __user *) (fp + STACK_BIAS) + rd;
+				rc = __put_user(real_pc, uslot);
+			} else {
+				unsigned int __user *uslot = (unsigned int
+						__user *) fp + rd;
+				rc = __put_user((u32) real_pc, uslot);
+			}
+		}
+	}
+	if (slot != NULL)
+		*slot = real_pc;
+	return rc;
+}
+
+/* Single-stepping can be avoided for certain instructions: NOPs and
+ * instructions that can be emulated.  This function determines
+ * whether the instruction where the uprobe is installed falls in one
+ * of these cases and emulates it.
+ *
+ * This function returns true if the single-stepping can be skipped,
+ * false otherwise.
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	/* We currently only emulate NOP instructions.
+	 */
+
+	if (auprobe->ixol == (1 << 24)) {
+		regs->tnpc += 4;
+		regs->tpc += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/* Prepare to execute out of line.  At this point
+ * current->utask->xol_vaddr points to an allocated XOL slot properly
+ * initialized with the original instruction and the single-stepping
+ * trap instruction.
+ *
+ * This function returns 0 on success, any other number on error.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+	struct arch_uprobe_task *autask = &current->utask->autask;
+
+	/* Save the current program counters so they can be restored
+	 * later.
+	 */
+	autask->saved_tpc = regs->tpc;
+	autask->saved_tnpc = regs->tnpc;
+
+	/* Adjust PC and NPC so the first instruction in the XOL slot
+	 * will be executed by the user task.
+	 */
+	instruction_pointer_set(regs, utask->xol_vaddr);
+
+	return 0;
+}
+
+/* Prepare to resume execution after the single-step.  Called after
+ * single-stepping. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.
+ *
+ * This function returns 0 on success, any other number on error.
+ */
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+	struct arch_uprobe_task *autask = &utask->autask;
+	u32 insn = auprobe->ixol;
+	int rc = 0;
+
+	if (utask->state == UTASK_SSTEP_ACK) {
+		regs->tnpc = relbranch_fixup(insn, utask, regs);
+		regs->tpc = autask->saved_tnpc;
+		rc =  retpc_fixup(regs, insn, (unsigned long) utask->vaddr);
+	} else {
+		regs->tnpc = utask->vaddr+4;
+		regs->tpc = autask->saved_tnpc+4;
+	}
+	return rc;
+}
+
+/* Handler for uprobe traps.  This is called from the traps table and
+ * triggers the proper die notification.
+ */
+asmlinkage void uprobe_trap(struct pt_regs *regs,
+			    unsigned long trap_level)
+{
+	BUG_ON(trap_level != 0x173 && trap_level != 0x174);
+
+	/* We are only interested in user-mode code.  Uprobe traps
+	 * shall not be present in kernel code.
+	 */
+	if (!user_mode(regs)) {
+		local_irq_enable();
+		bad_trap(regs, trap_level);
+		return;
+	}
+
+	/* trap_level == 0x173 --> ta 0x73
+	 * trap_level == 0x174 --> ta 0x74
+	 */
+	if (notify_die((trap_level == 0x173) ? DIE_BPT : DIE_SSTEP,
+				(trap_level == 0x173) ? "bpt" : "sstep",
+				regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
+		bad_trap(regs, trap_level);
+}
+
+/* Callback routine for handling die notifications.
+*/
+int arch_uprobe_exception_notify(struct notifier_block *self,
+				 unsigned long val, void *data)
+{
+	int ret = NOTIFY_DONE;
+	struct die_args *args = (struct die_args *)data;
+
+	/* We are only interested in userspace traps */
+	if (args->regs && !user_mode(args->regs))
+		return NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_BPT:
+		if (uprobe_pre_sstep_notifier(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+
+	case DIE_SSTEP:
+		if (uprobe_post_sstep_notifier(args->regs))
+			ret = NOTIFY_STOP;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/* This function gets called when a XOL instruction either gets
+ * trapped or the thread has a fatal signal, so reset the instruction
+ * pointer to its probed address.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	instruction_pointer_set(regs, utask->vaddr);
+}
+
+/* If xol insn itself traps and generates a signal(Say,
+ * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
+ * instruction jumps back to its own address.
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+	return false;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+				  struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr = regs->u_regs[UREG_I7];
+
+	regs->u_regs[UREG_I7] = trampoline_vaddr-8;
+
+	return orig_ret_vaddr + 8;
+}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4a29c75b146e..0a294e950df8 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -27,6 +27,7 @@
 #include <linux/errno.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
+#include <linux/wait.h>
 
 struct vm_area_struct;
 struct mm_struct;
-- 
cgit v1.2.3


From 5191d88acc688743eef56f1c598a4e4cddf6c6cd Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick@shmanahar.org>
Date: Sat, 10 Dec 2016 23:27:32 -0800
Subject: Input: synaptics-rmi4 - add support for F34 V7 bootloader

Port firmware update code from Samsung Galaxy S7 driver into
mainline framework.

This patch has been tested on Synaptics S7813.

Signed-off-by: Nick Dyer <nick@shmanahar.org>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Makefile     |    2 +-
 drivers/input/rmi4/rmi_driver.c |   56 +-
 drivers/input/rmi4/rmi_f34.c    |   40 +-
 drivers/input/rmi4/rmi_f34.h    |  250 ++++++-
 drivers/input/rmi4/rmi_f34v7.c  | 1372 +++++++++++++++++++++++++++++++++++++++
 include/linux/rmi.h             |    2 +-
 6 files changed, 1688 insertions(+), 34 deletions(-)
 create mode 100644 drivers/input/rmi4/rmi_f34v7.c

(limited to 'include/linux')

diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile
index a199cbe9c27d..9aaac3dd8613 100644
--- a/drivers/input/rmi4/Makefile
+++ b/drivers/input/rmi4/Makefile
@@ -8,7 +8,7 @@ rmi_core-$(CONFIG_RMI4_F03) += rmi_f03.o
 rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o
 rmi_core-$(CONFIG_RMI4_F12) += rmi_f12.o
 rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o
-rmi_core-$(CONFIG_RMI4_F34) += rmi_f34.o
+rmi_core-$(CONFIG_RMI4_F34) += rmi_f34.o rmi_f34v7.o
 rmi_core-$(CONFIG_RMI4_F54) += rmi_f54.o
 rmi_core-$(CONFIG_RMI4_F55) += rmi_f55.o
 
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 05a3c4bc9778..cb6efe693302 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -544,7 +544,7 @@ static int rmi_scan_pdt_page(struct rmi_device *rmi_dev,
 	else
 		*empty_pages = 0;
 
-	return (data->f01_bootloader_mode || *empty_pages >= 2) ?
+	return (data->bootloader_mode || *empty_pages >= 2) ?
 					RMI_SCAN_DONE : RMI_SCAN_CONTINUE;
 }
 
@@ -749,41 +749,49 @@ bool rmi_register_desc_has_subpacket(const struct rmi_register_desc_item *item,
 				subpacket) == subpacket;
 }
 
-/* Indicates that flash programming is enabled (bootloader mode). */
-#define RMI_F01_STATUS_BOOTLOADER(status)	(!!((status) & 0x40))
-
-/*
- * Given the PDT entry for F01, read the device status register to determine
- * if we're stuck in bootloader mode or not.
- *
- */
 static int rmi_check_bootloader_mode(struct rmi_device *rmi_dev,
 				     const struct pdt_entry *pdt)
 {
-	int error;
-	u8 device_status;
+	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
+	int ret;
+	u8 status;
 
-	error = rmi_read(rmi_dev, pdt->data_base_addr + pdt->page_start,
-			 &device_status);
-	if (error) {
-		dev_err(&rmi_dev->dev,
-			"Failed to read device status: %d.\n", error);
-		return error;
+	if (pdt->function_number == 0x34 && pdt->function_version > 1) {
+		ret = rmi_read(rmi_dev, pdt->data_base_addr, &status);
+		if (ret) {
+			dev_err(&rmi_dev->dev,
+				"Failed to read F34 status: %d.\n", ret);
+			return ret;
+		}
+
+		if (status & BIT(7))
+			data->bootloader_mode = true;
+	} else if (pdt->function_number == 0x01) {
+		ret = rmi_read(rmi_dev, pdt->data_base_addr, &status);
+		if (ret) {
+			dev_err(&rmi_dev->dev,
+				"Failed to read F01 status: %d.\n", ret);
+			return ret;
+		}
+
+		if (status & BIT(6))
+			data->bootloader_mode = true;
 	}
 
-	return RMI_F01_STATUS_BOOTLOADER(device_status);
+	return 0;
 }
 
 static int rmi_count_irqs(struct rmi_device *rmi_dev,
 			 void *ctx, const struct pdt_entry *pdt)
 {
-	struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
 	int *irq_count = ctx;
+	int ret;
 
 	*irq_count += pdt->interrupt_source_count;
-	if (pdt->function_number == 0x01)
-		data->f01_bootloader_mode =
-			rmi_check_bootloader_mode(rmi_dev, pdt);
+
+	ret = rmi_check_bootloader_mode(rmi_dev, pdt);
+	if (ret < 0)
+		return ret;
 
 	return RMI_SCAN_CONTINUE;
 }
@@ -1024,13 +1032,15 @@ int rmi_probe_interrupts(struct rmi_driver_data *data)
 	 */
 	rmi_dbg(RMI_DEBUG_CORE, dev, "%s: Counting IRQs.\n", __func__);
 	irq_count = 0;
+	data->bootloader_mode = false;
+
 	retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_count_irqs);
 	if (retval < 0) {
 		dev_err(dev, "IRQ counting failed with code %d.\n", retval);
 		return retval;
 	}
 
-	if (data->f01_bootloader_mode)
+	if (data->bootloader_mode)
 		dev_warn(&rmi_dev->dev, "Device in bootloader mode.\n");
 
 	data->irq_count = irq_count;
diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c
index 01936a4a9a6c..9774dfbab9bb 100644
--- a/drivers/input/rmi4/rmi_f34.c
+++ b/drivers/input/rmi4/rmi_f34.c
@@ -12,6 +12,7 @@
 #include <linux/firmware.h>
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
+#include <linux/bitops.h>
 
 #include "rmi_driver.h"
 #include "rmi_f34.h"
@@ -105,6 +106,9 @@ static int rmi_f34_attention(struct rmi_function *fn, unsigned long *irq_bits)
 	struct f34_data *f34 = dev_get_drvdata(&fn->dev);
 	int ret;
 
+	if (f34->bl_version != 5)
+		return 0;
+
 	ret = rmi_read(f34->fn->rmi_dev, f34->v5.ctrl_address, &f34->v5.status);
 	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s: status: %#02x, ret: %d\n",
 		__func__, f34->v5.status, ret);
@@ -292,17 +296,24 @@ static int rmi_firmware_update(struct rmi_driver_data *data,
 		return -EINVAL;
 	}
 
-	/* Only version 0 currently supported */
-	if (data->f34_container->fd.function_version != 0) {
+	f34 = dev_get_drvdata(&data->f34_container->dev);
+
+	if (f34->bl_version == 7) {
+		if (data->pdt_props & HAS_BSR) {
+			dev_err(dev, "%s: LTS not supported\n", __func__);
+			return -ENODEV;
+		}
+	} else if (f34->bl_version != 5) {
 		dev_warn(dev, "F34 V%d not supported!\n",
 			 data->f34_container->fd.function_version);
 		return -ENODEV;
 	}
 
-	f34 = dev_get_drvdata(&data->f34_container->dev);
-
 	/* Enter flash mode */
-	ret = rmi_f34_enable_flash(f34);
+	if (f34->bl_version == 7)
+		ret = rmi_f34v7_start_reflash(f34, fw);
+	else
+		ret = rmi_f34_enable_flash(f34);
 	if (ret)
 		return ret;
 
@@ -319,7 +330,7 @@ static int rmi_firmware_update(struct rmi_driver_data *data,
 	if (ret)
 		return ret;
 
-	if (!data->f01_bootloader_mode || !data->f34_container) {
+	if (!data->bootloader_mode || !data->f34_container) {
 		dev_warn(dev, "%s: No F34 present or not in bootloader!\n",
 				__func__);
 		return -EINVAL;
@@ -330,7 +341,10 @@ static int rmi_firmware_update(struct rmi_driver_data *data,
 	f34 = dev_get_drvdata(&data->f34_container->dev);
 
 	/* Perform firmware update */
-	ret = rmi_f34_update_firmware(f34, fw);
+	if (f34->bl_version == 7)
+		ret = rmi_f34v7_do_reflash(f34, fw);
+	else
+		ret = rmi_f34_update_firmware(f34, fw);
 
 	dev_info(&f34->fn->dev, "Firmware update complete, status:%d\n", ret);
 
@@ -363,6 +377,9 @@ static int rmi_firmware_update(struct rmi_driver_data *data,
 	return ret;
 }
 
+static int rmi_firmware_update(struct rmi_driver_data *data,
+			       const struct firmware *fw);
+
 static ssize_t rmi_driver_update_fw_store(struct device *dev,
 					  struct device_attribute *dattr,
 					  const char *buf, size_t count)
@@ -411,6 +428,7 @@ static int rmi_f34_probe(struct rmi_function *fn)
 	struct f34_data *f34;
 	unsigned char f34_queries[9];
 	bool has_config_id;
+	u8 version = fn->fd.function_version;
 	int ret;
 
 	f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL);
@@ -420,6 +438,14 @@ static int rmi_f34_probe(struct rmi_function *fn)
 	f34->fn = fn;
 	dev_set_drvdata(&fn->dev, f34);
 
+	/* v5 code only supported version 0, try V7 probe */
+	if (version > 0)
+		return rmi_f34v7_probe(f34);
+	else if (version != 0)
+		return -ENODEV;
+
+	f34->bl_version = 5;
+
 	ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
 			     f34_queries, sizeof(f34_queries));
 	if (ret) {
diff --git a/drivers/input/rmi4/rmi_f34.h b/drivers/input/rmi4/rmi_f34.h
index 6cee5282fbb4..2c21056dc375 100644
--- a/drivers/input/rmi4/rmi_f34.h
+++ b/drivers/input/rmi4/rmi_f34.h
@@ -33,6 +33,216 @@
 
 #define F34_BOOTLOADER_ID_LEN	2
 
+/* F34 V7 defines */
+#define V7_FLASH_STATUS_OFFSET		0
+#define V7_PARTITION_ID_OFFSET		1
+#define V7_BLOCK_NUMBER_OFFSET		2
+#define V7_TRANSFER_LENGTH_OFFSET	3
+#define V7_COMMAND_OFFSET		4
+#define V7_PAYLOAD_OFFSET		5
+#define V7_BOOTLOADER_ID_OFFSET		1
+
+#define IMAGE_HEADER_VERSION_10		0x10
+
+#define CONFIG_ID_SIZE			32
+#define PRODUCT_ID_SIZE			10
+
+#define ENABLE_WAIT_MS			(1 * 1000)
+#define WRITE_WAIT_MS			(3 * 1000)
+
+#define MIN_SLEEP_TIME_US		50
+#define MAX_SLEEP_TIME_US		100
+
+#define HAS_BSR				BIT(5)
+#define HAS_CONFIG_ID			BIT(3)
+#define HAS_GUEST_CODE			BIT(6)
+#define HAS_DISP_CFG			BIT(5)
+
+/* F34 V7 commands */
+#define CMD_V7_IDLE			0
+#define CMD_V7_ENTER_BL			1
+#define CMD_V7_READ			2
+#define CMD_V7_WRITE			3
+#define CMD_V7_ERASE			4
+#define CMD_V7_ERASE_AP			5
+#define CMD_V7_SENSOR_ID		6
+
+#define v7_CMD_IDLE			0
+#define v7_CMD_WRITE_FW			1
+#define v7_CMD_WRITE_CONFIG		2
+#define v7_CMD_WRITE_LOCKDOWN		3
+#define v7_CMD_WRITE_GUEST_CODE		4
+#define v7_CMD_READ_CONFIG		5
+#define v7_CMD_ERASE_ALL		6
+#define v7_CMD_ERASE_UI_FIRMWARE	7
+#define v7_CMD_ERASE_UI_CONFIG		8
+#define v7_CMD_ERASE_BL_CONFIG		9
+#define v7_CMD_ERASE_DISP_CONFIG	10
+#define v7_CMD_ERASE_FLASH_CONFIG	11
+#define v7_CMD_ERASE_GUEST_CODE		12
+#define v7_CMD_ENABLE_FLASH_PROG	13
+
+#define v7_UI_CONFIG_AREA		0
+#define v7_PM_CONFIG_AREA		1
+#define v7_BL_CONFIG_AREA		2
+#define v7_DP_CONFIG_AREA		3
+#define v7_FLASH_CONFIG_AREA		4
+
+/* F34 V7 partition IDs */
+#define BOOTLOADER_PARTITION		1
+#define DEVICE_CONFIG_PARTITION		2
+#define FLASH_CONFIG_PARTITION		3
+#define MANUFACTURING_BLOCK_PARTITION	4
+#define GUEST_SERIALIZATION_PARTITION	5
+#define GLOBAL_PARAMETERS_PARTITION	6
+#define CORE_CODE_PARTITION		7
+#define CORE_CONFIG_PARTITION		8
+#define GUEST_CODE_PARTITION		9
+#define DISPLAY_CONFIG_PARTITION	10
+
+/* F34 V7 container IDs */
+#define TOP_LEVEL_CONTAINER			0
+#define UI_CONTAINER				1
+#define UI_CONFIG_CONTAINER			2
+#define BL_CONTAINER				3
+#define BL_IMAGE_CONTAINER			4
+#define BL_CONFIG_CONTAINER			5
+#define BL_LOCKDOWN_INFO_CONTAINER		6
+#define PERMANENT_CONFIG_CONTAINER		7
+#define GUEST_CODE_CONTAINER			8
+#define BL_PROTOCOL_DESCRIPTOR_CONTAINER	9
+#define UI_PROTOCOL_DESCRIPTOR_CONTAINER	10
+#define RMI_SELF_DISCOVERY_CONTAINER		11
+#define RMI_PAGE_CONTENT_CONTAINER		12
+#define GENERAL_INFORMATION_CONTAINER		13
+#define DEVICE_CONFIG_CONTAINER			14
+#define FLASH_CONFIG_CONTAINER			15
+#define GUEST_SERIALIZATION_CONTAINER		16
+#define GLOBAL_PARAMETERS_CONTAINER		17
+#define CORE_CODE_CONTAINER			18
+#define CORE_CONFIG_CONTAINER			19
+#define DISPLAY_CONFIG_CONTAINER		20
+
+struct f34v7_query_1_7 {
+	u8 bl_minor_revision;			/* query 1 */
+	u8 bl_major_revision;
+	__le32 bl_fw_id;			/* query 2 */
+	u8 minimum_write_size;			/* query 3 */
+	__le16 block_size;
+	__le16 flash_page_size;
+	__le16 adjustable_partition_area_size;	/* query 4 */
+	__le16 flash_config_length;		/* query 5 */
+	__le16 payload_length;			/* query 6 */
+	u8 partition_support[4];		/* query 7 */
+} __packed;
+
+struct f34v7_data_1_5 {
+	u8 partition_id;
+	__le16 block_offset;
+	__le16 transfer_length;
+	u8 command;
+	u8 payload[2];
+} __packed;
+
+struct block_data {
+	const void *data;
+	int size;
+};
+
+struct partition_table {
+	u8 partition_id;
+	u8 byte_1_reserved;
+	__le16 partition_length;
+	__le16 start_physical_address;
+	__le16 partition_properties;
+} __packed;
+
+struct physical_address {
+	u16 ui_firmware;
+	u16 ui_config;
+	u16 dp_config;
+	u16 guest_code;
+};
+
+struct container_descriptor {
+	__le32 content_checksum;
+	__le16 container_id;
+	u8 minor_version;
+	u8 major_version;
+	u8 reserved_08;
+	u8 reserved_09;
+	u8 reserved_0a;
+	u8 reserved_0b;
+	u8 container_option_flags[4];
+	__le32 content_options_length;
+	__le32 content_options_address;
+	__le32 content_length;
+	__le32 content_address;
+} __packed;
+
+struct block_count {
+	u16 ui_firmware;
+	u16 ui_config;
+	u16 dp_config;
+	u16 fl_config;
+	u16 pm_config;
+	u16 bl_config;
+	u16 lockdown;
+	u16 guest_code;
+};
+
+struct image_header_10 {
+	__le32 checksum;
+	u8 reserved_04;
+	u8 reserved_05;
+	u8 minor_header_version;
+	u8 major_header_version;
+	u8 reserved_08;
+	u8 reserved_09;
+	u8 reserved_0a;
+	u8 reserved_0b;
+	__le32 top_level_container_start_addr;
+};
+
+struct image_metadata {
+	bool contains_firmware_id;
+	bool contains_bootloader;
+	bool contains_display_cfg;
+	bool contains_guest_code;
+	bool contains_flash_config;
+	unsigned int firmware_id;
+	unsigned int checksum;
+	unsigned int bootloader_size;
+	unsigned int display_cfg_offset;
+	unsigned char bl_version;
+	unsigned char product_id[PRODUCT_ID_SIZE + 1];
+	unsigned char cstmr_product_id[PRODUCT_ID_SIZE + 1];
+	struct block_data bootloader;
+	struct block_data ui_firmware;
+	struct block_data ui_config;
+	struct block_data dp_config;
+	struct block_data fl_config;
+	struct block_data bl_config;
+	struct block_data guest_code;
+	struct block_data lockdown;
+	struct block_count blkcount;
+	struct physical_address phyaddr;
+};
+
+struct register_offset {
+	u8 properties;
+	u8 properties_2;
+	u8 block_size;
+	u8 block_count;
+	u8 gc_block_count;
+	u8 flash_status;
+	u8 partition_id;
+	u8 block_number;
+	u8 transfer_length;
+	u8 flash_cmd;
+	u8 payload;
+};
+
 struct rmi_f34_firmware {
 	__le32 checksum;
 	u8 pad1[3];
@@ -56,13 +266,49 @@ struct f34v5_data {
 	struct mutex flash_mutex;
 };
 
+struct f34v7_data {
+	bool has_display_cfg;
+	bool has_guest_code;
+	bool force_update;
+	bool in_bl_mode;
+	u8 *read_config_buf;
+	size_t read_config_buf_size;
+	u8 command;
+	u8 flash_status;
+	u16 block_size;
+	u16 config_block_count;
+	u16 config_size;
+	u16 config_area;
+	u16 flash_config_length;
+	u16 payload_length;
+	u8 partitions;
+	u16 partition_table_bytes;
+	bool new_partition_table;
+
+	struct register_offset off;
+	struct block_count blkcount;
+	struct physical_address phyaddr;
+	struct image_metadata img;
+
+	const void *config_data;
+	const void *image;
+};
+
 struct f34_data {
 	struct rmi_function *fn;
 
+	u8 bl_version;
 	unsigned char bootloader_id[5];
-	unsigned char configuration_id[9];
+	unsigned char configuration_id[CONFIG_ID_SIZE*2 + 1];
 
-	struct f34v5_data v5;
+	union {
+		struct f34v5_data v5;
+		struct f34v7_data v7;
+	};
 };
 
+int rmi_f34v7_start_reflash(struct f34_data *f34, const struct firmware *fw);
+int rmi_f34v7_do_reflash(struct f34_data *f34, const struct firmware *fw);
+int rmi_f34v7_probe(struct f34_data *f34);
+
 #endif /* _RMI_F34_H */
diff --git a/drivers/input/rmi4/rmi_f34v7.c b/drivers/input/rmi4/rmi_f34v7.c
new file mode 100644
index 000000000000..ca31f9539d9b
--- /dev/null
+++ b/drivers/input/rmi4/rmi_f34v7.c
@@ -0,0 +1,1372 @@
+/*
+ * Copyright (c) 2016, Zodiac Inflight Innovations
+ * Copyright (c) 2007-2016, Synaptics Incorporated
+ * Copyright (C) 2012 Alexandra Chin <alexandra.chin@tw.synaptics.com>
+ * Copyright (C) 2012 Scott Lin <scott.lin@tw.synaptics.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/rmi.h>
+#include <linux/firmware.h>
+#include <asm/unaligned.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "rmi_driver.h"
+#include "rmi_f34.h"
+
+static int rmi_f34v7_read_flash_status(struct f34_data *f34)
+{
+	u8 status;
+	u8 command;
+	int ret;
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			f34->fn->fd.data_base_addr + f34->v7.off.flash_status,
+			&status,
+			sizeof(status));
+	if (ret < 0) {
+		rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+			"%s: Failed to read flash status\n", __func__);
+		return ret;
+	}
+
+	f34->v7.in_bl_mode = status >> 7;
+	f34->v7.flash_status = status & 0x1f;
+
+	if (f34->v7.flash_status != 0x00) {
+		dev_err(&f34->fn->dev, "%s: status=%d, command=0x%02x\n",
+			__func__, f34->v7.flash_status, f34->v7.command);
+	}
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			f34->fn->fd.data_base_addr + f34->v7.off.flash_cmd,
+			&command,
+			sizeof(command));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read flash command\n",
+			__func__);
+		return ret;
+	}
+
+	f34->v7.command = command;
+
+	return 0;
+}
+
+static int rmi_f34v7_wait_for_idle(struct f34_data *f34, int timeout_ms)
+{
+	int count = 0;
+	int timeout_count = ((timeout_ms * 1000) / MAX_SLEEP_TIME_US) + 1;
+
+	do {
+		usleep_range(MIN_SLEEP_TIME_US, MAX_SLEEP_TIME_US);
+
+		count++;
+
+		rmi_f34v7_read_flash_status(f34);
+
+		if ((f34->v7.command == v7_CMD_IDLE)
+		    && (f34->v7.flash_status == 0x00)) {
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"Idle status detected\n");
+			return 0;
+		}
+	} while (count < timeout_count);
+
+	dev_err(&f34->fn->dev,
+		"%s: Timed out waiting for idle status\n", __func__);
+
+	return -ETIMEDOUT;
+}
+
+static int rmi_f34v7_write_command_single_transaction(struct f34_data *f34,
+						      u8 cmd)
+{
+	int ret;
+	u8 base;
+	struct f34v7_data_1_5 data_1_5;
+
+	base = f34->fn->fd.data_base_addr;
+
+	memset(&data_1_5, 0, sizeof(data_1_5));
+
+	switch (cmd) {
+	case v7_CMD_ERASE_ALL:
+		data_1_5.partition_id = CORE_CODE_PARTITION;
+		data_1_5.command = CMD_V7_ERASE_AP;
+		break;
+	case v7_CMD_ERASE_UI_FIRMWARE:
+		data_1_5.partition_id = CORE_CODE_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ERASE_BL_CONFIG:
+		data_1_5.partition_id = GLOBAL_PARAMETERS_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ERASE_UI_CONFIG:
+		data_1_5.partition_id = CORE_CONFIG_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ERASE_DISP_CONFIG:
+		data_1_5.partition_id = DISPLAY_CONFIG_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ERASE_FLASH_CONFIG:
+		data_1_5.partition_id = FLASH_CONFIG_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ERASE_GUEST_CODE:
+		data_1_5.partition_id = GUEST_CODE_PARTITION;
+		data_1_5.command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ENABLE_FLASH_PROG:
+		data_1_5.partition_id = BOOTLOADER_PARTITION;
+		data_1_5.command = CMD_V7_ENTER_BL;
+		break;
+	}
+
+	data_1_5.payload[0] = f34->bootloader_id[0];
+	data_1_5.payload[1] = f34->bootloader_id[1];
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.partition_id,
+			&data_1_5, sizeof(data_1_5));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev,
+			"%s: Failed to write single transaction command\n",
+			__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_write_command(struct f34_data *f34, u8 cmd)
+{
+	int ret;
+	u8 base;
+	u8 command;
+
+	base = f34->fn->fd.data_base_addr;
+
+	switch (cmd) {
+	case v7_CMD_WRITE_FW:
+	case v7_CMD_WRITE_CONFIG:
+	case v7_CMD_WRITE_GUEST_CODE:
+		command = CMD_V7_WRITE;
+		break;
+	case v7_CMD_READ_CONFIG:
+		command = CMD_V7_READ;
+		break;
+	case v7_CMD_ERASE_ALL:
+		command = CMD_V7_ERASE_AP;
+		break;
+	case v7_CMD_ERASE_UI_FIRMWARE:
+	case v7_CMD_ERASE_BL_CONFIG:
+	case v7_CMD_ERASE_UI_CONFIG:
+	case v7_CMD_ERASE_DISP_CONFIG:
+	case v7_CMD_ERASE_FLASH_CONFIG:
+	case v7_CMD_ERASE_GUEST_CODE:
+		command = CMD_V7_ERASE;
+		break;
+	case v7_CMD_ENABLE_FLASH_PROG:
+		command = CMD_V7_ENTER_BL;
+		break;
+	default:
+		dev_err(&f34->fn->dev, "%s: Invalid command 0x%02x\n",
+			__func__, cmd);
+		return -EINVAL;
+	}
+
+	f34->v7.command = command;
+
+	switch (cmd) {
+	case v7_CMD_ERASE_ALL:
+	case v7_CMD_ERASE_UI_FIRMWARE:
+	case v7_CMD_ERASE_BL_CONFIG:
+	case v7_CMD_ERASE_UI_CONFIG:
+	case v7_CMD_ERASE_DISP_CONFIG:
+	case v7_CMD_ERASE_FLASH_CONFIG:
+	case v7_CMD_ERASE_GUEST_CODE:
+	case v7_CMD_ENABLE_FLASH_PROG:
+		ret = rmi_f34v7_write_command_single_transaction(f34, cmd);
+		if (ret < 0)
+			return ret;
+		else
+			return 0;
+	default:
+		break;
+	}
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "%s: writing cmd %02X\n",
+		__func__, command);
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.flash_cmd,
+			&command, sizeof(command));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write flash command\n",
+			__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_write_partition_id(struct f34_data *f34, u8 cmd)
+{
+	int ret;
+	u8 base;
+	u8 partition;
+
+	base = f34->fn->fd.data_base_addr;
+
+	switch (cmd) {
+	case v7_CMD_WRITE_FW:
+		partition = CORE_CODE_PARTITION;
+		break;
+	case v7_CMD_WRITE_CONFIG:
+	case v7_CMD_READ_CONFIG:
+		if (f34->v7.config_area == v7_UI_CONFIG_AREA)
+			partition = CORE_CONFIG_PARTITION;
+		else if (f34->v7.config_area == v7_DP_CONFIG_AREA)
+			partition = DISPLAY_CONFIG_PARTITION;
+		else if (f34->v7.config_area == v7_PM_CONFIG_AREA)
+			partition = GUEST_SERIALIZATION_PARTITION;
+		else if (f34->v7.config_area == v7_BL_CONFIG_AREA)
+			partition = GLOBAL_PARAMETERS_PARTITION;
+		else if (f34->v7.config_area == v7_FLASH_CONFIG_AREA)
+			partition = FLASH_CONFIG_PARTITION;
+		break;
+	case v7_CMD_WRITE_GUEST_CODE:
+		partition = GUEST_CODE_PARTITION;
+		break;
+	case v7_CMD_ERASE_ALL:
+		partition = CORE_CODE_PARTITION;
+		break;
+	case v7_CMD_ERASE_BL_CONFIG:
+		partition = GLOBAL_PARAMETERS_PARTITION;
+		break;
+	case v7_CMD_ERASE_UI_CONFIG:
+		partition = CORE_CONFIG_PARTITION;
+		break;
+	case v7_CMD_ERASE_DISP_CONFIG:
+		partition = DISPLAY_CONFIG_PARTITION;
+		break;
+	case v7_CMD_ERASE_FLASH_CONFIG:
+		partition = FLASH_CONFIG_PARTITION;
+		break;
+	case v7_CMD_ERASE_GUEST_CODE:
+		partition = GUEST_CODE_PARTITION;
+		break;
+	case v7_CMD_ENABLE_FLASH_PROG:
+		partition = BOOTLOADER_PARTITION;
+		break;
+	default:
+		dev_err(&f34->fn->dev, "%s: Invalid command 0x%02x\n",
+			__func__, cmd);
+		return -EINVAL;
+	}
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.partition_id,
+			&partition, sizeof(partition));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write partition ID\n",
+			__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_read_f34v7_partition_table(struct f34_data *f34)
+{
+	int ret;
+	u8 base;
+	__le16 length;
+	u16 block_number = 0;
+
+	base = f34->fn->fd.data_base_addr;
+
+	f34->v7.config_area = v7_FLASH_CONFIG_AREA;
+
+	ret = rmi_f34v7_write_partition_id(f34, v7_CMD_READ_CONFIG);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.block_number,
+			&block_number, sizeof(block_number));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write block number\n",
+			__func__);
+		return ret;
+	}
+
+	put_unaligned_le16(f34->v7.flash_config_length, &length);
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.transfer_length,
+			&length, sizeof(length));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write transfer length\n",
+			__func__);
+		return ret;
+	}
+
+	ret = rmi_f34v7_write_command(f34, v7_CMD_READ_CONFIG);
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write command\n",
+			__func__);
+		return ret;
+	}
+
+	ret = rmi_f34v7_wait_for_idle(f34, WRITE_WAIT_MS);
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to wait for idle status\n",
+			__func__);
+		return ret;
+	}
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			base + f34->v7.off.payload,
+			f34->v7.read_config_buf,
+			f34->v7.partition_table_bytes);
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read block data\n",
+			__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void rmi_f34v7_parse_partition_table(struct f34_data *f34,
+					    const void *partition_table,
+					    struct block_count *blkcount,
+					    struct physical_address *phyaddr)
+{
+	int i;
+	int index;
+	u16 partition_length;
+	u16 physical_address;
+	const struct partition_table *ptable;
+
+	for (i = 0; i < f34->v7.partitions; i++) {
+		index = i * 8 + 2;
+		ptable = partition_table + index;
+		partition_length = le16_to_cpu(ptable->partition_length);
+		physical_address = le16_to_cpu(ptable->start_physical_address);
+		rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+			"%s: Partition entry %d: %*ph\n",
+			__func__, i, sizeof(struct partition_table), ptable);
+		switch (ptable->partition_id & 0x1f) {
+		case CORE_CODE_PARTITION:
+			blkcount->ui_firmware = partition_length;
+			phyaddr->ui_firmware = physical_address;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Core code block count: %d\n",
+				__func__, blkcount->ui_firmware);
+			break;
+		case CORE_CONFIG_PARTITION:
+			blkcount->ui_config = partition_length;
+			phyaddr->ui_config = physical_address;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Core config block count: %d\n",
+				__func__, blkcount->ui_config);
+			break;
+		case DISPLAY_CONFIG_PARTITION:
+			blkcount->dp_config = partition_length;
+			phyaddr->dp_config = physical_address;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Display config block count: %d\n",
+				__func__, blkcount->dp_config);
+			break;
+		case FLASH_CONFIG_PARTITION:
+			blkcount->fl_config = partition_length;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Flash config block count: %d\n",
+				__func__, blkcount->fl_config);
+			break;
+		case GUEST_CODE_PARTITION:
+			blkcount->guest_code = partition_length;
+			phyaddr->guest_code = physical_address;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Guest code block count: %d\n",
+				__func__, blkcount->guest_code);
+			break;
+		case GUEST_SERIALIZATION_PARTITION:
+			blkcount->pm_config = partition_length;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Guest serialization block count: %d\n",
+				__func__, blkcount->pm_config);
+			break;
+		case GLOBAL_PARAMETERS_PARTITION:
+			blkcount->bl_config = partition_length;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Global parameters block count: %d\n",
+				__func__, blkcount->bl_config);
+			break;
+		case DEVICE_CONFIG_PARTITION:
+			blkcount->lockdown = partition_length;
+			rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+				"%s: Device config block count: %d\n",
+				__func__, blkcount->lockdown);
+			break;
+		}
+	}
+}
+
+static int rmi_f34v7_read_queries_bl_version(struct f34_data *f34)
+{
+	int ret;
+	u8 base;
+	int offset;
+	u8 query_0;
+	struct f34v7_query_1_7 query_1_7;
+
+	base = f34->fn->fd.query_base_addr;
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			base,
+			&query_0,
+			sizeof(query_0));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev,
+			"%s: Failed to read query 0\n", __func__);
+		return ret;
+	}
+
+	offset = (query_0 & 0x7) + 1;
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			base + offset,
+			&query_1_7,
+			sizeof(query_1_7));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read queries 1 to 7\n",
+			__func__);
+		return ret;
+	}
+
+	f34->bootloader_id[0] = query_1_7.bl_minor_revision;
+	f34->bootloader_id[1] = query_1_7.bl_major_revision;
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "Bootloader V%d.%d\n",
+		f34->bootloader_id[1], f34->bootloader_id[0]);
+
+	return 0;
+}
+
+static int rmi_f34v7_read_queries(struct f34_data *f34)
+{
+	int ret;
+	int i, j;
+	u8 base;
+	int offset;
+	u8 *ptable;
+	u8 query_0;
+	struct f34v7_query_1_7 query_1_7;
+
+	base = f34->fn->fd.query_base_addr;
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			base,
+			&query_0,
+			sizeof(query_0));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev,
+			"%s: Failed to read query 0\n", __func__);
+		return ret;
+	}
+
+	offset = (query_0 & 0x07) + 1;
+
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			base + offset,
+			&query_1_7,
+			sizeof(query_1_7));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read queries 1 to 7\n",
+			__func__);
+		return ret;
+	}
+
+	f34->bootloader_id[0] = query_1_7.bl_minor_revision;
+	f34->bootloader_id[1] = query_1_7.bl_major_revision;
+
+	f34->v7.block_size = le16_to_cpu(query_1_7.block_size);
+	f34->v7.flash_config_length =
+			le16_to_cpu(query_1_7.flash_config_length);
+	f34->v7.payload_length = le16_to_cpu(query_1_7.payload_length);
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "%s: f34->v7.block_size = %d\n",
+		 __func__, f34->v7.block_size);
+
+	f34->v7.off.flash_status = V7_FLASH_STATUS_OFFSET;
+	f34->v7.off.partition_id = V7_PARTITION_ID_OFFSET;
+	f34->v7.off.block_number = V7_BLOCK_NUMBER_OFFSET;
+	f34->v7.off.transfer_length = V7_TRANSFER_LENGTH_OFFSET;
+	f34->v7.off.flash_cmd = V7_COMMAND_OFFSET;
+	f34->v7.off.payload = V7_PAYLOAD_OFFSET;
+
+	f34->v7.has_display_cfg = query_1_7.partition_support[1] & HAS_DISP_CFG;
+	f34->v7.has_guest_code =
+			query_1_7.partition_support[1] & HAS_GUEST_CODE;
+
+	if (query_0 & HAS_CONFIG_ID) {
+		char f34_ctrl[CONFIG_ID_SIZE];
+		int i = 0;
+		u8 *p = f34->configuration_id;
+		*p = '\0';
+
+		ret = rmi_read_block(f34->fn->rmi_dev,
+				f34->fn->fd.control_base_addr,
+				f34_ctrl,
+				sizeof(f34_ctrl));
+		if (ret)
+			return ret;
+
+		/* Eat leading zeros */
+		while (i < sizeof(f34_ctrl) && !f34_ctrl[i])
+			i++;
+
+		for (; i < sizeof(f34_ctrl); i++)
+			p += snprintf(p, f34->configuration_id
+				      + sizeof(f34->configuration_id) - p,
+				      "%02X", f34_ctrl[i]);
+
+		rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "Configuration ID: %s\n",
+			f34->configuration_id);
+	}
+
+	f34->v7.partitions = 0;
+	for (i = 0; i < sizeof(query_1_7.partition_support); i++)
+		for (j = 0; j < 8; j++)
+			if (query_1_7.partition_support[i] & (1 << j))
+				f34->v7.partitions++;
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "%s: Supported partitions: %*ph\n",
+		__func__, sizeof(query_1_7.partition_support),
+		query_1_7.partition_support);
+
+
+	f34->v7.partition_table_bytes = f34->v7.partitions * 8 + 2;
+
+	f34->v7.read_config_buf = devm_kzalloc(&f34->fn->dev,
+			f34->v7.partition_table_bytes,
+			GFP_KERNEL);
+	if (!f34->v7.read_config_buf) {
+		f34->v7.read_config_buf_size = 0;
+		return -ENOMEM;
+	}
+
+	f34->v7.read_config_buf_size = f34->v7.partition_table_bytes;
+	ptable = f34->v7.read_config_buf;
+
+	ret = rmi_f34v7_read_f34v7_partition_table(f34);
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read partition table\n",
+				__func__);
+		return ret;
+	}
+
+	rmi_f34v7_parse_partition_table(f34, ptable,
+					&f34->v7.blkcount, &f34->v7.phyaddr);
+
+	return 0;
+}
+
+static int rmi_f34v7_check_ui_firmware_size(struct f34_data *f34)
+{
+	u16 block_count;
+
+	block_count = f34->v7.img.ui_firmware.size / f34->v7.block_size;
+
+	if (block_count != f34->v7.blkcount.ui_firmware) {
+		dev_err(&f34->fn->dev,
+			"UI firmware size mismatch: %d != %d\n",
+			block_count, f34->v7.blkcount.ui_firmware);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_check_ui_config_size(struct f34_data *f34)
+{
+	u16 block_count;
+
+	block_count = f34->v7.img.ui_config.size / f34->v7.block_size;
+
+	if (block_count != f34->v7.blkcount.ui_config) {
+		dev_err(&f34->fn->dev, "UI config size mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_check_dp_config_size(struct f34_data *f34)
+{
+	u16 block_count;
+
+	block_count = f34->v7.img.dp_config.size / f34->v7.block_size;
+
+	if (block_count != f34->v7.blkcount.dp_config) {
+		dev_err(&f34->fn->dev, "Display config size mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_check_guest_code_size(struct f34_data *f34)
+{
+	u16 block_count;
+
+	block_count = f34->v7.img.guest_code.size / f34->v7.block_size;
+	if (block_count != f34->v7.blkcount.guest_code) {
+		dev_err(&f34->fn->dev, "Guest code size mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_check_bl_config_size(struct f34_data *f34)
+{
+	u16 block_count;
+
+	block_count = f34->v7.img.bl_config.size / f34->v7.block_size;
+
+	if (block_count != f34->v7.blkcount.bl_config) {
+		dev_err(&f34->fn->dev, "Bootloader config size mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_erase_config(struct f34_data *f34)
+{
+	int ret;
+
+	dev_info(&f34->fn->dev, "Erasing config...\n");
+
+	switch (f34->v7.config_area) {
+	case v7_UI_CONFIG_AREA:
+		ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_UI_CONFIG);
+		if (ret < 0)
+			return ret;
+		break;
+	case v7_DP_CONFIG_AREA:
+		ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_DISP_CONFIG);
+		if (ret < 0)
+			return ret;
+		break;
+	case v7_BL_CONFIG_AREA:
+		ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_BL_CONFIG);
+		if (ret < 0)
+			return ret;
+		break;
+	}
+
+	ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+	if (ret < 0)
+		return ret;
+
+	return ret;
+}
+
+static int rmi_f34v7_erase_guest_code(struct f34_data *f34)
+{
+	int ret;
+
+	dev_info(&f34->fn->dev, "Erasing guest code...\n");
+
+	ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_GUEST_CODE);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int rmi_f34v7_erase_all(struct f34_data *f34)
+{
+	int ret;
+
+	dev_info(&f34->fn->dev, "Erasing firmware...\n");
+
+	ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_UI_FIRMWARE);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+	if (ret < 0)
+		return ret;
+
+	f34->v7.config_area = v7_UI_CONFIG_AREA;
+	ret = rmi_f34v7_erase_config(f34);
+	if (ret < 0)
+		return ret;
+
+	if (f34->v7.has_display_cfg) {
+		f34->v7.config_area = v7_DP_CONFIG_AREA;
+		ret = rmi_f34v7_erase_config(f34);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (f34->v7.new_partition_table && f34->v7.has_guest_code) {
+		ret = rmi_f34v7_erase_guest_code(f34);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_f34v7_read_f34v7_blocks(struct f34_data *f34, u16 block_cnt,
+				       u8 command)
+{
+	int ret;
+	u8 base;
+	__le16 length;
+	u16 transfer;
+	u16 max_transfer;
+	u16 remaining = block_cnt;
+	u16 block_number = 0;
+	u16 index = 0;
+
+	base = f34->fn->fd.data_base_addr;
+
+	ret = rmi_f34v7_write_partition_id(f34, command);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.block_number,
+			&block_number, sizeof(block_number));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write block number\n",
+			__func__);
+		return ret;
+	}
+
+	max_transfer = min(f34->v7.payload_length,
+			   (u16)(PAGE_SIZE / f34->v7.block_size));
+
+	do {
+		transfer = min(remaining, max_transfer);
+		put_unaligned_le16(transfer, &length);
+
+		ret = rmi_write_block(f34->fn->rmi_dev,
+				base + f34->v7.off.transfer_length,
+				&length, sizeof(length));
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Write transfer length fail (%d remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		ret = rmi_f34v7_write_command(f34, command);
+		if (ret < 0)
+			return ret;
+
+		ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Wait for idle failed (%d blks remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		ret = rmi_read_block(f34->fn->rmi_dev,
+				base + f34->v7.off.payload,
+				&f34->v7.read_config_buf[index],
+				transfer * f34->v7.block_size);
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Read block failed (%d blks remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		index += (transfer * f34->v7.block_size);
+		remaining -= transfer;
+	} while (remaining);
+
+	return 0;
+}
+
+static int rmi_f34v7_write_f34v7_blocks(struct f34_data *f34,
+					const void *block_ptr, u16 block_cnt,
+					u8 command)
+{
+	int ret;
+	u8 base;
+	__le16 length;
+	u16 transfer;
+	u16 max_transfer;
+	u16 remaining = block_cnt;
+	u16 block_number = 0;
+
+	base = f34->fn->fd.data_base_addr;
+
+	ret = rmi_f34v7_write_partition_id(f34, command);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_write_block(f34->fn->rmi_dev,
+			base + f34->v7.off.block_number,
+			&block_number, sizeof(block_number));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to write block number\n",
+			__func__);
+		return ret;
+	}
+
+	if (f34->v7.payload_length > (PAGE_SIZE / f34->v7.block_size))
+		max_transfer = PAGE_SIZE / f34->v7.block_size;
+	else
+		max_transfer = f34->v7.payload_length;
+
+	do {
+		transfer = min(remaining, max_transfer);
+		put_unaligned_le16(transfer, &length);
+
+		ret = rmi_write_block(f34->fn->rmi_dev,
+				base + f34->v7.off.transfer_length,
+				&length, sizeof(length));
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Write transfer length fail (%d remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		ret = rmi_f34v7_write_command(f34, command);
+		if (ret < 0)
+			return ret;
+
+		ret = rmi_write_block(f34->fn->rmi_dev,
+				base + f34->v7.off.payload,
+				block_ptr, transfer * f34->v7.block_size);
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Failed writing data (%d blks remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+		if (ret < 0) {
+			dev_err(&f34->fn->dev,
+				"%s: Failed wait for idle (%d blks remaining)\n",
+				__func__, remaining);
+			return ret;
+		}
+
+		block_ptr += (transfer * f34->v7.block_size);
+		remaining -= transfer;
+	} while (remaining);
+
+	return 0;
+}
+
+static int rmi_f34v7_write_config(struct f34_data *f34)
+{
+	return rmi_f34v7_write_f34v7_blocks(f34, f34->v7.config_data,
+					    f34->v7.config_block_count,
+					    v7_CMD_WRITE_CONFIG);
+}
+
+static int rmi_f34v7_write_ui_config(struct f34_data *f34)
+{
+	f34->v7.config_area = v7_UI_CONFIG_AREA;
+	f34->v7.config_data = f34->v7.img.ui_config.data;
+	f34->v7.config_size = f34->v7.img.ui_config.size;
+	f34->v7.config_block_count = f34->v7.config_size / f34->v7.block_size;
+
+	return rmi_f34v7_write_config(f34);
+}
+
+static int rmi_f34v7_write_dp_config(struct f34_data *f34)
+{
+	f34->v7.config_area = v7_DP_CONFIG_AREA;
+	f34->v7.config_data = f34->v7.img.dp_config.data;
+	f34->v7.config_size = f34->v7.img.dp_config.size;
+	f34->v7.config_block_count = f34->v7.config_size / f34->v7.block_size;
+
+	return rmi_f34v7_write_config(f34);
+}
+
+static int rmi_f34v7_write_guest_code(struct f34_data *f34)
+{
+	return rmi_f34v7_write_f34v7_blocks(f34, f34->v7.img.guest_code.data,
+					    f34->v7.img.guest_code.size /
+							f34->v7.block_size,
+					    v7_CMD_WRITE_GUEST_CODE);
+}
+
+static int rmi_f34v7_write_flash_config(struct f34_data *f34)
+{
+	int ret;
+
+	f34->v7.config_area = v7_FLASH_CONFIG_AREA;
+	f34->v7.config_data = f34->v7.img.fl_config.data;
+	f34->v7.config_size = f34->v7.img.fl_config.size;
+	f34->v7.config_block_count = f34->v7.config_size / f34->v7.block_size;
+
+	if (f34->v7.config_block_count != f34->v7.blkcount.fl_config) {
+		dev_err(&f34->fn->dev, "%s: Flash config size mismatch\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	ret = rmi_f34v7_write_command(f34, v7_CMD_ERASE_FLASH_CONFIG);
+	if (ret < 0)
+		return ret;
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+		"%s: Erase flash config command written\n", __func__);
+
+	ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_write_config(f34);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int rmi_f34v7_write_partition_table(struct f34_data *f34)
+{
+	u16 block_count;
+	int ret;
+
+	block_count = f34->v7.blkcount.bl_config;
+	f34->v7.config_area = v7_BL_CONFIG_AREA;
+	f34->v7.config_size = f34->v7.block_size * block_count;
+	devm_kfree(&f34->fn->dev, f34->v7.read_config_buf);
+	f34->v7.read_config_buf = devm_kzalloc(&f34->fn->dev,
+					       f34->v7.config_size, GFP_KERNEL);
+	if (!f34->v7.read_config_buf) {
+		f34->v7.read_config_buf_size = 0;
+		return -ENOMEM;
+	}
+
+	f34->v7.read_config_buf_size = f34->v7.config_size;
+
+	ret = rmi_f34v7_read_f34v7_blocks(f34, block_count, v7_CMD_READ_CONFIG);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_erase_config(f34);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_write_flash_config(f34);
+	if (ret < 0)
+		return ret;
+
+	f34->v7.config_area = v7_BL_CONFIG_AREA;
+	f34->v7.config_data = f34->v7.read_config_buf;
+	f34->v7.config_size = f34->v7.img.bl_config.size;
+	f34->v7.config_block_count = f34->v7.config_size / f34->v7.block_size;
+
+	ret = rmi_f34v7_write_config(f34);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int rmi_f34v7_write_firmware(struct f34_data *f34)
+{
+	u16 blk_count;
+
+	blk_count = f34->v7.img.ui_firmware.size / f34->v7.block_size;
+
+	return rmi_f34v7_write_f34v7_blocks(f34, f34->v7.img.ui_firmware.data,
+					    blk_count, v7_CMD_WRITE_FW);
+}
+
+static void rmi_f34v7_compare_partition_tables(struct f34_data *f34)
+{
+	if (f34->v7.phyaddr.ui_firmware != f34->v7.img.phyaddr.ui_firmware) {
+		f34->v7.new_partition_table = true;
+		return;
+	}
+
+	if (f34->v7.phyaddr.ui_config != f34->v7.img.phyaddr.ui_config) {
+		f34->v7.new_partition_table = true;
+		return;
+	}
+
+	if (f34->v7.has_display_cfg &&
+	    f34->v7.phyaddr.dp_config != f34->v7.img.phyaddr.dp_config) {
+		f34->v7.new_partition_table = true;
+		return;
+	}
+
+	if (f34->v7.has_guest_code &&
+	    f34->v7.phyaddr.guest_code != f34->v7.img.phyaddr.guest_code) {
+		f34->v7.new_partition_table = true;
+		return;
+	}
+
+	f34->v7.new_partition_table = false;
+}
+
+static void rmi_f34v7_parse_img_header_10_bl_container(struct f34_data *f34,
+						       const void *image)
+{
+	int i;
+	int num_of_containers;
+	unsigned int addr;
+	unsigned int container_id;
+	unsigned int length;
+	const void *content;
+	const struct container_descriptor *descriptor;
+
+	num_of_containers = f34->v7.img.bootloader.size / 4 - 1;
+
+	for (i = 1; i <= num_of_containers; i++) {
+		addr = get_unaligned_le32(f34->v7.img.bootloader.data + i * 4);
+		descriptor = image + addr;
+		container_id = le16_to_cpu(descriptor->container_id);
+		content = image + le32_to_cpu(descriptor->content_address);
+		length = le32_to_cpu(descriptor->content_length);
+		switch (container_id) {
+		case BL_CONFIG_CONTAINER:
+		case GLOBAL_PARAMETERS_CONTAINER:
+			f34->v7.img.bl_config.data = content;
+			f34->v7.img.bl_config.size = length;
+			break;
+		case BL_LOCKDOWN_INFO_CONTAINER:
+		case DEVICE_CONFIG_CONTAINER:
+			f34->v7.img.lockdown.data = content;
+			f34->v7.img.lockdown.size = length;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static void rmi_f34v7_parse_image_header_10(struct f34_data *f34)
+{
+	unsigned int i;
+	unsigned int num_of_containers;
+	unsigned int addr;
+	unsigned int offset;
+	unsigned int container_id;
+	unsigned int length;
+	const void *image = f34->v7.image;
+	const u8 *content;
+	const struct container_descriptor *descriptor;
+	const struct image_header_10 *header = image;
+
+	f34->v7.img.checksum = le32_to_cpu(header->checksum);
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev, "%s: f34->v7.img.checksum=%X\n",
+		__func__, f34->v7.img.checksum);
+
+	/* address of top level container */
+	offset = le32_to_cpu(header->top_level_container_start_addr);
+	descriptor = image + offset;
+
+	/* address of top level container content */
+	offset = le32_to_cpu(descriptor->content_address);
+	num_of_containers = le32_to_cpu(descriptor->content_length) / 4;
+
+	for (i = 0; i < num_of_containers; i++) {
+		addr = get_unaligned_le32(image + offset);
+		offset += 4;
+		descriptor = image + addr;
+		container_id = le16_to_cpu(descriptor->container_id);
+		content = image + le32_to_cpu(descriptor->content_address);
+		length = le32_to_cpu(descriptor->content_length);
+
+		rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+			"%s: container_id=%d, length=%d\n", __func__,
+			container_id, length);
+
+		switch (container_id) {
+		case UI_CONTAINER:
+		case CORE_CODE_CONTAINER:
+			f34->v7.img.ui_firmware.data = content;
+			f34->v7.img.ui_firmware.size = length;
+			break;
+		case UI_CONFIG_CONTAINER:
+		case CORE_CONFIG_CONTAINER:
+			f34->v7.img.ui_config.data = content;
+			f34->v7.img.ui_config.size = length;
+			break;
+		case BL_CONTAINER:
+			f34->v7.img.bl_version = *content;
+			f34->v7.img.bootloader.data = content;
+			f34->v7.img.bootloader.size = length;
+			rmi_f34v7_parse_img_header_10_bl_container(f34, image);
+			break;
+		case GUEST_CODE_CONTAINER:
+			f34->v7.img.contains_guest_code = true;
+			f34->v7.img.guest_code.data = content;
+			f34->v7.img.guest_code.size = length;
+			break;
+		case DISPLAY_CONFIG_CONTAINER:
+			f34->v7.img.contains_display_cfg = true;
+			f34->v7.img.dp_config.data = content;
+			f34->v7.img.dp_config.size = length;
+			break;
+		case FLASH_CONFIG_CONTAINER:
+			f34->v7.img.contains_flash_config = true;
+			f34->v7.img.fl_config.data = content;
+			f34->v7.img.fl_config.size = length;
+			break;
+		case GENERAL_INFORMATION_CONTAINER:
+			f34->v7.img.contains_firmware_id = true;
+			f34->v7.img.firmware_id =
+				get_unaligned_le32(content + 4);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int rmi_f34v7_parse_image_info(struct f34_data *f34)
+{
+	const struct image_header_10 *header = f34->v7.image;
+
+	memset(&f34->v7.img, 0x00, sizeof(f34->v7.img));
+
+	rmi_dbg(RMI_DEBUG_FN, &f34->fn->dev,
+		"%s: header->major_header_version = %d\n",
+		__func__, header->major_header_version);
+
+	switch (header->major_header_version) {
+	case IMAGE_HEADER_VERSION_10:
+		rmi_f34v7_parse_image_header_10(f34);
+		break;
+	default:
+		dev_err(&f34->fn->dev, "Unsupported image file format %02X\n",
+			header->major_header_version);
+		return -EINVAL;
+	}
+
+	if (!f34->v7.img.contains_flash_config) {
+		dev_err(&f34->fn->dev, "%s: No flash config in fw image\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	rmi_f34v7_parse_partition_table(f34, f34->v7.img.fl_config.data,
+			&f34->v7.img.blkcount, &f34->v7.img.phyaddr);
+
+	rmi_f34v7_compare_partition_tables(f34);
+
+	return 0;
+}
+
+int rmi_f34v7_do_reflash(struct f34_data *f34, const struct firmware *fw)
+{
+	int ret;
+
+	rmi_f34v7_read_queries_bl_version(f34);
+
+	f34->v7.image = fw->data;
+
+	ret = rmi_f34v7_parse_image_info(f34);
+	if (ret < 0)
+		goto fail;
+
+	if (!f34->v7.new_partition_table) {
+		ret = rmi_f34v7_check_ui_firmware_size(f34);
+		if (ret < 0)
+			goto fail;
+
+		ret = rmi_f34v7_check_ui_config_size(f34);
+		if (ret < 0)
+			goto fail;
+
+		if (f34->v7.has_display_cfg &&
+		    f34->v7.img.contains_display_cfg) {
+			ret = rmi_f34v7_check_dp_config_size(f34);
+			if (ret < 0)
+				goto fail;
+		}
+
+		if (f34->v7.has_guest_code && f34->v7.img.contains_guest_code) {
+			ret = rmi_f34v7_check_guest_code_size(f34);
+			if (ret < 0)
+				goto fail;
+		}
+	} else {
+		ret = rmi_f34v7_check_bl_config_size(f34);
+		if (ret < 0)
+			goto fail;
+	}
+
+	ret = rmi_f34v7_erase_all(f34);
+	if (ret < 0)
+		goto fail;
+
+	if (f34->v7.new_partition_table) {
+		ret = rmi_f34v7_write_partition_table(f34);
+		if (ret < 0)
+			goto fail;
+		dev_info(&f34->fn->dev, "%s: Partition table programmed\n",
+			 __func__);
+	}
+
+	dev_info(&f34->fn->dev, "Writing firmware (%d bytes)...\n",
+		 f34->v7.img.ui_firmware.size);
+
+	ret = rmi_f34v7_write_firmware(f34);
+	if (ret < 0)
+		goto fail;
+
+	dev_info(&f34->fn->dev, "Writing config (%d bytes)...\n",
+		 f34->v7.img.ui_config.size);
+
+	f34->v7.config_area = v7_UI_CONFIG_AREA;
+	ret = rmi_f34v7_write_ui_config(f34);
+	if (ret < 0)
+		goto fail;
+
+	if (f34->v7.has_display_cfg && f34->v7.img.contains_display_cfg) {
+		dev_info(&f34->fn->dev, "Writing display config...\n");
+
+		ret = rmi_f34v7_write_dp_config(f34);
+		if (ret < 0)
+			goto fail;
+	}
+
+	if (f34->v7.new_partition_table) {
+		if (f34->v7.has_guest_code && f34->v7.img.contains_guest_code) {
+			dev_info(&f34->fn->dev, "Writing guest code...\n");
+
+			ret = rmi_f34v7_write_guest_code(f34);
+			if (ret < 0)
+				goto fail;
+		}
+	}
+
+fail:
+	return ret;
+}
+
+static int rmi_f34v7_enter_flash_prog(struct f34_data *f34)
+{
+	int ret;
+
+	ret = rmi_f34v7_read_flash_status(f34);
+	if (ret < 0)
+		return ret;
+
+	if (f34->v7.in_bl_mode)
+		return 0;
+
+	ret = rmi_f34v7_write_command(f34, v7_CMD_ENABLE_FLASH_PROG);
+	if (ret < 0)
+		return ret;
+
+	ret = rmi_f34v7_wait_for_idle(f34, ENABLE_WAIT_MS);
+	if (ret < 0)
+		return ret;
+
+	if (!f34->v7.in_bl_mode) {
+		dev_err(&f34->fn->dev, "%s: BL mode not entered\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int rmi_f34v7_start_reflash(struct f34_data *f34, const struct firmware *fw)
+{
+	int ret = 0;
+
+	f34->v7.config_area = v7_UI_CONFIG_AREA;
+	f34->v7.image = fw->data;
+
+	ret = rmi_f34v7_parse_image_info(f34);
+	if (ret < 0)
+		goto exit;
+
+	if (!f34->v7.force_update && f34->v7.new_partition_table) {
+		dev_err(&f34->fn->dev, "%s: Partition table mismatch\n",
+				__func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	dev_info(&f34->fn->dev, "Firmware image OK\n");
+
+	ret = rmi_f34v7_read_flash_status(f34);
+	if (ret < 0)
+		goto exit;
+
+	if (f34->v7.in_bl_mode) {
+		dev_info(&f34->fn->dev, "%s: Device in bootloader mode\n",
+				__func__);
+	}
+
+	rmi_f34v7_enter_flash_prog(f34);
+
+	return 0;
+
+exit:
+	return ret;
+}
+
+int rmi_f34v7_probe(struct f34_data *f34)
+{
+	int ret;
+
+	/* Read bootloader version */
+	ret = rmi_read_block(f34->fn->rmi_dev,
+			f34->fn->fd.query_base_addr + V7_BOOTLOADER_ID_OFFSET,
+			f34->bootloader_id,
+			sizeof(f34->bootloader_id));
+	if (ret < 0) {
+		dev_err(&f34->fn->dev, "%s: Failed to read bootloader ID\n",
+			__func__);
+		return ret;
+	}
+
+	if (f34->bootloader_id[1] == '5') {
+		f34->bl_version = 5;
+	} else if (f34->bootloader_id[1] == '6') {
+		f34->bl_version = 6;
+	} else if (f34->bootloader_id[1] == 7) {
+		f34->bl_version = 7;
+	} else {
+		dev_err(&f34->fn->dev, "%s: Unrecognized bootloader version\n",
+				__func__);
+		return -EINVAL;
+	}
+
+	memset(&f34->v7.blkcount, 0x00, sizeof(f34->v7.blkcount));
+	memset(&f34->v7.phyaddr, 0x00, sizeof(f34->v7.phyaddr));
+	rmi_f34v7_read_queries(f34);
+
+	f34->v7.force_update = false;
+	return 0;
+}
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
index ac910f730688..64125443f8a6 100644
--- a/include/linux/rmi.h
+++ b/include/linux/rmi.h
@@ -342,7 +342,7 @@ struct rmi_driver_data {
 
 	struct rmi_function *f01_container;
 	struct rmi_function *f34_container;
-	bool f01_bootloader_mode;
+	bool bootloader_mode;
 
 	int num_of_irq_regs;
 	int irq_count;
-- 
cgit v1.2.3


From 0dde584882ade13dc9708d611fbf69b0ae8a9e48 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 2 Dec 2016 16:35:09 +0100
Subject: libceph: drop len argument of *verify_authorizer_reply()

The length of the reply is protocol-dependent - for cephx it's
ceph_x_authorize_reply.  Nothing sensible can be passed from the
messenger layer anyway.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c           | 4 ++--
 include/linux/ceph/auth.h      | 5 ++---
 include/linux/ceph/messenger.h | 2 +-
 net/ceph/auth.c                | 4 ++--
 net/ceph/auth_x.c              | 2 +-
 net/ceph/messenger.c           | 2 +-
 net/ceph/osd_client.c          | 4 ++--
 7 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 815acd1a56d4..bf4d3d26850c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3943,13 +3943,13 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 }
 
 
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
+static int verify_authorizer_reply(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
-	return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len);
+	return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
 }
 
 static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 374bb1c4ef52..a6747789fe5c 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,7 +64,7 @@ struct ceph_auth_client_ops {
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
-				       struct ceph_authorizer *a, size_t len);
+				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
 				      int peer_type);
 
@@ -118,8 +118,7 @@ extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
-					     struct ceph_authorizer *a,
-					     size_t len);
+					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
 					    int peer_type);
 
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 8dbd7879fdc6..531f2d882bd9 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -30,7 +30,7 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 			       int *proto, int force_new);
-	int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
+	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);
 
 	/* there was some error on the socket (disconnect, whatever) */
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index c822b3ae1bd3..48bb8d95195b 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,13 +315,13 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 EXPORT_SYMBOL(ceph_auth_update_authorizer);
 
 int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
-				      struct ceph_authorizer *a, size_t len)
+				      struct ceph_authorizer *a)
 {
 	int ret = 0;
 
 	mutex_lock(&ac->mutex);
 	if (ac->ops && ac->ops->verify_authorizer_reply)
-		ret = ac->ops->verify_authorizer_reply(ac, a, len);
+		ret = ac->ops->verify_authorizer_reply(ac, a);
 	mutex_unlock(&ac->mutex);
 	return ret;
 }
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index b216131915e7..2034fb926670 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -623,7 +623,7 @@ static int ceph_x_update_authorizer(
 }
 
 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
-					  struct ceph_authorizer *a, size_t len)
+					  struct ceph_authorizer *a)
 {
 	struct ceph_x_authorizer *au = (void *)a;
 	void *p = au->enc_buf;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2efb335deada..dba380429a05 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2033,7 +2033,7 @@ static int process_connect(struct ceph_connection *con)
 		 * should also define ->verify_authorizer_reply().
 		 * See get_connect_authorizer().
 		 */
-		ret = con->ops->verify_authorizer_reply(con, 0);
+		ret = con->ops->verify_authorizer_reply(con);
 		if (ret < 0) {
 			con->error_msg = "bad authorize reply";
 			return ret;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e6ae15bc41b7..5d812a26f05a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4478,13 +4478,13 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 }
 
 
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
+static int verify_authorizer_reply(struct ceph_connection *con)
 {
 	struct ceph_osd *o = con->private;
 	struct ceph_osd_client *osdc = o->o_osdc;
 	struct ceph_auth_client *ac = osdc->client->monc.auth;
 
-	return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+	return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
 }
 
 static int invalidate_authorizer(struct ceph_connection *con)
-- 
cgit v1.2.3


From 34888602eb99df174e76eafeb613ad857f534ebb Mon Sep 17 00:00:00 2001
From: Jingkui Wang <jkwang@google.com>
Date: Sat, 10 Dec 2016 22:44:10 -0800
Subject: Input: drv260x - use generic device properties

Update driver drv260x to use generic device properties so that it can be
used on non-DT systems. We also remove platform data as generic device
properties work on static board code as well.

Signed-off-by: Jingkui Wang <jkwang@google.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/drv260x.c                | 83 +++++++----------------------
 include/linux/platform_data/drv260x-pdata.h | 28 ----------
 2 files changed, 19 insertions(+), 92 deletions(-)
 delete mode 100644 include/linux/platform_data/drv260x-pdata.h

(limited to 'include/linux')

diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c
index a26aa548dd66..0a2b865b1000 100644
--- a/drivers/input/misc/drv260x.c
+++ b/drivers/input/misc/drv260x.c
@@ -18,8 +18,6 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
-#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
@@ -27,7 +25,6 @@
 #include <linux/regulator/consumer.h>
 
 #include <dt-bindings/input/ti-drv260x.h>
-#include <linux/platform_data/drv260x-pdata.h>
 
 #define DRV260X_STATUS		0x0
 #define DRV260X_MODE		0x1
@@ -468,86 +465,36 @@ static const struct regmap_config drv260x_regmap_config = {
 	.cache_type = REGCACHE_NONE,
 };
 
-#ifdef CONFIG_OF
-static int drv260x_parse_dt(struct device *dev,
-			    struct drv260x_data *haptics)
-{
-	struct device_node *np = dev->of_node;
-	unsigned int voltage;
-	int error;
-
-	error = of_property_read_u32(np, "mode", &haptics->mode);
-	if (error) {
-		dev_err(dev, "%s: No entry for mode\n", __func__);
-		return error;
-	}
-
-	error = of_property_read_u32(np, "library-sel", &haptics->library);
-	if (error) {
-		dev_err(dev, "%s: No entry for library selection\n",
-			__func__);
-		return error;
-	}
-
-	error = of_property_read_u32(np, "vib-rated-mv", &voltage);
-	if (!error)
-		haptics->rated_voltage = drv260x_calculate_voltage(voltage);
-
-
-	error = of_property_read_u32(np, "vib-overdrive-mv", &voltage);
-	if (!error)
-		haptics->overdrive_voltage = drv260x_calculate_voltage(voltage);
-
-	return 0;
-}
-#else
-static inline int drv260x_parse_dt(struct device *dev,
-				   struct drv260x_data *haptics)
-{
-	dev_err(dev, "no platform data defined\n");
-
-	return -EINVAL;
-}
-#endif
-
 static int drv260x_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
-	const struct drv260x_platform_data *pdata = dev_get_platdata(&client->dev);
 	struct device *dev = &client->dev;
 	struct drv260x_data *haptics;
+	u32 voltage;
 	int error;
 
 	haptics = devm_kzalloc(dev, sizeof(*haptics), GFP_KERNEL);
 	if (!haptics)
 		return -ENOMEM;
 
-	haptics->overdrive_voltage = DRV260X_DEF_OD_CLAMP_VOLT;
-	haptics->rated_voltage = DRV260X_DEF_RATED_VOLT;
-
-	if (pdata) {
-		haptics->mode = pdata->mode;
-		haptics->library = pdata->library_selection;
-		if (pdata->vib_overdrive_voltage)
-			haptics->overdrive_voltage = drv260x_calculate_voltage(pdata->vib_overdrive_voltage);
-		if (pdata->vib_rated_voltage)
-			haptics->rated_voltage = drv260x_calculate_voltage(pdata->vib_rated_voltage);
-	} else if (client->dev.of_node) {
-		error = drv260x_parse_dt(&client->dev, haptics);
-		if (error)
-			return error;
-	} else {
-		dev_err(dev, "Platform data not set\n");
-		return -ENODEV;
+	error = device_property_read_u32(dev, "mode", &haptics->mode);
+	if (error) {
+		dev_err(dev, "Can't fetch 'mode' property: %d\n", error);
+		return error;
 	}
 
-
 	if (haptics->mode < DRV260X_LRA_MODE ||
 	    haptics->mode > DRV260X_ERM_MODE) {
 		dev_err(dev, "Vibrator mode is invalid: %i\n", haptics->mode);
 		return -EINVAL;
 	}
 
+	error = device_property_read_u32(dev, "library-sel", &haptics->library);
+	if (error) {
+		dev_err(dev, "Can't fetch 'library-sel' property: %d\n", error);
+		return error;
+	}
+
 	if (haptics->library < DRV260X_LIB_EMPTY ||
 	    haptics->library > DRV260X_ERM_LIB_F) {
 		dev_err(dev,
@@ -569,6 +516,14 @@ static int drv260x_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	error = device_property_read_u32(dev, "vib-rated-mv", &voltage);
+	haptics->rated_voltage = error ? DRV260X_DEF_RATED_VOLT :
+					 drv260x_calculate_voltage(voltage);
+
+	error = device_property_read_u32(dev, "vib-overdrive-mv", &voltage);
+	haptics->overdrive_voltage = error ? DRV260X_DEF_OD_CLAMP_VOLT :
+					     drv260x_calculate_voltage(voltage);
+
 	haptics->regulator = devm_regulator_get(dev, "vbat");
 	if (IS_ERR(haptics->regulator)) {
 		error = PTR_ERR(haptics->regulator);
diff --git a/include/linux/platform_data/drv260x-pdata.h b/include/linux/platform_data/drv260x-pdata.h
deleted file mode 100644
index 0a03b0944411..000000000000
--- a/include/linux/platform_data/drv260x-pdata.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Platform data for DRV260X haptics driver family
- *
- * Author: Dan Murphy <dmurphy@ti.com>
- *
- * Copyright:   (C) 2014 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef _LINUX_DRV260X_PDATA_H
-#define _LINUX_DRV260X_PDATA_H
-
-struct drv260x_platform_data {
-	u32 library_selection;
-	u32 mode;
-	u32 vib_rated_voltage;
-	u32 vib_overdrive_voltage;
-};
-
-#endif
-- 
cgit v1.2.3


From e9e427f0a14f7e4773896dd7af357819a56d097a Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 10 Nov 2016 16:02:06 +0800
Subject: ceph: check availability of mds cluster on mount

Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/mds_client.c        |  19 ++++--
 fs/ceph/mdsmap.c            | 163 ++++++++++++++++++++++++++++++++++++++++++--
 fs/ceph/super.c             |  10 +++
 fs/ceph/super.h             |   1 +
 include/linux/ceph/mdsmap.h |   5 ++
 5 files changed, 187 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bf4d3d26850c..4f49253387a0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc,
 		err = -EIO;
 		goto finish;
 	}
+	if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+		if (mdsc->mdsmap_err) {
+			err = mdsc->mdsmap_err;
+			dout("do_request mdsmap err %d\n", err);
+			goto finish;
+		}
+		if (!(mdsc->fsc->mount_options->flags &
+		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
+		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
+			err = -ENOENT;
+			pr_info("probably no mds server is up\n");
+			goto finish;
+		}
+	}
 
 	put_request_session(req);
 
 	mds = __choose_mds(mdsc, req);
 	if (mds < 0 ||
 	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
-		if (mdsc->mdsmap_err) {
-			err = mdsc->mdsmap_err;
-			dout("do_request mdsmap err %d\n", err);
-			goto finish;
-		}
 		dout("do_request no mds or not active, waiting for map\n");
 		list_add(&req->r_wait, &mdsc->waiting_for_map);
 		goto out;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 8c3591a7fbae..5454e2327a5f 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	return i;
 }
 
+#define __decode_and_drop_type(p, end, type, bad)		\
+	do {							\
+		if (*p + sizeof(type) > end)			\
+			goto bad;				\
+		*p += sizeof(type);				\
+	} while (0)
+
+#define __decode_and_drop_set(p, end, type, bad)		\
+	do {							\
+		u32 n;						\
+		size_t need;					\
+		ceph_decode_32_safe(p, end, n, bad);		\
+		need = sizeof(type) * n;			\
+		ceph_decode_need(p, end, need, bad);		\
+		*p += need;					\
+	} while (0)
+
+#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
+	do {							\
+		u32 n;						\
+		size_t need;					\
+		ceph_decode_32_safe(p, end, n, bad);		\
+		need = (sizeof(ktype) + sizeof(vtype)) * n;	\
+		ceph_decode_need(p, end, need, bad);		\
+		*p += need;					\
+	} while (0)
+
+
+static int __decode_and_drop_compat_set(void **p, void* end)
+{
+	int i;
+	/* compat, ro_compat, incompat*/
+	for (i = 0; i < 3; i++) {
+		u32 n;
+		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
+		/* mask */
+		*p += sizeof(u64);
+		/* names (map<u64, string>) */
+		n = ceph_decode_32(p);
+		while (n-- > 0) {
+			u32 len;
+			ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
+					 bad);
+			*p += sizeof(u64);
+			len = ceph_decode_32(p);
+			ceph_decode_need(p, end, len, bad);
+			*p += len;
+		}
+	}
+	return 0;
+bad:
+	return -1;
+}
+
 /*
  * Decode an MDS map
  *
@@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	int i, j, n;
 	int err = -EINVAL;
 	u8 mdsmap_v, mdsmap_cv;
+	u16 mdsmap_ev;
 
 	m = kzalloc(sizeof(*m), GFP_NOFS);
 	if (m == NULL)
@@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (m->m_info == NULL)
-		goto badmem;
+		goto nomem;
 
 	/* pick out active nodes from mds_info (state > 0) */
 	n = ceph_decode_32(p);
@@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = kcalloc(num_export_targets,
 						       sizeof(u32), GFP_NOFS);
 			if (info->export_targets == NULL)
-				goto badmem;
+				goto nomem;
 			for (j = 0; j < num_export_targets; j++)
 				info->export_targets[j] =
 				       ceph_decode_32(&pexport_targets);
@@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_num_data_pg_pools = n;
 	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
 	if (!m->m_data_pg_pools)
-		goto badmem;
+		goto nomem;
 	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
 	for (i = 0; i < n; i++)
 		m->m_data_pg_pools[i] = ceph_decode_64(p);
 	m->m_cas_pg_pool = ceph_decode_64(p);
+	m->m_enabled = m->m_epoch > 1;
+
+	mdsmap_ev = 1;
+	if (mdsmap_v >= 2) {
+		ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
+	}
+	if (mdsmap_ev >= 3) {
+		if (__decode_and_drop_compat_set(p, end) < 0)
+			goto bad_ext;
+	}
+	/* metadata_pool */
+	if (mdsmap_ev < 5) {
+		__decode_and_drop_type(p, end, u32, bad_ext);
+	} else {
+		__decode_and_drop_type(p, end, u64, bad_ext);
+	}
 
-	/* ok, we don't care about the rest. */
+	/* created + modified + tableserver */
+	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
+	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
+	__decode_and_drop_type(p, end, u32, bad_ext);
+
+	/* in */
+	{
+		int num_laggy = 0;
+		ceph_decode_32_safe(p, end, n, bad_ext);
+		ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
+
+		for (i = 0; i < n; i++) {
+			s32 mds = ceph_decode_32(p);
+			if (mds >= 0 && mds < m->m_max_mds) {
+				if (m->m_info[mds].laggy)
+					num_laggy++;
+			}
+		}
+		m->m_num_laggy = num_laggy;
+	}
+
+	/* inc */
+	__decode_and_drop_map(p, end, u32, u32, bad_ext);
+	/* up */
+	__decode_and_drop_map(p, end, u32, u64, bad_ext);
+	/* failed */
+	__decode_and_drop_set(p, end, u32, bad_ext);
+	/* stopped */
+	__decode_and_drop_set(p, end, u32, bad_ext);
+
+	if (mdsmap_ev >= 4) {
+		/* last_failure_osd_epoch */
+		__decode_and_drop_type(p, end, u32, bad_ext);
+	}
+	if (mdsmap_ev >= 6) {
+		/* ever_allowed_snaps */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+		/* explicitly_allowed_snaps */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+	}
+	if (mdsmap_ev >= 7) {
+		/* inline_data_enabled */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+	}
+	if (mdsmap_ev >= 8) {
+		u32 name_len;
+		/* enabled */
+		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
+		ceph_decode_32_safe(p, end, name_len, bad_ext);
+		ceph_decode_need(p, end, name_len, bad_ext);
+		*p += name_len;
+	}
+	/* damaged */
+	if (mdsmap_ev >= 9) {
+		size_t need;
+		ceph_decode_32_safe(p, end, n, bad_ext);
+		need = sizeof(u32) * n;
+		ceph_decode_need(p, end, need, bad_ext);
+		*p += need;
+		m->m_damaged = n > 0;
+	} else {
+		m->m_damaged = false;
+	}
+bad_ext:
 	*p = end;
 	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
 	return m;
-
-badmem:
+nomem:
 	err = -ENOMEM;
+	goto out_err;
 bad:
 	pr_err("corrupt mdsmap\n");
 	print_hex_dump(KERN_DEBUG, "mdsmap: ",
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       start, end - start, true);
+out_err:
 	ceph_mdsmap_destroy(m);
 	return ERR_PTR(err);
 }
@@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 	kfree(m->m_data_pg_pools);
 	kfree(m);
 }
+
+bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
+{
+	int i, nr_active = 0;
+	if (!m->m_enabled)
+		return false;
+	if (m->m_damaged)
+		return false;
+	if (m->m_num_laggy > 0)
+		return false;
+	for (i = 0; i < m->m_max_mds; i++) {
+		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
+			nr_active++;
+	}
+	return nr_active > 0;
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b382e5910eea..537f96631785 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -137,6 +137,8 @@ enum {
 	Opt_nofscache,
 	Opt_poolperm,
 	Opt_nopoolperm,
+	Opt_require_active_mds,
+	Opt_norequire_active_mds,
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	Opt_acl,
 #endif
@@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = {
 	{Opt_nofscache, "nofsc"},
 	{Opt_poolperm, "poolperm"},
 	{Opt_nopoolperm, "nopoolperm"},
+	{Opt_require_active_mds, "require_active_mds"},
+	{Opt_norequire_active_mds, "norequire_active_mds"},
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	{Opt_acl, "acl"},
 #endif
@@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_nopoolperm:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
 		break;
+	case Opt_require_active_mds:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
+		break;
+	case Opt_norequire_active_mds:
+		fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
+		break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	case Opt_acl:
 		fsopt->sb_flags |= MS_POSIXACL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 622d5dd9f616..b07f55e55f60 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,6 +36,7 @@
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
+#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 87ed09f54800..8ed5dc505fbb 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -31,6 +31,10 @@ struct ceph_mdsmap {
 	int m_num_data_pg_pools;
 	u64 *m_data_pg_pools;
 	u64 m_cas_pg_pool;
+
+	bool m_enabled;
+	bool m_damaged;
+	int m_num_laggy;
 };
 
 static inline struct ceph_entity_addr *
@@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
 extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
 extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
+extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
 
 #endif
-- 
cgit v1.2.3


From 1e4ef0c6332bd90e6c70afc07b35dffaf1eab1a7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 10 Nov 2016 07:42:06 -0500
Subject: ceph: add flags parameter to send_cap_msg

Add a flags parameter to send_cap_msg, so we can request expedited
service from the MDS when we know we'll be waiting on the result.

Set that flag in the case of try_flush_caps. The callers of that
function generally wait synchronously on the result, so it's beneficial
to ask the server to expedite it.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/caps.c               | 25 +++++++++++++++----------
 include/linux/ceph/ceph_fs.h |  3 +++
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 508663ed413c..b54af160526e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -996,6 +996,7 @@ struct cap_msg_args {
 	struct timespec		atime, mtime, ctime;
 	int			op, caps, wanted, dirty;
 	u32			seq, issue_seq, mseq, time_warp_seq;
+	u32			flags;
 	kuid_t			uid;
 	kgid_t			gid;
 	umode_t			mode;
@@ -1104,7 +1105,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	ceph_encode_64(&p, 0);
 
 	/* Advisory flags (version 10) */
-	ceph_encode_32(&p, 0);
+	ceph_encode_32(&p, arg->flags);
 
 	ceph_con_send(&arg->session->s_con, msg);
 	return 0;
@@ -1145,8 +1146,8 @@ void ceph_queue_caps_release(struct inode *inode)
  * caller should hold snap_rwsem (read), s_mutex.
  */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
-		      int op, int used, int want, int retain, int flushing,
-		      u64 flush_tid, u64 oldest_flush_tid)
+		      int op, bool sync, int used, int want, int retain,
+		      int flushing, u64 flush_tid, u64 oldest_flush_tid)
 	__releases(cap->ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = cap->ci;
@@ -1235,6 +1236,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	arg.mode = inode->i_mode;
 
 	arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+	arg.flags = 0;
+	if (sync)
+		arg.flags |= CEPH_CLIENT_CAPS_SYNC;
 
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -1288,6 +1292,7 @@ static inline int __send_flush_snap(struct inode *inode,
 	arg.mode = capsnap->mode;
 
 	arg.inline_data = capsnap->inline_data;
+	arg.flags = 0;
 
 	return send_cap_msg(&arg);
 }
@@ -1912,9 +1917,9 @@ ack:
 		sent++;
 
 		/* __send_cap drops i_ceph_lock */
-		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
-				      want, retain, flushing,
-				      flush_tid, oldest_flush_tid);
+		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false,
+				cap_used, want, retain, flushing,
+				flush_tid, oldest_flush_tid);
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
@@ -1978,9 +1983,9 @@ retry:
 						&flush_tid, &oldest_flush_tid);
 
 		/* __send_cap drops i_ceph_lock */
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
-				     (cap->issued | cap->implemented),
-				     flushing, flush_tid, oldest_flush_tid);
+		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true,
+				used, want, (cap->issued | cap->implemented),
+				flushing, flush_tid, oldest_flush_tid);
 
 		if (delayed) {
 			spin_lock(&ci->i_ceph_lock);
@@ -2173,7 +2178,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
 			ci->i_ceph_flags |= CEPH_I_NODELAY;
 			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-					  __ceph_caps_used(ci),
+					  false, __ceph_caps_used(ci),
 					  __ceph_caps_wanted(ci),
 					  cap->issued | cap->implemented,
 					  cf->caps, cf->tid, oldest_flush_tid);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f96de8de4fa7..f4b2ee18f38c 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -653,6 +653,9 @@ enum {
 
 extern const char *ceph_cap_op_name(int op);
 
+/* flags field in client cap messages (version >= 10) */
+#define CEPH_CLIENT_CAPS_SYNC	(0x1)
+
 /*
  * caps message, used for capability callbacks, acks, requests, etc.
  */
-- 
cgit v1.2.3


From c0b942a76361e08fc9fb17989e0f266e64ff0688 Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Mon, 12 Dec 2016 16:40:39 -0800
Subject: kthread: add __printf attributes

When commit fbae2d44aa1d ("kthread: add kthread_create_worker*()")
introduced some kthread_create_...() functions which were taking
printf-like parametter, it introduced __printf attributes to some
functions (e.g.  kthread_create_worker()).  Nevertheless some new
functions were forgotten (they have been detected thanks to
-Wmissing-format-attribute warning flag).

Add the missing __printf attributes to the newly-introduced functions in
order to detect formatting issues at build-time with -Wformat flag.

Link: http://lkml.kernel.org/r/20161126193543.22672-1-nicolas.iooss_linux@m4x.org
Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 2 +-
 kernel/kthread.c        | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1c3e63d52c1..4fec8b775895 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -175,7 +175,7 @@ __printf(2, 3)
 struct kthread_worker *
 kthread_create_worker(unsigned int flags, const char namefmt[], ...);
 
-struct kthread_worker *
+__printf(3, 4) struct kthread_worker *
 kthread_create_worker_on_cpu(int cpu, unsigned int flags,
 			     const char namefmt[], ...);
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 956495f0efaf..2318fba86277 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -261,7 +261,8 @@ static void create_kthread(struct kthread_create_info *create)
 	}
 }
 
-static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+static __printf(4, 0)
+struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
 						    void *data, int node,
 						    const char namefmt[],
 						    va_list args)
@@ -635,7 +636,7 @@ repeat:
 }
 EXPORT_SYMBOL_GPL(kthread_worker_fn);
 
-static struct kthread_worker *
+static __printf(3, 0) struct kthread_worker *
 __kthread_create_worker(int cpu, unsigned int flags,
 			const char namefmt[], va_list args)
 {
-- 
cgit v1.2.3


From 3fb4afd9a504c2386b8435028d43283216bf588e Mon Sep 17 00:00:00 2001
From: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
Date: Mon, 12 Dec 2016 16:40:42 -0800
Subject: prctl: remove one-shot limitation for changing exe link

This limitation came with the reason to remove "another way for
malicious code to obscure a compromised program and masquerade as a
benign process" by allowing "security-concious program can use this
prctl once during its early initialization to ensure the prctl cannot
later be abused for this purpose":

    http://marc.info/?l=linux-kernel&m=133160684517468&w=2

This explanation doesn't look sufficient.  The only thing "exe" link is
indicating is the file, used to execve, which is basically nothing and
not reliable immediately after process has returned from execve system
call.

Moreover, to use this feture, all the mappings to previous exe file have
to be unmapped and all the new exe file permissions must be satisfied.

Which means, that changing exe link is very similar to calling execve on
the binary.

The need to remove this limitations comes from migration of NFS mount
point, which is not accessible during restore and replaced by other file
system.  Because of this exe link has to be changed twice.

[akpm@linux-foundation.org: fix up comment]
Link: http://lkml.kernel.org/r/20160927153755.9337.69650.stgit@localhost.localdomain
Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  6 +++++-
 kernel/sys.c          | 10 ----------
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7551d3e2ab70..0e90f2973719 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -540,7 +540,11 @@ static inline int get_dumpable(struct mm_struct *mm)
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
-#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
+/*
+ * This one-shot flag is dropped due to necessity of changing exe once again
+ * on NFS restore
+ */
+//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_HAS_UPROBES		19	/* has uprobes */
 #define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be418157..fd6f50809b6e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1696,16 +1696,6 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 		fput(exe_file);
 	}
 
-	/*
-	 * The symlink can be changed only once, just to disallow arbitrary
-	 * transitions malicious software might bring in. This means one
-	 * could make a snapshot over all processes running and monitor
-	 * /proc/pid/exe changes to notice unusual activity if needed.
-	 */
-	err = -EPERM;
-	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
-		goto exit;
-
 	err = 0;
 	/* set the new file, lockless */
 	get_file(exe.file);
-- 
cgit v1.2.3


From fd60775aea802beef444881ddfa111a4b73b1bbc Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 12 Dec 2016 16:42:20 -0800
Subject: mm, thp: avoid unlikely branches for split_huge_pmd

While doing MADV_DONTNEED on a large area of thp memory, I noticed we
encountered many unlikely() branches in profiles for each backing
hugepage.  This is because zap_pmd_range() would call split_huge_pmd(),
which rechecked the conditions that were already validated, but as part
of an unlikely() branch.

Avoid the unlikely() branch when in a context where pmd is known to be
good for __split_huge_pmd() directly.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1610181600300.84525@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 ++
 mm/memory.c             | 4 ++--
 mm/mempolicy.c          | 2 +-
 mm/mprotect.c           | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e35e6de633b9..1f782aa1d8e6 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -189,6 +189,8 @@ static inline void deferred_split_huge_page(struct page *page) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
 
+static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address, bool freeze, struct page *page) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page) {}
 
diff --git a/mm/memory.c b/mm/memory.c
index 33f45edf8272..d86b7b4afd7d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1240,7 +1240,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next - addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
 				    !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
-				split_huge_pmd(vma, pmd, addr);
+				__split_huge_pmd(vma, pmd, addr, false, NULL);
 			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
@@ -3454,7 +3454,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
 
 	/* COW handled on pte level: split pmd */
 	VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
-	split_huge_pmd(fe->vma, fe->pmd, fe->address);
+	__split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL);
 
 	return VM_FAULT_FALLBACK;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0b859af06b87..a6a27e5d6b14 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			page = pmd_page(*pmd);
 			if (is_huge_zero_page(page)) {
 				spin_unlock(ptl);
-				split_huge_pmd(vma, pmd, addr);
+				__split_huge_pmd(vma, pmd, addr, false, NULL);
 			} else {
 				get_page(page);
 				spin_unlock(ptl);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 05a02b72c98d..c5ba2aae0f54 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -176,7 +176,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 
 		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE) {
-				split_huge_pmd(vma, pmd, addr);
+				__split_huge_pmd(vma, pmd, addr, false, NULL);
 				if (pmd_trans_unstable(pmd))
 					continue;
 			} else {
-- 
cgit v1.2.3


From 41a9ada3e6b4253f1a3ce42699c6aaeb8584306c Mon Sep 17 00:00:00 2001
From: Reza Arbab <arbab@linux.vnet.ibm.com>
Date: Mon, 12 Dec 2016 16:43:02 -0800
Subject: of/fdt: mark hotpluggable memory

When movable nodes are enabled, any node containing only hotpluggable
memory is made movable at boot time.

On x86, hotpluggable memory is discovered by parsing the ACPI SRAT,
making corresponding calls to memblock_mark_hotplug().

If we introduce a dt property to describe memory as hotpluggable,
configs supporting early fdt may then also do this marking and use
movable nodes.

Link: http://lkml.kernel.org/r/1479160961-25840-5-git-send-email-arbab@linux.vnet.ibm.com
Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alistair Popple <apopple@au1.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Stewart Smith <stewart@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/of/fdt.c       | 19 +++++++++++++++++++
 include/linux/of_fdt.h |  1 +
 mm/Kconfig             |  2 +-
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index c89d5d231a0e..c9b5cac03b36 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1015,6 +1015,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 	const __be32 *reg, *endp;
 	int l;
+	bool hotpluggable;
 
 	/* We are scanning "memory" nodes only */
 	if (type == NULL) {
@@ -1034,6 +1035,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 		return 0;
 
 	endp = reg + (l / sizeof(__be32));
+	hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
 
 	pr_debug("memory scan node %s, reg size %d,\n", uname, l);
 
@@ -1049,6 +1051,13 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 		    (unsigned long long)size);
 
 		early_init_dt_add_memory_arch(base, size);
+
+		if (!hotpluggable)
+			continue;
+
+		if (early_init_dt_mark_hotplug_memory_arch(base, size))
+			pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
+				base, base + size);
 	}
 
 	return 0;
@@ -1146,6 +1155,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 	memblock_add(base, size);
 }
 
+int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
+{
+	return memblock_mark_hotplug(base, size);
+}
+
 int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
 					phys_addr_t size, bool nomap)
 {
@@ -1168,6 +1182,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 	WARN_ON(1);
 }
 
+int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
+{
+	return -ENOSYS;
+}
+
 int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
 					phys_addr_t size, bool nomap)
 {
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 4341f32516d8..271b3fdf0070 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -71,6 +71,7 @@ extern int early_init_dt_scan_chosen_stdout(void);
 extern void early_init_fdt_scan_reserved_mem(void);
 extern void early_init_fdt_reserve_self(void);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
+extern int early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size);
 extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
 					     bool no_map);
 extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
diff --git a/mm/Kconfig b/mm/Kconfig
index 061b46b18029..33a9b06ec618 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -153,7 +153,7 @@ config MOVABLE_NODE
 	bool "Enable to assign a node which has only movable memory"
 	depends on HAVE_MEMBLOCK
 	depends on NO_BOOTMEM
-	depends on X86_64 || MEMORY_HOTPLUG
+	depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG
 	depends on NUMA
 	default n
 	help
-- 
cgit v1.2.3


From c1ef8e2c0235bffe4b0505c3325bb8a6af954021 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 12 Dec 2016 16:43:12 -0800
Subject: mm: disable numa migration faults for dax vmas

Mark dax vmas as not migratable to exclude them from task_numa_work().
This is especially relevant for device-dax which wants to ensure
predictable access latency and not incur periodic faults.

[akpm@linux-foundation.org: add comment]
Link: http://lkml.kernel.org/r/147892450132.22062.16875659431109209179.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5e5b2969d931..5f4d8281832b 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -7,6 +7,7 @@
 
 
 #include <linux/mmzone.h>
+#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
@@ -177,6 +178,13 @@ static inline bool vma_migratable(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
 		return false;
 
+	/*
+	 * DAX device mappings require predictable access latency, so avoid
+	 * incurring periodic faults.
+	 */
+	if (vma_is_dax(vma))
+		return false;
+
 #ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 	if (vma->vm_flags & VM_HUGETLB)
 		return false;
-- 
cgit v1.2.3


From d5e6eff265fe7537fa494e6ab125747813be76a0 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 12 Dec 2016 16:43:15 -0800
Subject: mm: cma: make linux/cma.h standalone includible

The header uses types and definitions from the linux/init.h as well as
linux/types.h headers without explicitly including them.  This causes a
failure to compile if they are not implicitly pulled in by includers.

Link: http://lkml.kernel.org/r/20161115133235.13387-1-thierry.reding@gmail.com
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cma.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 29f9e774ab76..6f0a91b37f68 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -1,6 +1,9 @@
 #ifndef __CMA_H__
 #define __CMA_H__
 
+#include <linux/init.h>
+#include <linux/types.h>
+
 /*
  * There is always at least global CMA area and a few optional
  * areas configured in kernel .config.
-- 
cgit v1.2.3


From 9491ae4aade6814afcfa67f4eb3e3342c2b39750 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Mon, 12 Dec 2016 16:43:26 -0800
Subject: mm: don't cap request size based on read-ahead setting

We ran into a funky issue, where someone doing 256K buffered reads saw
128K requests at the device level.  Turns out it is read-ahead capping
the request size, since we use 128K as the default setting.  This
doesn't make a lot of sense - if someone is issuing 256K reads, they
should see 256K reads, regardless of the read-ahead setting, if the
underlying device can support a 256K read in a single command.

This patch introduces a bdi hint, io_pages.  This is the soft max IO
size for the lower level, I've hooked it up to the bdev settings here.
Read-ahead is modified to issue the maximum of the user request size,
and the read-ahead max size, but capped to the max request size on the
device side.  The latter is done to avoid reading ahead too much, if the
application asks for a huge read.  With this patch, the kernel behaves
like the application expects.

Link: http://lkml.kernel.org/r/1479498073-8657-1-git-send-email-axboe@fb.com
Signed-off-by: Jens Axboe <axboe@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/blk-settings.c             |  1 +
 block/blk-sysfs.c                |  1 +
 include/linux/backing-dev-defs.h |  1 +
 mm/readahead.c                   | 39 ++++++++++++++++++++++++++++-----------
 4 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..65f16cf4f850 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -249,6 +249,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 	max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
 	max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
 	limits->max_sectors = max_sectors;
+	q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9cc8d7c5439a..ea374e820775 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -212,6 +212,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 
 	spin_lock_irq(q->queue_lock);
 	q->limits.max_sectors = max_sectors_kb << 1;
+	q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c357f27d5483..b8144b2d59ce 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -136,6 +136,7 @@ struct bdi_writeback {
 struct backing_dev_info {
 	struct list_head bdi_list;
 	unsigned long ra_pages;	/* max readahead in PAGE_SIZE units */
+	unsigned long io_pages;	/* max allowed IO size */
 	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
diff --git a/mm/readahead.c b/mm/readahead.c
index c8a955b1297e..c4ca70239233 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -207,12 +207,21 @@ out:
  * memory at once.
  */
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-		pgoff_t offset, unsigned long nr_to_read)
+			       pgoff_t offset, unsigned long nr_to_read)
 {
+	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	struct file_ra_state *ra = &filp->f_ra;
+	unsigned long max_pages;
+
 	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
 		return -EINVAL;
 
-	nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages);
+	/*
+	 * If the request exceeds the readahead window, allow the read to
+	 * be up to the optimal hardware IO size
+	 */
+	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
+	nr_to_read = min(nr_to_read, max_pages);
 	while (nr_to_read) {
 		int err;
 
@@ -369,9 +378,17 @@ ondemand_readahead(struct address_space *mapping,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
-	unsigned long max = ra->ra_pages;
+	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	unsigned long max_pages = ra->ra_pages;
 	pgoff_t prev_offset;
 
+	/*
+	 * If the request exceeds the readahead window, allow the read to
+	 * be up to the optimal hardware IO size
+	 */
+	if (req_size > max_pages && bdi->io_pages > max_pages)
+		max_pages = min(req_size, bdi->io_pages);
+
 	/*
 	 * start of file
 	 */
@@ -385,7 +402,7 @@ ondemand_readahead(struct address_space *mapping,
 	if ((offset == (ra->start + ra->size - ra->async_size) ||
 	     offset == (ra->start + ra->size))) {
 		ra->start += ra->size;
-		ra->size = get_next_ra_size(ra, max);
+		ra->size = get_next_ra_size(ra, max_pages);
 		ra->async_size = ra->size;
 		goto readit;
 	}
@@ -400,16 +417,16 @@ ondemand_readahead(struct address_space *mapping,
 		pgoff_t start;
 
 		rcu_read_lock();
-		start = page_cache_next_hole(mapping, offset + 1, max);
+		start = page_cache_next_hole(mapping, offset + 1, max_pages);
 		rcu_read_unlock();
 
-		if (!start || start - offset > max)
+		if (!start || start - offset > max_pages)
 			return 0;
 
 		ra->start = start;
 		ra->size = start - offset;	/* old async_size */
 		ra->size += req_size;
-		ra->size = get_next_ra_size(ra, max);
+		ra->size = get_next_ra_size(ra, max_pages);
 		ra->async_size = ra->size;
 		goto readit;
 	}
@@ -417,7 +434,7 @@ ondemand_readahead(struct address_space *mapping,
 	/*
 	 * oversize read
 	 */
-	if (req_size > max)
+	if (req_size > max_pages)
 		goto initial_readahead;
 
 	/*
@@ -433,7 +450,7 @@ ondemand_readahead(struct address_space *mapping,
 	 * Query the page cache and look for the traces(cached history pages)
 	 * that a sequential stream would leave behind.
 	 */
-	if (try_context_readahead(mapping, ra, offset, req_size, max))
+	if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
 		goto readit;
 
 	/*
@@ -444,7 +461,7 @@ ondemand_readahead(struct address_space *mapping,
 
 initial_readahead:
 	ra->start = offset;
-	ra->size = get_init_ra_size(req_size, max);
+	ra->size = get_init_ra_size(req_size, max_pages);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
 readit:
@@ -454,7 +471,7 @@ readit:
 	 * the resulted next readahead window into the current one.
 	 */
 	if (offset == ra->start && ra->size == ra->async_size) {
-		ra->async_size = get_next_ra_size(ra, max);
+		ra->async_size = get_next_ra_size(ra, max_pages);
 		ra->size += ra->async_size;
 	}
 
-- 
cgit v1.2.3


From 8db378a570330fa0aaa9d75299fe264e4a5b6348 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 12 Dec 2016 16:43:29 -0800
Subject: include/linux/backing-dev-defs.h: shrink struct backing_dev_info

Move the 4-byte `capabilities' field next to other 4-byte things.
Shrinks sizeof(backing_dev_info) by 8 bytes on x86_64.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index b8144b2d59ce..0b5b1af35e5e 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -137,12 +137,12 @@ struct backing_dev_info {
 	struct list_head bdi_list;
 	unsigned long ra_pages;	/* max readahead in PAGE_SIZE units */
 	unsigned long io_pages;	/* max allowed IO size */
-	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
 
 	char *name;
 
+	unsigned int capabilities; /* Device capabilities */
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
-- 
cgit v1.2.3


From f7942430e40f14c6d2ca48a1875add509938c07d Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 12 Dec 2016 16:43:41 -0800
Subject: lib: radix-tree: native accounting of exceptional entries

The way the page cache is sneaking shadow entries of evicted pages into
the radix tree past the node entry accounting and tracking them manually
in the upper bits of node->count is fraught with problems.

These shadow entries are marked in the tree as exceptional entries,
which are a native concept to the radix tree.  Maintain an explicit
counter of exceptional entries in the radix tree node.  Subsequent
patches will switch shadow entry tracking over to that counter.

DAX and shmem are the other users of exceptional entries.  Since slot
replacements that change the entry type from regular to exceptional must
now be accounted, introduce a __radix_tree_replace() function that does
replacement and accounting, and switch DAX and shmem over.

The increase in radix tree node size is temporary.  A followup patch
switches the shadow tracking to this new scheme and we'll no longer need
the upper bits in node->count and shrink that back to one byte.

Link: http://lkml.kernel.org/r/20161117192945.GA23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c                   |  5 +++--
 include/linux/radix-tree.h | 10 +++++++---
 lib/radix-tree.c           | 46 +++++++++++++++++++++++++++++++++++++++++++---
 mm/shmem.c                 |  8 ++++----
 4 files changed, 57 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 014defd2e744..db78bae0dc0f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -643,12 +643,13 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 		}
 		mapping->nrexceptional++;
 	} else {
+		struct radix_tree_node *node;
 		void **slot;
 		void *ret;
 
-		ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
+		ret = __radix_tree_lookup(page_tree, index, &node, &slot);
 		WARN_ON_ONCE(ret != entry);
-		radix_tree_replace_slot(slot, new_entry);
+		__radix_tree_replace(page_tree, node, slot, new_entry);
 	}
 	if (vmf->flags & FAULT_FLAG_WRITE)
 		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index af3581b8a451..7ced8a70cc8b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -85,9 +85,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_COUNT_MASK	((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
 
 struct radix_tree_node {
-	unsigned char	shift;	/* Bits remaining in each slot */
-	unsigned char	offset;	/* Slot offset in parent */
-	unsigned int	count;
+	unsigned char	shift;		/* Bits remaining in each slot */
+	unsigned char	offset;		/* Slot offset in parent */
+	unsigned int	count;		/* Total entry count */
+	unsigned char	exceptional;	/* Exceptional entry count */
 	union {
 		struct {
 			/* Used when ascending tree */
@@ -276,6 +277,9 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
 			  struct radix_tree_node **nodep, void ***slotp);
 void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
 void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void __radix_tree_replace(struct radix_tree_root *root,
+			  struct radix_tree_node *node,
+			  void **slot, void *item);
 bool __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e6d552c40dd..7885796d35ae 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -220,10 +220,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
 	unsigned long i;
 
-	pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
+	pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n",
 		node, node->offset,
 		node->tags[0][0], node->tags[1][0], node->tags[2][0],
-		node->shift, node->count, node->parent);
+		node->shift, node->count, node->exceptional, node->parent);
 
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		unsigned long first = index | (i << node->shift);
@@ -522,8 +522,13 @@ static int radix_tree_extend(struct radix_tree_root *root,
 		node->offset = 0;
 		node->count = 1;
 		node->parent = NULL;
-		if (radix_tree_is_internal_node(slot))
+		if (radix_tree_is_internal_node(slot)) {
 			entry_to_node(slot)->parent = node;
+		} else {
+			/* Moving an exceptional root->rnode to a node */
+			if (radix_tree_exceptional_entry(slot))
+				node->exceptional = 1;
+		}
 		node->slots[0] = slot;
 		slot = node_to_entry(node);
 		rcu_assign_pointer(root->rnode, slot);
@@ -649,6 +654,8 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
 	if (node) {
 		unsigned offset = get_slot_offset(node, slot);
 		node->count++;
+		if (radix_tree_exceptional_entry(item))
+			node->exceptional++;
 		BUG_ON(tag_get(node, 0, offset));
 		BUG_ON(tag_get(node, 1, offset));
 		BUG_ON(tag_get(node, 2, offset));
@@ -746,6 +753,37 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
+/**
+ * __radix_tree_replace		- replace item in a slot
+ * @root:	radix tree root
+ * @node:	pointer to tree node
+ * @slot:	pointer to slot in @node
+ * @item:	new item to store in the slot.
+ *
+ * For use with __radix_tree_lookup().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ */
+void __radix_tree_replace(struct radix_tree_root *root,
+			  struct radix_tree_node *node,
+			  void **slot, void *item)
+{
+	void *old = rcu_dereference_raw(*slot);
+	int exceptional;
+
+	WARN_ON_ONCE(radix_tree_is_internal_node(item));
+	WARN_ON_ONCE(!!item - !!old);
+
+	exceptional = !!radix_tree_exceptional_entry(item) -
+		      !!radix_tree_exceptional_entry(old);
+
+	WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
+
+	if (node)
+		node->exceptional += exceptional;
+
+	rcu_assign_pointer(*slot, item);
+}
+
 /**
  *	radix_tree_tag_set - set a tag on a radix tree node
  *	@root:		radix tree root
@@ -1561,6 +1599,8 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
 	delete_sibling_entries(node, node_to_entry(slot), offset);
 	node->slots[offset] = NULL;
 	node->count--;
+	if (radix_tree_exceptional_entry(entry))
+		node->exceptional--;
 
 	__radix_tree_delete_node(root, node);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index ec7aa562343e..3149ddee8f55 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -300,18 +300,18 @@ void shmem_uncharge(struct inode *inode, long pages)
 static int shmem_radix_tree_replace(struct address_space *mapping,
 			pgoff_t index, void *expected, void *replacement)
 {
+	struct radix_tree_node *node;
 	void **pslot;
 	void *item;
 
 	VM_BUG_ON(!expected);
 	VM_BUG_ON(!replacement);
-	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
-	if (!pslot)
+	item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+	if (!item)
 		return -ENOENT;
-	item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
 	if (item != expected)
 		return -ENOENT;
-	radix_tree_replace_slot(pslot, replacement);
+	__radix_tree_replace(&mapping->page_tree, node, pslot, replacement);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6d75f366b9242f9b17ed7d0b0604d7460f818f21 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 12 Dec 2016 16:43:43 -0800
Subject: lib: radix-tree: check accounting of existing slot replacement users

The bug in khugepaged fixed earlier in this series shows that radix tree
slot replacement is fragile; and it will become more so when not only
NULL<->!NULL transitions need to be caught but transitions from and to
exceptional entries as well.  We need checks.

Re-implement radix_tree_replace_slot() on top of the sanity-checked
__radix_tree_replace().  This requires existing callers to also pass the
radix tree root, but it'll warn us when somebody replaces slots with
contents that need proper accounting (transitions between NULL entries,
real entries, exceptional entries) and where a replacement through the
slot pointer would corrupt the radix tree node counts.

Link: http://lkml.kernel.org/r/20161117193021.GB23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/mm/gmap.c                   |  2 +-
 drivers/sh/intc/virq.c                |  2 +-
 fs/dax.c                              |  4 +--
 include/linux/radix-tree.h            | 16 ++-------
 lib/radix-tree.c                      | 63 +++++++++++++++++++++++++++--------
 mm/filemap.c                          |  4 +--
 mm/khugepaged.c                       |  5 +--
 mm/migrate.c                          |  4 +--
 mm/truncate.c                         |  2 +-
 tools/testing/radix-tree/multiorder.c |  2 +-
 10 files changed, 64 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 3ba622702ce4..ec1f0dedb948 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
 	if (slot) {
 		rmap->next = radix_tree_deref_slot_protected(slot,
 							&sg->guest_table_lock);
-		radix_tree_replace_slot(slot, rmap);
+		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
 	} else {
 		rmap->next = NULL;
 		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index e7899624aa0b..35bbe288ddb4 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -254,7 +254,7 @@ restart:
 
 		radix_tree_tag_clear(&d->tree, entry->enum_id,
 				     INTC_TAG_VIRQ_NEEDS_ALLOC);
-		radix_tree_replace_slot((void **)entries[i],
+		radix_tree_replace_slot(&d->tree, (void **)entries[i],
 					&intc_irq_xlate[irq]);
 	}
 
diff --git a/fs/dax.c b/fs/dax.c
index db78bae0dc0f..85930c2a2749 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
 		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
 	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
 	return (void *)entry;
 }
 
@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
 		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
 	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
 	return (void *)entry;
 }
 
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7ced8a70cc8b..2d1b9b8be983 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
 	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-/**
- * radix_tree_replace_slot	- replace item in a slot
- * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
- * @item:	new item to store in the slot.
- *
- * For use with radix_tree_lookup_slot().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-static inline void radix_tree_replace_slot(void **pslot, void *item)
-{
-	BUG_ON(radix_tree_is_internal_node(item));
-	rcu_assign_pointer(*pslot, item);
-}
-
 int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 			unsigned order, struct radix_tree_node **nodep,
 			void ***slotp);
@@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
 			  void **slot, void *item);
+void radix_tree_replace_slot(struct radix_tree_root *root,
+			     void **slot, void *item);
 bool __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7885796d35ae..f91d5b0af654 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-/**
- * __radix_tree_replace		- replace item in a slot
- * @root:	radix tree root
- * @node:	pointer to tree node
- * @slot:	pointer to slot in @node
- * @item:	new item to store in the slot.
- *
- * For use with __radix_tree_lookup().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-void __radix_tree_replace(struct radix_tree_root *root,
-			  struct radix_tree_node *node,
-			  void **slot, void *item)
+static void replace_slot(struct radix_tree_root *root,
+			 struct radix_tree_node *node,
+			 void **slot, void *item,
+			 bool warn_typeswitch)
 {
 	void *old = rcu_dereference_raw(*slot);
 	int exceptional;
@@ -776,7 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	exceptional = !!radix_tree_exceptional_entry(item) -
 		      !!radix_tree_exceptional_entry(old);
 
-	WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
+	WARN_ON_ONCE(warn_typeswitch && exceptional);
 
 	if (node)
 		node->exceptional += exceptional;
@@ -784,6 +775,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	rcu_assign_pointer(*slot, item);
 }
 
+/**
+ * __radix_tree_replace		- replace item in a slot
+ * @root:	radix tree root
+ * @node:	pointer to tree node
+ * @slot:	pointer to slot in @node
+ * @item:	new item to store in the slot.
+ *
+ * For use with __radix_tree_lookup().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ */
+void __radix_tree_replace(struct radix_tree_root *root,
+			  struct radix_tree_node *node,
+			  void **slot, void *item)
+{
+	/*
+	 * This function supports replacing exceptional entries, but
+	 * that needs accounting against the node unless the slot is
+	 * root->rnode.
+	 */
+	replace_slot(root, node, slot, item,
+		     !node && slot != (void **)&root->rnode);
+}
+
+/**
+ * radix_tree_replace_slot	- replace item in a slot
+ * @root:	radix tree root
+ * @slot:	pointer to slot
+ * @item:	new item to store in the slot.
+ *
+ * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ *
+ * NOTE: This cannot be used to switch between non-entries (empty slots),
+ * regular entries, and exceptional entries, as that requires accounting
+ * inside the radix tree node. When switching from one type of entry to
+ * another, use __radix_tree_lookup() and __radix_tree_replace().
+ */
+void radix_tree_replace_slot(struct radix_tree_root *root,
+			     void **slot, void *item)
+{
+	replace_slot(root, NULL, slot, item, true);
+}
+
 /**
  *	radix_tree_tag_set - set a tag on a radix tree node
  *	@root:		radix tree root
diff --git a/mm/filemap.c b/mm/filemap.c
index caa779f8797f..1ba726aef708 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 						      false);
 		}
 	}
-	radix_tree_replace_slot(slot, page);
+	radix_tree_replace_slot(&mapping->page_tree, slot, page);
 	mapping->nrpages++;
 	if (node) {
 		workingset_node_pages_inc(node);
@@ -196,7 +196,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 			shadow = NULL;
 		}
 
-		radix_tree_replace_slot(slot, shadow);
+		radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
 
 		if (!node)
 			break;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 5d7c006373d3..7a50c726c5ae 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1426,7 +1426,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(slot,
+		radix_tree_replace_slot(&mapping->page_tree, slot,
 				new_page + (index % HPAGE_PMD_NR));
 
 		slot = radix_tree_iter_next(&iter);
@@ -1538,7 +1538,8 @@ tree_unlocked:
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(slot, page);
+			radix_tree_replace_slot(&mapping->page_tree,
+						slot, page);
 			spin_unlock_irq(&mapping->tree_lock);
 			putback_lru_page(page);
 			unlock_page(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 66ce6b490b13..0ed24b1fa77b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	/*
 	 * Drop cache reference from old page by unfreezing
@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 8d8c62d89e6d..3c631c357873 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
 		goto unlock;
 	if (*slot != entry)
 		goto unlock;
-	radix_tree_replace_slot(slot, NULL);
+	radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
 	mapping->nrexceptional--;
 	if (!node)
 		goto unlock;
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 05d7bc488971..d1be94667a30 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order)
 
 	slot = radix_tree_lookup_slot(&tree, index);
 	free(*slot);
-	radix_tree_replace_slot(slot, item2);
+	radix_tree_replace_slot(&tree, slot, item2);
 	for (i = min; i < max; i++) {
 		struct item *item = item_lookup(&tree, i);
 		assert(item != 0);
-- 
cgit v1.2.3


From 4d693d08607ab319095ec8942909df4b4aebdf66 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 12 Dec 2016 16:43:49 -0800
Subject: lib: radix-tree: update callback for changing leaf nodes

Support handing __radix_tree_replace() a callback that gets invoked for
all leaf nodes that change or get freed as a result of the slot
replacement, to assist users tracking nodes with node->private_list.

This prepares for putting page cache shadow entries into the radix tree
root again and drastically simplifying the shadow tracking.

Link: http://lkml.kernel.org/r/20161117193134.GD23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c                   |  3 ++-
 include/linux/radix-tree.h |  4 +++-
 lib/radix-tree.c           | 42 +++++++++++++++++++++++++++++-------------
 mm/shmem.c                 |  3 ++-
 4 files changed, 36 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 85930c2a2749..6916ed37d463 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -649,7 +649,8 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 
 		ret = __radix_tree_lookup(page_tree, index, &node, &slot);
 		WARN_ON_ONCE(ret != entry);
-		__radix_tree_replace(page_tree, node, slot, new_entry);
+		__radix_tree_replace(page_tree, node, slot,
+				     new_entry, NULL, NULL);
 	}
 	if (vmf->flags & FAULT_FLAG_WRITE)
 		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 2d1b9b8be983..15c972ea9510 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -263,9 +263,11 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
 			  struct radix_tree_node **nodep, void ***slotp);
 void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
 void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
-			  void **slot, void *item);
+			  void **slot, void *item,
+			  radix_tree_update_node_t update_node, void *private);
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item);
 bool __radix_tree_delete_node(struct radix_tree_root *root,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 5d8930f3b3d8..df4ff18dd63c 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -325,7 +325,6 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 		tag_clear(node, i, 0);
 
 	node->slots[0] = NULL;
-	node->count = 0;
 
 	kmem_cache_free(radix_tree_node_cachep, node);
 }
@@ -542,7 +541,9 @@ out:
  *	radix_tree_shrink    -    shrink radix tree to minimum height
  *	@root		radix tree root
  */
-static inline bool radix_tree_shrink(struct radix_tree_root *root)
+static inline bool radix_tree_shrink(struct radix_tree_root *root,
+				     radix_tree_update_node_t update_node,
+				     void *private)
 {
 	bool shrunk = false;
 
@@ -597,8 +598,12 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
 		 * also results in a stale slot). So tag the slot as indirect
 		 * to force callers to retry.
 		 */
-		if (!radix_tree_is_internal_node(child))
+		node->count = 0;
+		if (!radix_tree_is_internal_node(child)) {
 			node->slots[0] = RADIX_TREE_RETRY;
+			if (update_node)
+				update_node(node, private);
+		}
 
 		radix_tree_node_free(node);
 		shrunk = true;
@@ -608,7 +613,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
 }
 
 static bool delete_node(struct radix_tree_root *root,
-			struct radix_tree_node *node)
+			struct radix_tree_node *node,
+			radix_tree_update_node_t update_node, void *private)
 {
 	bool deleted = false;
 
@@ -617,7 +623,8 @@ static bool delete_node(struct radix_tree_root *root,
 
 		if (node->count) {
 			if (node == entry_to_node(root->rnode))
-				deleted |= radix_tree_shrink(root);
+				deleted |= radix_tree_shrink(root, update_node,
+							     private);
 			return deleted;
 		}
 
@@ -880,17 +887,20 @@ static void replace_slot(struct radix_tree_root *root,
 
 /**
  * __radix_tree_replace		- replace item in a slot
- * @root:	radix tree root
- * @node:	pointer to tree node
- * @slot:	pointer to slot in @node
- * @item:	new item to store in the slot.
+ * @root:		radix tree root
+ * @node:		pointer to tree node
+ * @slot:		pointer to slot in @node
+ * @item:		new item to store in the slot.
+ * @update_node:	callback for changing leaf nodes
+ * @private:		private data to pass to @update_node
  *
  * For use with __radix_tree_lookup().  Caller must hold tree write locked
  * across slot lookup and replacement.
  */
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
-			  void **slot, void *item)
+			  void **slot, void *item,
+			  radix_tree_update_node_t update_node, void *private)
 {
 	/*
 	 * This function supports replacing exceptional entries and
@@ -900,7 +910,13 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	replace_slot(root, node, slot, item,
 		     !node && slot != (void **)&root->rnode);
 
-	delete_node(root, node);
+	if (!node)
+		return;
+
+	if (update_node)
+		update_node(node, private);
+
+	delete_node(root, node, update_node, private);
 }
 
 /**
@@ -1585,7 +1601,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
 bool __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node)
 {
-	return delete_node(root, node);
+	return delete_node(root, node, NULL, NULL);
 }
 
 static inline void delete_sibling_entries(struct radix_tree_node *node,
@@ -1642,7 +1658,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
 		node_tag_clear(root, node, tag, offset);
 
 	delete_sibling_entries(node, node_to_entry(slot), offset);
-	__radix_tree_replace(root, node, slot, NULL);
+	__radix_tree_replace(root, node, slot, NULL, NULL, NULL);
 
 	return entry;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 3149ddee8f55..abd7403aba41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -311,7 +311,8 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 		return -ENOENT;
 	if (item != expected)
 		return -ENOENT;
-	__radix_tree_replace(&mapping->page_tree, node, pslot, replacement);
+	__radix_tree_replace(&mapping->page_tree, node, pslot,
+			     replacement, NULL, NULL);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 14b468791fa955d442f962fdf5207dfd39a131c8 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 12 Dec 2016 16:43:52 -0800
Subject: mm: workingset: move shadow entry tracking to radix tree exceptional
 tracking

Currently, we track the shadow entries in the page cache in the upper
bits of the radix_tree_node->count, behind the back of the radix tree
implementation.  Because the radix tree code has no awareness of them,
we rely on random subtleties throughout the implementation (such as the
node->count != 1 check in the shrinking code, which is meant to exclude
multi-entry nodes but also happens to skip nodes with only one shadow
entry, as that's accounted in the upper bits).  This is error prone and
has, in fact, caused the bug fixed in d3798ae8c6f3 ("mm: filemap: don't
plant shadow entries without radix tree node").

To remove these subtleties, this patch moves shadow entry tracking from
the upper bits of node->count to the existing counter for exceptional
entries.  node->count goes back to being a simple counter of valid
entries in the tree node and can be shrunk to a single byte.

This vastly simplifies the page cache code.  All accounting happens
natively inside the radix tree implementation, and maintaining the LRU
linkage of shadow nodes is consolidated into a single function in the
workingset code that is called for leaf nodes affected by a change in
the page cache tree.

This also removes the last user of the __radix_delete_node() return
value.  Eliminate it.

Link: http://lkml.kernel.org/r/20161117193211.GE23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h |  8 ++-----
 include/linux/swap.h       | 34 +---------------------------
 lib/radix-tree.c           | 25 +++++----------------
 mm/filemap.c               | 54 +++++---------------------------------------
 mm/truncate.c              | 21 +++--------------
 mm/workingset.c            | 56 +++++++++++++++++++++++++++++++++++-----------
 6 files changed, 60 insertions(+), 138 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 15c972ea9510..744486057e9e 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -80,14 +80,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
 
-/* Internally used bits of node->count */
-#define RADIX_TREE_COUNT_SHIFT	(RADIX_TREE_MAP_SHIFT + 1)
-#define RADIX_TREE_COUNT_MASK	((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
-
 struct radix_tree_node {
 	unsigned char	shift;		/* Bits remaining in each slot */
 	unsigned char	offset;		/* Slot offset in parent */
-	unsigned int	count;		/* Total entry count */
+	unsigned char	count;		/* Total entry count */
 	unsigned char	exceptional;	/* Exceptional entry count */
 	union {
 		struct {
@@ -270,7 +266,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  radix_tree_update_node_t update_node, void *private);
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item);
-bool __radix_tree_delete_node(struct radix_tree_root *root,
+void __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a56523cefb9b..09b212d37f1d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -246,39 +246,7 @@ struct swap_info_struct {
 void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
-extern struct list_lru workingset_shadow_nodes;
-
-static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
-{
-	return node->count & RADIX_TREE_COUNT_MASK;
-}
-
-static inline void workingset_node_pages_inc(struct radix_tree_node *node)
-{
-	node->count++;
-}
-
-static inline void workingset_node_pages_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_pages(node));
-	node->count--;
-}
-
-static inline unsigned int workingset_node_shadows(struct radix_tree_node *node)
-{
-	return node->count >> RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
-{
-	node->count += 1U << RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_shadows(node));
-	node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-}
+void workingset_update_node(struct radix_tree_node *node, void *private);
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index df4ff18dd63c..9dbfaac05e6c 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -541,12 +541,10 @@ out:
  *	radix_tree_shrink    -    shrink radix tree to minimum height
  *	@root		radix tree root
  */
-static inline bool radix_tree_shrink(struct radix_tree_root *root,
+static inline void radix_tree_shrink(struct radix_tree_root *root,
 				     radix_tree_update_node_t update_node,
 				     void *private)
 {
-	bool shrunk = false;
-
 	for (;;) {
 		struct radix_tree_node *node = root->rnode;
 		struct radix_tree_node *child;
@@ -606,26 +604,20 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 		}
 
 		radix_tree_node_free(node);
-		shrunk = true;
 	}
-
-	return shrunk;
 }
 
-static bool delete_node(struct radix_tree_root *root,
+static void delete_node(struct radix_tree_root *root,
 			struct radix_tree_node *node,
 			radix_tree_update_node_t update_node, void *private)
 {
-	bool deleted = false;
-
 	do {
 		struct radix_tree_node *parent;
 
 		if (node->count) {
 			if (node == entry_to_node(root->rnode))
-				deleted |= radix_tree_shrink(root, update_node,
-							     private);
-			return deleted;
+				radix_tree_shrink(root, update_node, private);
+			return;
 		}
 
 		parent = node->parent;
@@ -638,12 +630,9 @@ static bool delete_node(struct radix_tree_root *root,
 		}
 
 		radix_tree_node_free(node);
-		deleted = true;
 
 		node = parent;
 	} while (node);
-
-	return deleted;
 }
 
 /**
@@ -1595,13 +1584,11 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
  *	After clearing the slot at @index in @node from radix tree
  *	rooted at @root, call this function to attempt freeing the
  *	node and shrinking the tree.
- *
- *	Returns %true if @node was freed, %false otherwise.
  */
-bool __radix_tree_delete_node(struct radix_tree_root *root,
+void __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node)
 {
-	return delete_node(root, node, NULL, NULL);
+	delete_node(root, node, NULL, NULL);
 }
 
 static inline void delete_sibling_entries(struct radix_tree_node *node,
diff --git a/mm/filemap.c b/mm/filemap.c
index 1ba726aef708..dc3e5fce0b7b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -132,37 +132,19 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		if (!dax_mapping(mapping)) {
 			if (shadowp)
 				*shadowp = p;
-			if (node)
-				workingset_node_shadows_dec(node);
 		} else {
 			/* DAX can replace empty locked entry with a hole */
 			WARN_ON_ONCE(p !=
 				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
 					 RADIX_DAX_ENTRY_LOCK));
-			/* DAX accounts exceptional entries as normal pages */
-			if (node)
-				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
 			dax_wake_mapping_entry_waiter(mapping, page->index,
 						      false);
 		}
 	}
-	radix_tree_replace_slot(&mapping->page_tree, slot, page);
+	__radix_tree_replace(&mapping->page_tree, node, slot, page,
+			     workingset_update_node, mapping);
 	mapping->nrpages++;
-	if (node) {
-		workingset_node_pages_inc(node);
-		/*
-		 * Don't track node that contains actual pages.
-		 *
-		 * Avoid acquiring the list_lru lock if already
-		 * untracked.  The list_empty() test is safe as
-		 * node->private_list is protected by
-		 * mapping->tree_lock.
-		 */
-		if (!list_empty(&node->private_list))
-			list_lru_del(&workingset_shadow_nodes,
-				     &node->private_list);
-	}
 	return 0;
 }
 
@@ -185,8 +167,6 @@ static void page_cache_tree_delete(struct address_space *mapping,
 		__radix_tree_lookup(&mapping->page_tree, page->index + i,
 				    &node, &slot);
 
-		radix_tree_clear_tags(&mapping->page_tree, node, slot);
-
 		if (!node) {
 			VM_BUG_ON_PAGE(nr != 1, page);
 			/*
@@ -196,33 +176,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
 			shadow = NULL;
 		}
 
-		radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
-
-		if (!node)
-			break;
-
-		workingset_node_pages_dec(node);
-		if (shadow)
-			workingset_node_shadows_inc(node);
-		else
-			if (__radix_tree_delete_node(&mapping->page_tree, node))
-				continue;
-
-		/*
-		 * Track node that only contains shadow entries. DAX mappings
-		 * contain no shadow entries and may contain other exceptional
-		 * entries so skip those.
-		 *
-		 * Avoid acquiring the list_lru lock if already tracked.
-		 * The list_empty() test is safe as node->private_list is
-		 * protected by mapping->tree_lock.
-		 */
-		if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
-				list_empty(&node->private_list)) {
-			node->private_data = mapping;
-			list_lru_add(&workingset_shadow_nodes,
-					&node->private_list);
-		}
+		radix_tree_clear_tags(&mapping->page_tree, node, slot);
+		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+				     workingset_update_node, mapping);
 	}
 
 	if (shadow) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 3c631c357873..fd97f1dbce29 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -44,28 +44,13 @@ static void clear_exceptional_entry(struct address_space *mapping,
 	 * without the tree itself locked.  These unlocked entries
 	 * need verification under the tree lock.
 	 */
-	if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
-				&slot))
+	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
 		goto unlock;
 	if (*slot != entry)
 		goto unlock;
-	radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
+	__radix_tree_replace(&mapping->page_tree, node, slot, NULL,
+			     workingset_update_node, mapping);
 	mapping->nrexceptional--;
-	if (!node)
-		goto unlock;
-	workingset_node_shadows_dec(node);
-	/*
-	 * Don't track node without shadow entries.
-	 *
-	 * Avoid acquiring the list_lru lock if already untracked.
-	 * The list_empty() test is safe as node->private_list is
-	 * protected by mapping->tree_lock.
-	 */
-	if (!workingset_node_shadows(node) &&
-	    !list_empty(&node->private_list))
-		list_lru_del(&workingset_shadow_nodes,
-				&node->private_list);
-	__radix_tree_delete_node(&mapping->page_tree, node);
 unlock:
 	spin_unlock_irq(&mapping->tree_lock);
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index 98f830897b1b..ef556bf1323d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -10,6 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -334,18 +335,45 @@ out:
  * point where they would still be useful.
  */
 
-struct list_lru workingset_shadow_nodes;
+static struct list_lru shadow_nodes;
+
+void workingset_update_node(struct radix_tree_node *node, void *private)
+{
+	struct address_space *mapping = private;
+
+	/* Only regular page cache has shadow entries */
+	if (dax_mapping(mapping) || shmem_mapping(mapping))
+		return;
+
+	/*
+	 * Track non-empty nodes that contain only shadow entries;
+	 * unlink those that contain pages or are being freed.
+	 *
+	 * Avoid acquiring the list_lru lock when the nodes are
+	 * already where they should be. The list_empty() test is safe
+	 * as node->private_list is protected by &mapping->tree_lock.
+	 */
+	if (node->count && node->count == node->exceptional) {
+		if (list_empty(&node->private_list)) {
+			node->private_data = mapping;
+			list_lru_add(&shadow_nodes, &node->private_list);
+		}
+	} else {
+		if (!list_empty(&node->private_list))
+			list_lru_del(&shadow_nodes, &node->private_list);
+	}
+}
 
 static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 					struct shrink_control *sc)
 {
-	unsigned long shadow_nodes;
 	unsigned long max_nodes;
+	unsigned long nodes;
 	unsigned long pages;
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
+	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 	local_irq_enable();
 
 	if (sc->memcg) {
@@ -372,10 +400,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	 */
 	max_nodes = pages >> (1 + RADIX_TREE_MAP_SHIFT - 3);
 
-	if (shadow_nodes <= max_nodes)
+	if (nodes <= max_nodes)
 		return 0;
 
-	return shadow_nodes - max_nodes;
+	return nodes - max_nodes;
 }
 
 static enum lru_status shadow_lru_isolate(struct list_head *item,
@@ -418,22 +446,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-	if (WARN_ON_ONCE(!workingset_node_shadows(node)))
+	if (WARN_ON_ONCE(!node->exceptional))
 		goto out_invalid;
-	if (WARN_ON_ONCE(workingset_node_pages(node)))
+	if (WARN_ON_ONCE(node->count != node->exceptional))
 		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
 			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
 				goto out_invalid;
+			if (WARN_ON_ONCE(!node->exceptional))
+				goto out_invalid;
 			if (WARN_ON_ONCE(!mapping->nrexceptional))
 				goto out_invalid;
 			node->slots[i] = NULL;
-			workingset_node_shadows_dec(node);
+			node->exceptional--;
+			node->count--;
 			mapping->nrexceptional--;
 		}
 	}
-	if (WARN_ON_ONCE(workingset_node_shadows(node)))
+	if (WARN_ON_ONCE(node->exceptional))
 		goto out_invalid;
 	inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
 	__radix_tree_delete_node(&mapping->page_tree, node);
@@ -456,8 +487,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	ret =  list_lru_shrink_walk(&workingset_shadow_nodes, sc,
-				    shadow_lru_isolate, NULL);
+	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
 	local_irq_enable();
 	return ret;
 }
@@ -496,7 +526,7 @@ static int __init workingset_init(void)
 	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
 	       timestamp_bits, max_order, bucket_order);
 
-	ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
+	ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
 	if (ret)
 		goto err;
 	ret = register_shrinker(&workingset_shadow_shrinker);
@@ -504,7 +534,7 @@ static int __init workingset_init(void)
 		goto err_list_lru;
 	return 0;
 err_list_lru:
-	list_lru_destroy(&workingset_shadow_nodes);
+	list_lru_destroy(&shadow_nodes);
 err:
 	return ret;
 }
-- 
cgit v1.2.3


From bf22e37a641327e34681b7b6959d9646e3886770 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 12 Dec 2016 16:44:10 -0800
Subject: mm: add vfree_atomic()

We are going to use sleeping lock for freeing vmap.  However some
vfree() users want to free memory from atomic (but not from interrupt)
context.  For this we add vfree_atomic() - deferred variation of vfree()
which can be used in any atomic context (except NMIs).

[akpm@linux-foundation.org: tweak comment grammar]
[aryabinin@virtuozzo.com: use raw_cpu_ptr() instead of this_cpu_ptr()]
  Link: http://lkml.kernel.org/r/1481553981-3856-1-git-send-email-aryabinin@virtuozzo.com
Link: http://lkml.kernel.org/r/1479474236-4139-5-git-send-email-hch@lst.de
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Jisheng Zhang <jszhang@marvell.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: John Dias <joaodias@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h |  1 +
 mm/vmalloc.c            | 42 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3d9d786a943c..d68edffbf142 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -82,6 +82,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller);
 
 extern void vfree(const void *addr);
+extern void vfree_atomic(const void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1f5501b43026..4ac776f10ad1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1486,7 +1486,39 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	kfree(area);
 	return;
 }
- 
+
+static inline void __vfree_deferred(const void *addr)
+{
+	/*
+	 * Use raw_cpu_ptr() because this can be called from preemptible
+	 * context. Preemption is absolutely fine here, because the llist_add()
+	 * implementation is lockless, so it works even if we are adding to
+	 * nother cpu's list.  schedule_work() should be fine with this too.
+	 */
+	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
+
+	if (llist_add((struct llist_node *)addr, &p->list))
+		schedule_work(&p->wq);
+}
+
+/**
+ *	vfree_atomic  -  release memory allocated by vmalloc()
+ *	@addr:		memory base address
+ *
+ *	This one is just like vfree() but can be called in any atomic context
+ *	except NMIs.
+ */
+void vfree_atomic(const void *addr)
+{
+	BUG_ON(in_nmi());
+
+	kmemleak_free(addr);
+
+	if (!addr)
+		return;
+	__vfree_deferred(addr);
+}
+
 /**
  *	vfree  -  release memory allocated by vmalloc()
  *	@addr:		memory base address
@@ -1509,11 +1541,9 @@ void vfree(const void *addr)
 
 	if (!addr)
 		return;
-	if (unlikely(in_interrupt())) {
-		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
-		if (llist_add((struct llist_node *)addr, &p->list))
-			schedule_work(&p->wq);
-	} else
+	if (unlikely(in_interrupt()))
+		__vfree_deferred(addr);
+	else
 		__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
-- 
cgit v1.2.3


From d5a187daf5856df9b997f9d208e5a7b64006eb2e Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 12 Dec 2016 16:44:38 -0800
Subject: mm, rmap: handle anon_vma_prepare() common case inline

anon_vma_prepare() is mostly a large "if (unlikely(...))" block, as the
expected common case is that an anon_vma already exists.  We could turn
the condition around and return 0, but it also makes sense to do it
inline and avoid a call for the common case.

Bloat-o-meter naturally shows that inlining the check has some code size
costs:

add/remove: 1/1 grow/shrink: 4/0 up/down: 475/-373 (102)
function                                     old     new   delta
__anon_vma_prepare                             -     359    +359
handle_mm_fault                             2744    2796     +52
hugetlb_cow                                 1146    1170     +24
hugetlb_fault                               2123    2145     +22
wp_page_copy                                1469    1487     +18
anon_vma_prepare                             373       -    -373

Checking the asm however confirms that the hot paths now avoid a call,
which is moved away.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20161116074005.22768-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 10 +++++++-
 mm/rmap.c            | 69 ++++++++++++++++++++++++++--------------------------
 2 files changed, 43 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b46bb5620a76..15321fb1df6b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -137,11 +137,19 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
  * anon_vma helper functions.
  */
 void anon_vma_init(void);	/* create anon_vma_cachep */
-int  anon_vma_prepare(struct vm_area_struct *);
+int  __anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 
+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+	if (likely(vma->anon_vma))
+		return 0;
+
+	return __anon_vma_prepare(vma);
+}
+
 static inline void anon_vma_merge(struct vm_area_struct *vma,
 				  struct vm_area_struct *next)
 {
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ef36404e7b2..91619fd70939 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -141,14 +141,15 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
 }
 
 /**
- * anon_vma_prepare - attach an anon_vma to a memory region
+ * __anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
  *
  * This makes sure the memory mapping described by 'vma' has
  * an 'anon_vma' attached to it, so that we can associate the
  * anonymous pages mapped into it with that anon_vma.
  *
- * The common case will be that we already have one, but if
+ * The common case will be that we already have one, which
+ * is handled inline by anon_vma_prepare(). But if
  * not we either need to find an adjacent mapping that we
  * can re-use the anon_vma from (very common when the only
  * reason for splitting a vma has been mprotect()), or we
@@ -167,48 +168,46 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  *
  * This must be called with the mmap_sem held for reading.
  */
-int anon_vma_prepare(struct vm_area_struct *vma)
+int __anon_vma_prepare(struct vm_area_struct *vma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct mm_struct *mm = vma->vm_mm;
+	struct anon_vma *anon_vma, *allocated;
 	struct anon_vma_chain *avc;
 
 	might_sleep();
-	if (unlikely(!anon_vma)) {
-		struct mm_struct *mm = vma->vm_mm;
-		struct anon_vma *allocated;
 
-		avc = anon_vma_chain_alloc(GFP_KERNEL);
-		if (!avc)
-			goto out_enomem;
+	avc = anon_vma_chain_alloc(GFP_KERNEL);
+	if (!avc)
+		goto out_enomem;
+
+	anon_vma = find_mergeable_anon_vma(vma);
+	allocated = NULL;
+	if (!anon_vma) {
+		anon_vma = anon_vma_alloc();
+		if (unlikely(!anon_vma))
+			goto out_enomem_free_avc;
+		allocated = anon_vma;
+	}
 
-		anon_vma = find_mergeable_anon_vma(vma);
+	anon_vma_lock_write(anon_vma);
+	/* page_table_lock to protect against threads */
+	spin_lock(&mm->page_table_lock);
+	if (likely(!vma->anon_vma)) {
+		vma->anon_vma = anon_vma;
+		anon_vma_chain_link(vma, avc, anon_vma);
+		/* vma reference or self-parent link for new root */
+		anon_vma->degree++;
 		allocated = NULL;
-		if (!anon_vma) {
-			anon_vma = anon_vma_alloc();
-			if (unlikely(!anon_vma))
-				goto out_enomem_free_avc;
-			allocated = anon_vma;
-		}
+		avc = NULL;
+	}
+	spin_unlock(&mm->page_table_lock);
+	anon_vma_unlock_write(anon_vma);
 
-		anon_vma_lock_write(anon_vma);
-		/* page_table_lock to protect against threads */
-		spin_lock(&mm->page_table_lock);
-		if (likely(!vma->anon_vma)) {
-			vma->anon_vma = anon_vma;
-			anon_vma_chain_link(vma, avc, anon_vma);
-			/* vma reference or self-parent link for new root */
-			anon_vma->degree++;
-			allocated = NULL;
-			avc = NULL;
-		}
-		spin_unlock(&mm->page_table_lock);
-		anon_vma_unlock_write(anon_vma);
+	if (unlikely(allocated))
+		put_anon_vma(allocated);
+	if (unlikely(avc))
+		anon_vma_chain_free(avc);
 
-		if (unlikely(allocated))
-			put_anon_vma(allocated);
-		if (unlikely(avc))
-			anon_vma_chain_free(avc);
-	}
 	return 0;
 
  out_enomem_free_avc:
-- 
cgit v1.2.3


From 8e8780a547d987b6465c9458402177fe706c5624 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <bp@benjamin.pe>
Date: Mon, 12 Dec 2016 16:45:38 -0800
Subject: compiler-gcc.h: use "proved" instead of "proofed"

Link: http://lkml.kernel.org/r/1477894241.1103202.772260161.1B0A5995@webmail.messagingengine.com
Signed-off-by: Benjamin Peterson <bp@benjamin.pe>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 928e5ca0caee..0444b1336268 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -21,7 +21,7 @@
  * clobbered. The issue is as follows: while the inline asm might
  * access any memory it wants, the compiler could have fit all of
  * @ptr into memory registers instead, and since @ptr never escaped
- * from that, it proofed that the inline asm wasn't touching any of
+ * from that, it proved that the inline asm wasn't touching any of
  * it. This version works well with both compilers, i.e. we're telling
  * the compiler that the inline asm absolutely may see the contents
  * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
-- 
cgit v1.2.3


From 497957576cf8a2150d723aedd74ea60b5d498bfe Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 12 Dec 2016 16:45:47 -0800
Subject: printk/kdb: handle more message headers

Commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing
continuation lines") allows to define more message headers for a single
message.  The motivation is that continuous lines might get mixed.
Therefore it make sense to define the right log level for every piece of
a cont line.

This patch introduces printk_skip_headers() that will skip all headers
and uses it in the kdb code instead of printk_skip_level().

This approach helps to fix other printk_skip_level() users
independently.

Link: http://lkml.kernel.org/r/1478695291-12169-3-git-send-email-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Cc: Joe Perches <joe@perches.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fb.com>
Cc: David Sterba <dsterba@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h    | 8 ++++++++
 kernel/debug/kdb/kdb_io.c | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index eac1af8502bb..a0859e169bc3 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,6 +31,14 @@ static inline const char *printk_skip_level(const char *buffer)
 	return buffer;
 }
 
+static inline const char *printk_skip_headers(const char *buffer)
+{
+	while (printk_get_level(buffer))
+		buffer = printk_skip_level(buffer);
+
+	return buffer;
+}
+
 #define CONSOLE_EXT_LOG_MAX	8192
 
 /* printk's without a loglevel use this.. */
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index fc1ef736253c..98c9011eac78 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -697,7 +697,7 @@ kdb_printit:
 	 * Write to all consoles.
 	 */
 	retlen = strlen(kdb_buffer);
-	cp = (char *) printk_skip_level(kdb_buffer);
+	cp = (char *) printk_skip_headers(kdb_buffer);
 	if (!dbg_kdb_mode && kgdb_connected) {
 		gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
 	} else {
-- 
cgit v1.2.3


From 262c5e86fec7cfd59754732001a9ff5b13eba501 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 12 Dec 2016 16:45:50 -0800
Subject: printk/btrfs: handle more message headers

Commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing
continuation lines") allows to define more message headers for a single
message.  The motivation is that continuous lines might get mixed.
Therefore it make sense to define the right log level for every piece of
a cont line.

The current btrfs_printk() macros do not support continuous lines at the
moment.  But better be prepared for a custom messages and avoid
potential "lvl" buffer overflow.

This patch iterates over the entire message header.  It is interested
only into the message level like the original code.

This patch also introduces PRINTK_MAX_SINGLE_HEADER_LEN.  Three bytes
are enough for the message level header at the moment.  But it used to
be three, see the commit 04d2c8c83d0e ("printk: convert the format for
KERN_<LEVEL> to a 2 byte pattern").

Also I fixed the default ratelimit level.  It looked very strange when it
was different from the default log level.

[pmladek@suse.com: Fix a check of the valid message level]
  Link: http://lkml.kernel.org/r/20161111183236.GD2145@dhcp128.suse.cz
Link: http://lkml.kernel.org/r/1478695291-12169-4-git-send-email-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: David Sterba <dsterba@suse.com>
Cc: Joe Perches <joe@perches.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/super.c       | 26 +++++++++++++++-----------
 include/linux/printk.h |  2 ++
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 74ed5aae6cea..180f910339f4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = {
 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 	struct super_block *sb = fs_info->sb;
-	char lvl[4];
+	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1];
 	struct va_format vaf;
 	va_list args;
-	const char *type = logtypes[4];
+	const char *type = NULL;
 	int kern_level;
 	struct ratelimit_state *ratelimit;
 
 	va_start(args, fmt);
 
-	kern_level = printk_get_level(fmt);
-	if (kern_level) {
+	while ((kern_level = printk_get_level(fmt)) != 0) {
 		size_t size = printk_skip_level(fmt) - fmt;
-		memcpy(lvl, fmt,  size);
-		lvl[size] = '\0';
+
+		if (kern_level >= '0' && kern_level <= '7') {
+			memcpy(lvl, fmt,  size);
+			lvl[size] = '\0';
+			type = logtypes[kern_level - '0'];
+			ratelimit = &printk_limits[kern_level - '0'];
+		}
 		fmt += size;
-		type = logtypes[kern_level - '0'];
-		ratelimit = &printk_limits[kern_level - '0'];
-	} else {
+	}
+
+	if (!type) {
 		*lvl = '\0';
-		/* Default to debug output */
-		ratelimit = &printk_limits[7];
+		type = logtypes[4];
+		ratelimit = &printk_limits[4];
 	}
 
 	vaf.fmt = fmt;
diff --git a/include/linux/printk.h b/include/linux/printk.h
index a0859e169bc3..afe8ccec1672 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -10,6 +10,8 @@
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
+#define PRINTK_MAX_SINGLE_HEADER_LEN 2
+
 static inline int printk_get_level(const char *buffer)
 {
 	if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
-- 
cgit v1.2.3


From a8cfdc68f6cfc0c7ffc6d664406fe7f06f17eef4 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Mon, 12 Dec 2016 16:45:56 -0800
Subject: printk: add Kconfig option to set default console loglevel

Add a configuration option to set the default console loglevel.  This
is, as before, still possible to override at runtime through bootargs
(loglevel=<x>), sysrq and /proc/printk.

There are cases where adding additional arguments on the commandline is
impractical, and changing the default for the kernel when being built
makes more sense.  Provide such a method here, for those who choose to
do so.

Also, while touching this code, clarify the difference between
MESSAGE_LOGLEVEL_DEFAULT and CONSOLE_LOGLEVEL_DEFAULT.

Link: http://lkml.kernel.org/r/1479676829-30031-1-git-send-email-olof@lixom.net
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h |  7 ++++++-
 lib/Kconfig.debug      | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index afe8ccec1672..3472cc6b7a60 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -50,10 +50,15 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
 #define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
 #define CONSOLE_LOGLEVEL_QUIET	 4 /* Shhh ..., when booted with "quiet" */
-#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */
 #define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
 #define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
 
+/*
+ * Default used to be hard-coded at 7, we're now allowing it to be set from
+ * kernel config.
+ */
+#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
+
 extern int console_printk[];
 
 #define console_loglevel (console_printk[0])
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9bb7d825ba14..65a619e0ad5d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,6 +15,21 @@ config PRINTK_TIME
 	  The behavior is also controlled by the kernel command line
 	  parameter printk.time=1. See Documentation/kernel-parameters.txt
 
+config CONSOLE_LOGLEVEL_DEFAULT
+	int "Default console loglevel (1-15)"
+	range 1 15
+	default "7"
+	help
+	  Default loglevel to determine what will be printed on the console.
+
+	  Setting a default here is equivalent to passing in loglevel=<x> in
+	  the kernel bootargs. loglevel=<x> continues to override whatever
+	  value is specified here as well.
+
+	  Note: This does not affect the log level of un-prefixed prink()
+	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
+	  option.
+
 config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
@@ -26,6 +41,10 @@ config MESSAGE_LOGLEVEL_DEFAULT
 	  that are auditing their logs closely may want to set it to a lower
 	  priority.
 
+	  Note: This does not affect what message level gets printed on the console
+	  by default. To change that, use loglevel=<x> in the kernel bootargs,
+	  or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value.
+
 config BOOT_PRINTK_DELAY
 	bool "Delay each boot printk message by N milliseconds"
 	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
-- 
cgit v1.2.3


From c226dc22ec4904340e3e14a536983cda3dbe7914 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 31 Oct 2016 12:15:20 +0200
Subject: net/mlx5: Report multi packet WQE capabilities

Multi packet WQE enables sending multiple fix sized packets
using a single WQE. The exposed field reports such HW support.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2632cb2caf10..0779ad2e8f51 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -576,7 +576,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         self_lb_en_modifiable[0x1];
 	u8         reserved_at_9[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_at_10[0x2];
+	u8         multi_pkt_send_wqe[0x2];
 	u8	   wqe_inline_mode[0x2];
 	u8         rss_ind_tbl_cap[0x4];
 	u8         reg_umr_sq[0x1];
-- 
cgit v1.2.3


From 0184cfe72d2f139c4feed7f3820ba2269f5de322 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Fri, 21 Oct 2016 04:57:27 +0200
Subject: crypto: doc - fix source comments for Sphinx

Update comments to avoid any complaints from Sphinx during compilation.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/crypto/aead.h     | 14 +++++++-------
 include/crypto/hash.h     |  2 +-
 include/crypto/skcipher.h |  4 ++--
 include/linux/crypto.h    |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 12f84327ca36..e725155c6389 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -55,14 +55,14 @@
  * The scatter list pointing to the input data must contain:
  *
  * * for RFC4106 ciphers, the concatenation of
- * associated authentication data || IV || plaintext or ciphertext. Note, the
- * same IV (buffer) is also set with the aead_request_set_crypt call. Note,
- * the API call of aead_request_set_ad must provide the length of the AAD and
- * the IV. The API call of aead_request_set_crypt only points to the size of
- * the input plaintext or ciphertext.
+ *   associated authentication data || IV || plaintext or ciphertext. Note, the
+ *   same IV (buffer) is also set with the aead_request_set_crypt call. Note,
+ *   the API call of aead_request_set_ad must provide the length of the AAD and
+ *   the IV. The API call of aead_request_set_crypt only points to the size of
+ *   the input plaintext or ciphertext.
  *
  * * for "normal" AEAD ciphers, the concatenation of
- * associated authentication data || plaintext or ciphertext.
+ *   associated authentication data || plaintext or ciphertext.
  *
  * It is important to note that if multiple scatter gather list entries form
  * the input data mentioned above, the first entry must not point to a NULL
@@ -452,7 +452,7 @@ static inline void aead_request_free(struct aead_request *req)
  * completes
  *
  * The callback function is registered with the aead_request handle and
- * must comply with the following template
+ * must comply with the following template::
  *
  *	void callback_function(struct crypto_async_request *req, int error)
  */
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 26605888a199..216a2b876147 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -605,7 +605,7 @@ static inline struct ahash_request *ahash_request_cast(
  * the cipher operation completes.
  *
  * The callback function is registered with the &ahash_request handle and
- * must comply with the following template
+ * must comply with the following template::
  *
  *	void callback_function(struct crypto_async_request *req, int error)
  */
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index cc4d98a7892e..750b14f1ada4 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -516,7 +516,7 @@ static inline void skcipher_request_zero(struct skcipher_request *req)
  * skcipher_request_set_callback() - set asynchronous callback function
  * @req: request handle
  * @flags: specify zero or an ORing of the flags
- *         CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
+ *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
  *	   increase the wait queue beyond the initial maximum size;
  *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
  * @compl: callback function pointer to be registered with the request handle
@@ -533,7 +533,7 @@ static inline void skcipher_request_zero(struct skcipher_request *req)
  * cipher operation completes.
  *
  * The callback function is registered with the skcipher_request handle and
- * must comply with the following template
+ * must comply with the following template::
  *
  *	void callback_function(struct crypto_async_request *req, int error)
  */
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7cee5551625b..faf8127234e1 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -960,7 +960,7 @@ static inline void ablkcipher_request_free(struct ablkcipher_request *req)
  * ablkcipher_request_set_callback() - set asynchronous callback function
  * @req: request handle
  * @flags: specify zero or an ORing of the flags
- *         CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
+ *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
  *	   increase the wait queue beyond the initial maximum size;
  *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
  * @compl: callback function pointer to be registered with the request handle
@@ -977,7 +977,7 @@ static inline void ablkcipher_request_free(struct ablkcipher_request *req)
  * cipher operation completes.
  *
  * The callback function is registered with the ablkcipher_request handle and
- * must comply with the following template
+ * must comply with the following template::
  *
  *	void callback_function(struct crypto_async_request *req, int error)
  */
-- 
cgit v1.2.3


From 74dcba3589fc184c7118905eda22b3a4aaef95ff Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Tue, 6 Dec 2016 19:09:16 -0600
Subject: NTB: correct ntb_spad_count comment typo

The comment for ntb_spad_count incorrectly referred to ntb_mw_count.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/ntb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 6f47562d477b..50a7dbe88cf3 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -896,7 +896,7 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb)
 }
 
 /**
- * ntb_mw_count() - get the number of scratchpads
+ * ntb_spad_count() - get the number of scratchpads
  * @ntb:	NTB device context.
  *
  * Hardware and topology may support a different number of scratchpads.
-- 
cgit v1.2.3


From e28d2af43614eb86f59812e7221735fc221bbc10 Mon Sep 17 00:00:00 2001
From: Ingo Tuchscherer <ingo.tuchscherer@linux.vnet.ibm.com>
Date: Thu, 25 Aug 2016 11:16:03 +0200
Subject: s390/zcrypt: add multi domain support

Currently the ap infrastructure only supports one domain at a time.
This feature extends the generic cryptographic device driver to
support multiple cryptographic domains simultaneously.

There are now card and queue devices on the AP bus with independent
card and queue drivers. The new /sys layout is as follows:

/sys/bus/ap
    devices
        <xx>.<yyyy> -> ../../../devices/ap/card<xx>/<xx>.<yyyy>
        ...
        card<xx> -> ../../../devices/ap/card<xx>
        ...
    drivers
        <drv>card
            card<xx> -> ../../../../devices/ap/card<xx>
        <drv>queue
            <xx>.<yyyy> -> ../../../../devices/ap/card<xx>/<xx>.<yyyy>
            ...

/sys/devices/ap
    card<xx>
        <xx>.<yyyy>
            driver -> ../../../../bus/ap/drivers/<zzz>queue
            ...
        driver -> ../../../bus/ap/drivers/<drv>card
        ...

The two digit <xx> field is the card number, the four digit <yyyy>
field is the queue number and <drv> is the name of the device driver,
e.g. "cex4".

For compatability /sys/bus/ap/card<xx> for the old layout has to exist,
including the attributes that used to reside there.

With additional contributions from Harald Freudenberger and
Martin Schwidefsky.

Signed-off-by: Ingo Tuchscherer <ingo.tuchscherer@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/Makefile           |    5 +-
 drivers/s390/crypto/ap_bus.c           | 1089 ++++++++------------------------
 drivers/s390/crypto/ap_bus.h           |   94 ++-
 drivers/s390/crypto/ap_card.c          |  172 +++++
 drivers/s390/crypto/ap_queue.c         |  700 ++++++++++++++++++++
 drivers/s390/crypto/zcrypt_api.c       |  929 +++++++++++----------------
 drivers/s390/crypto/zcrypt_api.h       |   84 ++-
 drivers/s390/crypto/zcrypt_card.c      |  181 ++++++
 drivers/s390/crypto/zcrypt_cex2a.c     |  212 +++++--
 drivers/s390/crypto/zcrypt_cex4.c      |  317 ++++++----
 drivers/s390/crypto/zcrypt_error.h     |   38 +-
 drivers/s390/crypto/zcrypt_msgtype50.c |   90 +--
 drivers/s390/crypto/zcrypt_msgtype6.c  |  253 ++++----
 drivers/s390/crypto/zcrypt_msgtype6.h  |   12 +-
 drivers/s390/crypto/zcrypt_pcixcc.c    |  358 +++++------
 drivers/s390/crypto/zcrypt_queue.c     |  221 +++++++
 include/linux/mod_devicetable.h        |    3 +-
 17 files changed, 2768 insertions(+), 1990 deletions(-)
 create mode 100644 drivers/s390/crypto/ap_card.c
 create mode 100644 drivers/s390/crypto/ap_queue.c
 create mode 100644 drivers/s390/crypto/zcrypt_card.c
 create mode 100644 drivers/s390/crypto/zcrypt_queue.c

(limited to 'include/linux')

diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index d0549fc87247..0a7fb83f35e5 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -2,10 +2,11 @@
 # S/390 crypto devices
 #
 
-ap-objs := ap_bus.o
+ap-objs := ap_bus.o ap_card.o ap_queue.o
 obj-$(subst m,y,$(CONFIG_ZCRYPT)) += ap.o
 # zcrypt_api.o and zcrypt_msgtype*.o depend on ap.o
-zcrypt-objs := zcrypt_api.o zcrypt_msgtype6.o zcrypt_msgtype50.o
+zcrypt-objs := zcrypt_api.o zcrypt_card.o zcrypt_queue.o
+zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o
 obj-$(CONFIG_ZCRYPT) += zcrypt.o
 # adapter drivers depend on ap.o and zcrypt.o
 obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index fe1cfa4b22c9..ac6c258300cf 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -46,6 +46,7 @@
 #include <linux/ktime.h>
 #include <asm/facility.h>
 #include <linux/crypto.h>
+#include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
 #include "ap_asm.h"
@@ -72,10 +73,12 @@ static int ap_thread_flag = 0;
 module_param_named(poll_thread, ap_thread_flag, int, S_IRUSR|S_IRGRP);
 MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 0 (off).");
 
-static struct device *ap_root_device = NULL;
+static struct device *ap_root_device;
+
+DEFINE_SPINLOCK(ap_list_lock);
+LIST_HEAD(ap_card_list);
+
 static struct ap_config_info *ap_configuration;
-static DEFINE_SPINLOCK(ap_device_list_lock);
-static LIST_HEAD(ap_device_list);
 static bool initialised;
 
 /*
@@ -129,6 +132,20 @@ static inline int ap_using_interrupts(void)
 	return ap_airq_flag;
 }
 
+/**
+ * ap_airq_ptr() - Get the address of the adapter interrupt indicator
+ *
+ * Returns the address of the local-summary-indicator of the adapter
+ * interrupt handler for AP, or NULL if adapter interrupts are not
+ * available.
+ */
+void *ap_airq_ptr(void)
+{
+	if (ap_using_interrupts())
+		return ap_airq.lsi_ptr;
+	return NULL;
+}
+
 /**
  * ap_interrupts_available(): Test if AP interrupts are available.
  *
@@ -229,101 +246,6 @@ static inline int ap_test_config_domain(unsigned int domain)
 	return ap_test_config(ap_configuration->aqm, domain);
 }
 
-/**
- * ap_queue_enable_interruption(): Enable interruption on an AP.
- * @qid: The AP queue number
- * @ind: the notification indicator byte
- *
- * Enables interruption on AP queue via ap_aqic(). Based on the return
- * value it waits a while and tests the AP queue if interrupts
- * have been switched on using ap_test_queue().
- */
-static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind)
-{
-	struct ap_queue_status status;
-
-	status = ap_aqic(ap_dev->qid, ind);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-	case AP_RESPONSE_OTHERWISE_CHANGED:
-		return 0;
-	case AP_RESPONSE_Q_NOT_AVAIL:
-	case AP_RESPONSE_DECONFIGURED:
-	case AP_RESPONSE_CHECKSTOPPED:
-	case AP_RESPONSE_INVALID_ADDRESS:
-		pr_err("Registering adapter interrupts for AP %d failed\n",
-		       AP_QID_DEVICE(ap_dev->qid));
-		return -EOPNOTSUPP;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-	case AP_RESPONSE_BUSY:
-	default:
-		return -EBUSY;
-	}
-}
-
-/**
- * __ap_send(): Send message to adjunct processor queue.
- * @qid: The AP queue number
- * @psmid: The program supplied message identifier
- * @msg: The message text
- * @length: The message length
- * @special: Special Bit
- *
- * Returns AP queue status structure.
- * Condition code 1 on NQAP can't happen because the L bit is 1.
- * Condition code 2 on NQAP also means the send is incomplete,
- * because a segment boundary was reached. The NQAP is repeated.
- */
-static inline struct ap_queue_status
-__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length,
-	  unsigned int special)
-{
-	if (special == 1)
-		qid |= 0x400000UL;
-	return ap_nqap(qid, psmid, msg, length);
-}
-
-int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
-{
-	struct ap_queue_status status;
-
-	status = __ap_send(qid, psmid, msg, length, 0);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		return 0;
-	case AP_RESPONSE_Q_FULL:
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		return -EBUSY;
-	case AP_RESPONSE_REQ_FAC_NOT_INST:
-		return -EINVAL;
-	default:	/* Device is gone. */
-		return -ENODEV;
-	}
-}
-EXPORT_SYMBOL(ap_send);
-
-int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
-{
-	struct ap_queue_status status;
-
-	if (msg == NULL)
-		return -EINVAL;
-	status = ap_dqap(qid, psmid, msg, length);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		return 0;
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		if (status.queue_empty)
-			return -ENOENT;
-		return -EBUSY;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		return -EBUSY;
-	default:
-		return -ENODEV;
-	}
-}
-EXPORT_SYMBOL(ap_recv);
-
 /**
  * ap_query_queue(): Check if an AP queue is available.
  * @qid: The AP queue number
@@ -338,7 +260,7 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
 	unsigned long info;
 	int nd;
 
-	if (!ap_test_config_card_id(AP_QID_DEVICE(qid)))
+	if (!ap_test_config_card_id(AP_QID_CARD(qid)))
 		return -ENODEV;
 
 	status = ap_test_queue(qid, &info);
@@ -366,9 +288,7 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
 	}
 }
 
-/* State machine definitions and helpers */
-
-static void ap_sm_wait(enum ap_wait wait)
+void ap_wait(enum ap_wait wait)
 {
 	ktime_t hr_time;
 
@@ -397,347 +317,21 @@ static void ap_sm_wait(enum ap_wait wait)
 	}
 }
 
-static enum ap_wait ap_sm_nop(struct ap_device *ap_dev)
-{
-	return AP_WAIT_NONE;
-}
-
-/**
- * ap_sm_recv(): Receive pending reply messages from an AP device but do
- *	not change the state of the device.
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
- */
-static struct ap_queue_status ap_sm_recv(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-	struct ap_message *ap_msg;
-
-	status = ap_dqap(ap_dev->qid, &ap_dev->reply->psmid,
-			 ap_dev->reply->message, ap_dev->reply->length);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		ap_dev->queue_count--;
-		if (ap_dev->queue_count > 0)
-			mod_timer(&ap_dev->timeout,
-				  jiffies + ap_dev->drv->request_timeout);
-		list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
-			if (ap_msg->psmid != ap_dev->reply->psmid)
-				continue;
-			list_del_init(&ap_msg->list);
-			ap_dev->pendingq_count--;
-			ap_msg->receive(ap_dev, ap_msg, ap_dev->reply);
-			break;
-		}
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		if (!status.queue_empty || ap_dev->queue_count <= 0)
-			break;
-		/* The card shouldn't forget requests but who knows. */
-		ap_dev->queue_count = 0;
-		list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
-		ap_dev->requestq_count += ap_dev->pendingq_count;
-		ap_dev->pendingq_count = 0;
-		break;
-	default:
-		break;
-	}
-	return status;
-}
-
-/**
- * ap_sm_read(): Receive pending reply messages from an AP device.
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
- */
-static enum ap_wait ap_sm_read(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-
-	if (!ap_dev->reply)
-		return AP_WAIT_NONE;
-	status = ap_sm_recv(ap_dev);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		if (ap_dev->queue_count > 0) {
-			ap_dev->state = AP_STATE_WORKING;
-			return AP_WAIT_AGAIN;
-		}
-		ap_dev->state = AP_STATE_IDLE;
-		return AP_WAIT_NONE;
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		if (ap_dev->queue_count > 0)
-			return AP_WAIT_INTERRUPT;
-		ap_dev->state = AP_STATE_IDLE;
-		return AP_WAIT_NONE;
-	default:
-		ap_dev->state = AP_STATE_BORKED;
-		return AP_WAIT_NONE;
-	}
-}
-
-/**
- * ap_sm_suspend_read(): Receive pending reply messages from an AP device
- * without changing the device state in between. In suspend mode we don't
- * allow sending new requests, therefore just fetch pending replies.
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_WAIT_NONE or AP_WAIT_AGAIN
- */
-static enum ap_wait ap_sm_suspend_read(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-
-	if (!ap_dev->reply)
-		return AP_WAIT_NONE;
-	status = ap_sm_recv(ap_dev);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		if (ap_dev->queue_count > 0)
-			return AP_WAIT_AGAIN;
-		/* fall through */
-	default:
-		return AP_WAIT_NONE;
-	}
-}
-
-/**
- * ap_sm_write(): Send messages from the request queue to an AP device.
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
- */
-static enum ap_wait ap_sm_write(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-	struct ap_message *ap_msg;
-
-	if (ap_dev->requestq_count <= 0)
-		return AP_WAIT_NONE;
-	/* Start the next request on the queue. */
-	ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
-	status = __ap_send(ap_dev->qid, ap_msg->psmid,
-			   ap_msg->message, ap_msg->length, ap_msg->special);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		ap_dev->queue_count++;
-		if (ap_dev->queue_count == 1)
-			mod_timer(&ap_dev->timeout,
-				  jiffies + ap_dev->drv->request_timeout);
-		list_move_tail(&ap_msg->list, &ap_dev->pendingq);
-		ap_dev->requestq_count--;
-		ap_dev->pendingq_count++;
-		if (ap_dev->queue_count < ap_dev->queue_depth) {
-			ap_dev->state = AP_STATE_WORKING;
-			return AP_WAIT_AGAIN;
-		}
-		/* fall through */
-	case AP_RESPONSE_Q_FULL:
-		ap_dev->state = AP_STATE_QUEUE_FULL;
-		return AP_WAIT_INTERRUPT;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		ap_dev->state = AP_STATE_RESET_WAIT;
-		return AP_WAIT_TIMEOUT;
-	case AP_RESPONSE_MESSAGE_TOO_BIG:
-	case AP_RESPONSE_REQ_FAC_NOT_INST:
-		list_del_init(&ap_msg->list);
-		ap_dev->requestq_count--;
-		ap_msg->rc = -EINVAL;
-		ap_msg->receive(ap_dev, ap_msg, NULL);
-		return AP_WAIT_AGAIN;
-	default:
-		ap_dev->state = AP_STATE_BORKED;
-		return AP_WAIT_NONE;
-	}
-}
-
-/**
- * ap_sm_read_write(): Send and receive messages to/from an AP device.
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
- */
-static enum ap_wait ap_sm_read_write(struct ap_device *ap_dev)
-{
-	return min(ap_sm_read(ap_dev), ap_sm_write(ap_dev));
-}
-
-/**
- * ap_sm_reset(): Reset an AP queue.
- * @qid: The AP queue number
- *
- * Submit the Reset command to an AP queue.
- */
-static enum ap_wait ap_sm_reset(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-
-	status = ap_rapq(ap_dev->qid);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		ap_dev->state = AP_STATE_RESET_WAIT;
-		ap_dev->interrupt = AP_INTR_DISABLED;
-		return AP_WAIT_TIMEOUT;
-	case AP_RESPONSE_BUSY:
-		return AP_WAIT_TIMEOUT;
-	case AP_RESPONSE_Q_NOT_AVAIL:
-	case AP_RESPONSE_DECONFIGURED:
-	case AP_RESPONSE_CHECKSTOPPED:
-	default:
-		ap_dev->state = AP_STATE_BORKED;
-		return AP_WAIT_NONE;
-	}
-}
-
-/**
- * ap_sm_reset_wait(): Test queue for completion of the reset operation
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
- */
-static enum ap_wait ap_sm_reset_wait(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-	unsigned long info;
-
-	if (ap_dev->queue_count > 0 && ap_dev->reply)
-		/* Try to read a completed message and get the status */
-		status = ap_sm_recv(ap_dev);
-	else
-		/* Get the status with TAPQ */
-		status = ap_test_queue(ap_dev->qid, &info);
-
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		if (ap_using_interrupts() &&
-		    ap_queue_enable_interruption(ap_dev,
-						 ap_airq.lsi_ptr) == 0)
-			ap_dev->state = AP_STATE_SETIRQ_WAIT;
-		else
-			ap_dev->state = (ap_dev->queue_count > 0) ?
-				AP_STATE_WORKING : AP_STATE_IDLE;
-		return AP_WAIT_AGAIN;
-	case AP_RESPONSE_BUSY:
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		return AP_WAIT_TIMEOUT;
-	case AP_RESPONSE_Q_NOT_AVAIL:
-	case AP_RESPONSE_DECONFIGURED:
-	case AP_RESPONSE_CHECKSTOPPED:
-	default:
-		ap_dev->state = AP_STATE_BORKED;
-		return AP_WAIT_NONE;
-	}
-}
-
-/**
- * ap_sm_setirq_wait(): Test queue for completion of the irq enablement
- * @ap_dev: pointer to the AP device
- *
- * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
- */
-static enum ap_wait ap_sm_setirq_wait(struct ap_device *ap_dev)
-{
-	struct ap_queue_status status;
-	unsigned long info;
-
-	if (ap_dev->queue_count > 0 && ap_dev->reply)
-		/* Try to read a completed message and get the status */
-		status = ap_sm_recv(ap_dev);
-	else
-		/* Get the status with TAPQ */
-		status = ap_test_queue(ap_dev->qid, &info);
-
-	if (status.int_enabled == 1) {
-		/* Irqs are now enabled */
-		ap_dev->interrupt = AP_INTR_ENABLED;
-		ap_dev->state = (ap_dev->queue_count > 0) ?
-			AP_STATE_WORKING : AP_STATE_IDLE;
-	}
-
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		if (ap_dev->queue_count > 0)
-			return AP_WAIT_AGAIN;
-		/* fallthrough */
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		return AP_WAIT_TIMEOUT;
-	default:
-		ap_dev->state = AP_STATE_BORKED;
-		return AP_WAIT_NONE;
-	}
-}
-
-/*
- * AP state machine jump table
- */
-static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
-	[AP_STATE_RESET_START] = {
-		[AP_EVENT_POLL] = ap_sm_reset,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-	[AP_STATE_RESET_WAIT] = {
-		[AP_EVENT_POLL] = ap_sm_reset_wait,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-	[AP_STATE_SETIRQ_WAIT] = {
-		[AP_EVENT_POLL] = ap_sm_setirq_wait,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-	[AP_STATE_IDLE] = {
-		[AP_EVENT_POLL] = ap_sm_write,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-	[AP_STATE_WORKING] = {
-		[AP_EVENT_POLL] = ap_sm_read_write,
-		[AP_EVENT_TIMEOUT] = ap_sm_reset,
-	},
-	[AP_STATE_QUEUE_FULL] = {
-		[AP_EVENT_POLL] = ap_sm_read,
-		[AP_EVENT_TIMEOUT] = ap_sm_reset,
-	},
-	[AP_STATE_SUSPEND_WAIT] = {
-		[AP_EVENT_POLL] = ap_sm_suspend_read,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-	[AP_STATE_BORKED] = {
-		[AP_EVENT_POLL] = ap_sm_nop,
-		[AP_EVENT_TIMEOUT] = ap_sm_nop,
-	},
-};
-
-static inline enum ap_wait ap_sm_event(struct ap_device *ap_dev,
-				       enum ap_event event)
-{
-	return ap_jumptable[ap_dev->state][event](ap_dev);
-}
-
-static inline enum ap_wait ap_sm_event_loop(struct ap_device *ap_dev,
-					    enum ap_event event)
-{
-	enum ap_wait wait;
-
-	while ((wait = ap_sm_event(ap_dev, event)) == AP_WAIT_AGAIN)
-		;
-	return wait;
-}
-
 /**
  * ap_request_timeout(): Handling of request timeouts
  * @data: Holds the AP device.
  *
  * Handles request timeouts.
  */
-static void ap_request_timeout(unsigned long data)
+void ap_request_timeout(unsigned long data)
 {
-	struct ap_device *ap_dev = (struct ap_device *) data;
+	struct ap_queue *aq = (struct ap_queue *) data;
 
 	if (ap_suspend_flag)
 		return;
-	spin_lock_bh(&ap_dev->lock);
-	ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_TIMEOUT));
-	spin_unlock_bh(&ap_dev->lock);
+	spin_lock_bh(&aq->lock);
+	ap_wait(ap_sm_event(aq, AP_EVENT_TIMEOUT));
+	spin_unlock_bh(&aq->lock);
 }
 
 /**
@@ -772,7 +366,8 @@ static void ap_interrupt_handler(struct airq_struct *airq)
  */
 static void ap_tasklet_fn(unsigned long dummy)
 {
-	struct ap_device *ap_dev;
+	struct ap_card *ac;
+	struct ap_queue *aq;
 	enum ap_wait wait = AP_WAIT_NONE;
 
 	/* Reset the indicator if interrupts are used. Thus new interrupts can
@@ -782,35 +377,35 @@ static void ap_tasklet_fn(unsigned long dummy)
 	if (ap_using_interrupts())
 		xchg(ap_airq.lsi_ptr, 0);
 
-	spin_lock(&ap_device_list_lock);
-	list_for_each_entry(ap_dev, &ap_device_list, list) {
-		spin_lock_bh(&ap_dev->lock);
-		wait = min(wait, ap_sm_event_loop(ap_dev, AP_EVENT_POLL));
-		spin_unlock_bh(&ap_dev->lock);
+	spin_lock_bh(&ap_list_lock);
+	for_each_ap_card(ac) {
+		for_each_ap_queue(aq, ac) {
+			spin_lock_bh(&aq->lock);
+			wait = min(wait, ap_sm_event_loop(aq, AP_EVENT_POLL));
+			spin_unlock_bh(&aq->lock);
+		}
 	}
-	spin_unlock(&ap_device_list_lock);
-	ap_sm_wait(wait);
+	spin_unlock_bh(&ap_list_lock);
+
+	ap_wait(wait);
 }
 
 static int ap_pending_requests(void)
 {
-	struct ap_device *ap_dev;
-	int id, pending = 0;
-
-	for (id = 0; pending == 0 && id < AP_DEVICES; id++) {
-		spin_lock_bh(&ap_device_list_lock);
-		list_for_each_entry(ap_dev, &ap_device_list, list) {
-			spin_lock_bh(&ap_dev->lock);
-			if (ap_dev->queue_count)
-				pending = 1;
-			spin_unlock_bh(&ap_dev->lock);
-			if (pending)
-				break;
+	struct ap_card *ac;
+	struct ap_queue *aq;
+
+	spin_lock_bh(&ap_list_lock);
+	for_each_ap_card(ac) {
+		for_each_ap_queue(aq, ac) {
+			if (aq->queue_count == 0)
+				continue;
+			spin_unlock_bh(&ap_list_lock);
+			return 1;
 		}
-		spin_unlock_bh(&ap_device_list_lock);
 	}
-
-	return pending;
+	spin_unlock_bh(&ap_list_lock);
+	return 0;
 }
 
 /**
@@ -874,207 +469,8 @@ static void ap_poll_thread_stop(void)
 	mutex_unlock(&ap_poll_thread_mutex);
 }
 
-/**
- * ap_queue_message(): Queue a request to an AP device.
- * @ap_dev: The AP device to queue the message to
- * @ap_msg: The message that is to be added
- */
-void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
-{
-	/* For asynchronous message handling a valid receive-callback
-	 * is required. */
-	BUG_ON(!ap_msg->receive);
-
-	spin_lock_bh(&ap_dev->lock);
-	/* Queue the message. */
-	list_add_tail(&ap_msg->list, &ap_dev->requestq);
-	ap_dev->requestq_count++;
-	ap_dev->total_request_count++;
-	/* Send/receive as many request from the queue as possible. */
-	ap_sm_wait(ap_sm_event_loop(ap_dev, AP_EVENT_POLL));
-	spin_unlock_bh(&ap_dev->lock);
-}
-EXPORT_SYMBOL(ap_queue_message);
-
-/**
- * ap_cancel_message(): Cancel a crypto request.
- * @ap_dev: The AP device that has the message queued
- * @ap_msg: The message that is to be removed
- *
- * Cancel a crypto request. This is done by removing the request
- * from the device pending or request queue. Note that the
- * request stays on the AP queue. When it finishes the message
- * reply will be discarded because the psmid can't be found.
- */
-void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
-{
-	struct ap_message *tmp;
-
-	spin_lock_bh(&ap_dev->lock);
-	if (!list_empty(&ap_msg->list)) {
-		list_for_each_entry(tmp, &ap_dev->pendingq, list)
-			if (tmp->psmid == ap_msg->psmid) {
-				ap_dev->pendingq_count--;
-				goto found;
-			}
-		ap_dev->requestq_count--;
-found:
-		list_del_init(&ap_msg->list);
-	}
-	spin_unlock_bh(&ap_dev->lock);
-}
-EXPORT_SYMBOL(ap_cancel_message);
-
-/*
- * AP device related attributes.
- */
-static ssize_t ap_hwtype_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->device_type);
-}
-
-static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL);
-
-static ssize_t ap_raw_hwtype_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->raw_hwtype);
-}
-
-static DEVICE_ATTR(raw_hwtype, 0444, ap_raw_hwtype_show, NULL);
-
-static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->queue_depth);
-}
-
-static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL);
-static ssize_t ap_request_count_show(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	int rc;
-
-	spin_lock_bh(&ap_dev->lock);
-	rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->total_request_count);
-	spin_unlock_bh(&ap_dev->lock);
-	return rc;
-}
-
-static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
-
-static ssize_t ap_requestq_count_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	int rc;
-
-	spin_lock_bh(&ap_dev->lock);
-	rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->requestq_count);
-	spin_unlock_bh(&ap_dev->lock);
-	return rc;
-}
-
-static DEVICE_ATTR(requestq_count, 0444, ap_requestq_count_show, NULL);
-
-static ssize_t ap_pendingq_count_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	int rc;
-
-	spin_lock_bh(&ap_dev->lock);
-	rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->pendingq_count);
-	spin_unlock_bh(&ap_dev->lock);
-	return rc;
-}
-
-static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
-
-static ssize_t ap_reset_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	int rc = 0;
-
-	spin_lock_bh(&ap_dev->lock);
-	switch (ap_dev->state) {
-	case AP_STATE_RESET_START:
-	case AP_STATE_RESET_WAIT:
-		rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
-		break;
-	case AP_STATE_WORKING:
-	case AP_STATE_QUEUE_FULL:
-		rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
-		break;
-	default:
-		rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
-	}
-	spin_unlock_bh(&ap_dev->lock);
-	return rc;
-}
-
-static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL);
-
-static ssize_t ap_interrupt_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	int rc = 0;
-
-	spin_lock_bh(&ap_dev->lock);
-	if (ap_dev->state == AP_STATE_SETIRQ_WAIT)
-		rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
-	else if (ap_dev->interrupt == AP_INTR_ENABLED)
-		rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
-	else
-		rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
-	spin_unlock_bh(&ap_dev->lock);
-	return rc;
-}
-
-static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL);
-
-static ssize_t ap_modalias_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type);
-}
-
-static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
-
-static ssize_t ap_functions_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-	return snprintf(buf, PAGE_SIZE, "0x%08X\n", ap_dev->functions);
-}
-
-static DEVICE_ATTR(ap_functions, 0444, ap_functions_show, NULL);
-
-static struct attribute *ap_dev_attrs[] = {
-	&dev_attr_hwtype.attr,
-	&dev_attr_raw_hwtype.attr,
-	&dev_attr_depth.attr,
-	&dev_attr_request_count.attr,
-	&dev_attr_requestq_count.attr,
-	&dev_attr_pendingq_count.attr,
-	&dev_attr_reset.attr,
-	&dev_attr_interrupt.attr,
-	&dev_attr_modalias.attr,
-	&dev_attr_ap_functions.attr,
-	NULL
-};
-static struct attribute_group ap_dev_attr_group = {
-	.attrs = ap_dev_attrs
-};
+#define is_card_dev(x) ((x)->parent == ap_root_device)
+#define is_queue_dev(x) ((x)->parent != ap_root_device)
 
 /**
  * ap_bus_match()
@@ -1085,7 +481,6 @@ static struct attribute_group ap_dev_attr_group = {
  */
 static int ap_bus_match(struct device *dev, struct device_driver *drv)
 {
-	struct ap_device *ap_dev = to_ap_dev(dev);
 	struct ap_driver *ap_drv = to_ap_drv(drv);
 	struct ap_device_id *id;
 
@@ -1094,10 +489,14 @@ static int ap_bus_match(struct device *dev, struct device_driver *drv)
 	 * supported types of the device_driver.
 	 */
 	for (id = ap_drv->ids; id->match_flags; id++) {
-		if ((id->match_flags & AP_DEVICE_ID_MATCH_DEVICE_TYPE) &&
-		    (id->dev_type != ap_dev->device_type))
-			continue;
-		return 1;
+		if (is_card_dev(dev) &&
+		    id->match_flags & AP_DEVICE_ID_MATCH_CARD_TYPE &&
+		    id->dev_type == to_ap_dev(dev)->device_type)
+			return 1;
+		if (is_queue_dev(dev) &&
+		    id->match_flags & AP_DEVICE_ID_MATCH_QUEUE_TYPE &&
+		    id->dev_type == to_ap_dev(dev)->device_type)
+			return 1;
 	}
 	return 0;
 }
@@ -1133,13 +532,17 @@ static int ap_dev_suspend(struct device *dev)
 {
 	struct ap_device *ap_dev = to_ap_dev(dev);
 
-	/* Poll on the device until all requests are finished. */
-	spin_lock_bh(&ap_dev->lock);
-	ap_dev->state = AP_STATE_SUSPEND_WAIT;
-	while (ap_sm_event(ap_dev, AP_EVENT_POLL) != AP_WAIT_NONE)
-		;
-	ap_dev->state = AP_STATE_BORKED;
-	spin_unlock_bh(&ap_dev->lock);
+	if (ap_dev->drv && ap_dev->drv->suspend)
+		ap_dev->drv->suspend(ap_dev);
+	return 0;
+}
+
+static int ap_dev_resume(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+
+	if (ap_dev->drv && ap_dev->drv->resume)
+		ap_dev->drv->resume(ap_dev);
 	return 0;
 }
 
@@ -1154,9 +557,25 @@ static void ap_bus_suspend(void)
 	tasklet_disable(&ap_tasklet);
 }
 
-static int __ap_devices_unregister(struct device *dev, void *dummy)
+static int __ap_card_devices_unregister(struct device *dev, void *dummy)
+{
+	if (is_card_dev(dev))
+		device_unregister(dev);
+	return 0;
+}
+
+static int __ap_queue_devices_unregister(struct device *dev, void *dummy)
+{
+	if (is_queue_dev(dev))
+		device_unregister(dev);
+	return 0;
+}
+
+static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data)
 {
-	device_unregister(dev);
+	if (is_queue_dev(dev) &&
+	    AP_QID_CARD(to_ap_queue(dev)->qid) == (int)(long) data)
+		device_unregister(dev);
 	return 0;
 }
 
@@ -1164,8 +583,13 @@ static void ap_bus_resume(void)
 {
 	int rc;
 
-	/* Unconditionally remove all AP devices */
-	bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_devices_unregister);
+	/* remove all queue devices */
+	bus_for_each_dev(&ap_bus_type, NULL, NULL,
+			 __ap_queue_devices_unregister);
+	/* remove all card devices */
+	bus_for_each_dev(&ap_bus_type, NULL, NULL,
+			 __ap_card_devices_unregister);
+
 	/* Reset thin interrupt setting */
 	if (ap_interrupts_available() && !ap_using_interrupts()) {
 		rc = register_adapter_interrupt(&ap_airq);
@@ -1207,7 +631,7 @@ static struct notifier_block ap_power_notifier = {
 	.notifier_call = ap_power_event,
 };
 
-static SIMPLE_DEV_PM_OPS(ap_bus_pm_ops, ap_dev_suspend, NULL);
+static SIMPLE_DEV_PM_OPS(ap_bus_pm_ops, ap_dev_suspend, ap_dev_resume);
 
 static struct bus_type ap_bus_type = {
 	.name = "ap",
@@ -1216,17 +640,6 @@ static struct bus_type ap_bus_type = {
 	.pm = &ap_bus_pm_ops,
 };
 
-void ap_device_init_reply(struct ap_device *ap_dev,
-			  struct ap_message *reply)
-{
-	ap_dev->reply = reply;
-
-	spin_lock_bh(&ap_dev->lock);
-	ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_POLL));
-	spin_unlock_bh(&ap_dev->lock);
-}
-EXPORT_SYMBOL(ap_device_init_reply);
-
 static int ap_device_probe(struct device *dev)
 {
 	struct ap_device *ap_dev = to_ap_dev(dev);
@@ -1240,58 +653,22 @@ static int ap_device_probe(struct device *dev)
 	return rc;
 }
 
-/**
- * __ap_flush_queue(): Flush requests.
- * @ap_dev: Pointer to the AP device
- *
- * Flush all requests from the request/pending queue of an AP device.
- */
-static void __ap_flush_queue(struct ap_device *ap_dev)
-{
-	struct ap_message *ap_msg, *next;
-
-	list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) {
-		list_del_init(&ap_msg->list);
-		ap_dev->pendingq_count--;
-		ap_msg->rc = -EAGAIN;
-		ap_msg->receive(ap_dev, ap_msg, NULL);
-	}
-	list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) {
-		list_del_init(&ap_msg->list);
-		ap_dev->requestq_count--;
-		ap_msg->rc = -EAGAIN;
-		ap_msg->receive(ap_dev, ap_msg, NULL);
-	}
-}
-
-void ap_flush_queue(struct ap_device *ap_dev)
-{
-	spin_lock_bh(&ap_dev->lock);
-	__ap_flush_queue(ap_dev);
-	spin_unlock_bh(&ap_dev->lock);
-}
-EXPORT_SYMBOL(ap_flush_queue);
-
 static int ap_device_remove(struct device *dev)
 {
 	struct ap_device *ap_dev = to_ap_dev(dev);
 	struct ap_driver *ap_drv = ap_dev->drv;
 
-	ap_flush_queue(ap_dev);
-	del_timer_sync(&ap_dev->timeout);
-	spin_lock_bh(&ap_device_list_lock);
-	list_del_init(&ap_dev->list);
-	spin_unlock_bh(&ap_device_list_lock);
+	spin_lock_bh(&ap_list_lock);
+	if (is_card_dev(dev))
+		list_del_init(&to_ap_card(dev)->list);
+	else
+		list_del_init(&to_ap_queue(dev)->list);
+	spin_unlock_bh(&ap_list_lock);
 	if (ap_drv->remove)
 		ap_drv->remove(ap_dev);
 	return 0;
 }
 
-static void ap_device_release(struct device *dev)
-{
-	kfree(to_ap_dev(dev));
-}
-
 int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
 		       char *name)
 {
@@ -1354,12 +731,7 @@ static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
 {
 	if (!ap_configuration)	/* QCI not supported */
 		return snprintf(buf, PAGE_SIZE, "not supported\n");
-	if (!test_facility(76))
-		/* format 0 - 16 bit domain field */
-		return snprintf(buf, PAGE_SIZE, "%08x%08x\n",
-				ap_configuration->adm[0],
-				ap_configuration->adm[1]);
-	/* format 1 - 256 bit domain field */
+
 	return snprintf(buf, PAGE_SIZE,
 			"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
 			ap_configuration->adm[0], ap_configuration->adm[1],
@@ -1371,6 +743,22 @@ static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
 static BUS_ATTR(ap_control_domain_mask, 0444,
 		ap_control_domain_mask_show, NULL);
 
+static ssize_t ap_usage_domain_mask_show(struct bus_type *bus, char *buf)
+{
+	if (!ap_configuration)	/* QCI not supported */
+		return snprintf(buf, PAGE_SIZE, "not supported\n");
+
+	return snprintf(buf, PAGE_SIZE,
+			"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
+			ap_configuration->aqm[0], ap_configuration->aqm[1],
+			ap_configuration->aqm[2], ap_configuration->aqm[3],
+			ap_configuration->aqm[4], ap_configuration->aqm[5],
+			ap_configuration->aqm[6], ap_configuration->aqm[7]);
+}
+
+static BUS_ATTR(ap_usage_domain_mask, 0444,
+		ap_usage_domain_mask_show, NULL);
+
 static ssize_t ap_config_time_show(struct bus_type *bus, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time);
@@ -1466,6 +854,7 @@ static BUS_ATTR(ap_max_domain_id, 0444, ap_max_domain_id_show, NULL);
 static struct bus_attribute *const ap_bus_attrs[] = {
 	&bus_attr_ap_domain,
 	&bus_attr_ap_control_domain_mask,
+	&bus_attr_ap_usage_domain_mask,
 	&bus_attr_config_time,
 	&bus_attr_poll_thread,
 	&bus_attr_ap_interrupts,
@@ -1524,110 +913,162 @@ static int ap_select_domain(void)
 	return -ENODEV;
 }
 
-/**
- * __ap_scan_bus(): Scan the AP bus.
- * @dev: Pointer to device
- * @data: Pointer to data
- *
- * Scan the AP bus for new devices.
+/*
+ * helper function to be used with bus_find_dev
+ * matches for the card device with the given id
  */
-static int __ap_scan_bus(struct device *dev, void *data)
+static int __match_card_device_with_id(struct device *dev, void *data)
 {
-	return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data;
+	return is_card_dev(dev) && to_ap_card(dev)->id == (int)(long) data;
 }
 
+/* helper function to be used with bus_find_dev
+ * matches for the queue device with a given qid
+ */
+static int __match_queue_device_with_qid(struct device *dev, void *data)
+{
+	return is_queue_dev(dev) && to_ap_queue(dev)->qid == (int)(long) data;
+}
+
+/**
+ * ap_scan_bus(): Scan the AP bus for new devices
+ * Runs periodically, workqueue timer (ap_config_time)
+ */
 static void ap_scan_bus(struct work_struct *unused)
 {
-	struct ap_device *ap_dev;
+	struct ap_queue *aq;
+	struct ap_card *ac;
 	struct device *dev;
 	ap_qid_t qid;
-	int queue_depth = 0, device_type = 0;
-	unsigned int device_functions = 0;
-	int rc, i, borked;
+	int depth = 0, type = 0;
+	unsigned int functions = 0;
+	int rc, id, dom, borked, domains;
 
 	ap_query_configuration();
 	if (ap_select_domain() != 0)
 		goto out;
 
-
-	spin_lock_bh(&ap_domain_lock);
-	for (i = 0; i < AP_DEVICES; i++) {
-		qid = AP_MKQID(i, ap_domain_index);
+	for (id = 0; id < AP_DEVICES; id++) {
+		/* check if device is registered */
 		dev = bus_find_device(&ap_bus_type, NULL,
-				      (void *)(unsigned long)qid,
-				      __ap_scan_bus);
-		rc = ap_query_queue(qid, &queue_depth, &device_type,
-				    &device_functions);
-		if (dev) {
-			ap_dev = to_ap_dev(dev);
-			spin_lock_bh(&ap_dev->lock);
-			if (rc == -ENODEV)
-				ap_dev->state = AP_STATE_BORKED;
-			borked = ap_dev->state == AP_STATE_BORKED;
-			spin_unlock_bh(&ap_dev->lock);
-			if (borked)	/* Remove broken device */
+				      (void *)(long) id,
+				      __match_card_device_with_id);
+		ac = dev ? to_ap_card(dev) : NULL;
+		if (!ap_test_config_card_id(id)) {
+			if (dev) {
+				/* Card device has been removed from
+				 * configuration, remove the belonging
+				 * queue devices.
+				 */
+				bus_for_each_dev(&ap_bus_type, NULL,
+					(void *)(long) id,
+					__ap_queue_devices_with_id_unregister);
+				/* now remove the card device */
 				device_unregister(dev);
-			put_device(dev);
-			if (!borked)
-				continue;
-		}
-		if (rc)
-			continue;
-		ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
-		if (!ap_dev)
-			break;
-		ap_dev->qid = qid;
-		ap_dev->state = AP_STATE_RESET_START;
-		ap_dev->interrupt = AP_INTR_DISABLED;
-		ap_dev->queue_depth = queue_depth;
-		ap_dev->raw_hwtype = device_type;
-		ap_dev->device_type = device_type;
-		/* CEX6 toleration: map to CEX5 */
-		if (device_type == AP_DEVICE_TYPE_CEX6)
-			ap_dev->device_type = AP_DEVICE_TYPE_CEX5;
-		ap_dev->functions = device_functions;
-		spin_lock_init(&ap_dev->lock);
-		INIT_LIST_HEAD(&ap_dev->pendingq);
-		INIT_LIST_HEAD(&ap_dev->requestq);
-		INIT_LIST_HEAD(&ap_dev->list);
-		setup_timer(&ap_dev->timeout, ap_request_timeout,
-			    (unsigned long) ap_dev);
-
-		ap_dev->device.bus = &ap_bus_type;
-		ap_dev->device.parent = ap_root_device;
-		rc = dev_set_name(&ap_dev->device, "card%02x",
-				  AP_QID_DEVICE(ap_dev->qid));
-		if (rc) {
-			kfree(ap_dev);
-			continue;
-		}
-		/* Add to list of devices */
-		spin_lock_bh(&ap_device_list_lock);
-		list_add(&ap_dev->list, &ap_device_list);
-		spin_unlock_bh(&ap_device_list_lock);
-		/* Start with a device reset */
-		spin_lock_bh(&ap_dev->lock);
-		ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_POLL));
-		spin_unlock_bh(&ap_dev->lock);
-		/* Register device */
-		ap_dev->device.release = ap_device_release;
-		rc = device_register(&ap_dev->device);
-		if (rc) {
-			spin_lock_bh(&ap_dev->lock);
-			list_del_init(&ap_dev->list);
-			spin_unlock_bh(&ap_dev->lock);
-			put_device(&ap_dev->device);
+				put_device(dev);
+			}
 			continue;
 		}
-		/* Add device attributes. */
-		rc = sysfs_create_group(&ap_dev->device.kobj,
-					&ap_dev_attr_group);
-		if (rc) {
-			device_unregister(&ap_dev->device);
-			continue;
+		/* According to the configuration there should be a card
+		 * device, so check if there is at least one valid queue
+		 * and maybe create queue devices and the card device.
+		 */
+		domains = 0;
+		for (dom = 0; dom < AP_DOMAINS; dom++) {
+			qid = AP_MKQID(id, dom);
+			dev = bus_find_device(&ap_bus_type, NULL,
+					      (void *)(long) qid,
+					      __match_queue_device_with_qid);
+			aq = dev ? to_ap_queue(dev) : NULL;
+			if (!ap_test_config_domain(dom)) {
+				if (dev) {
+					/* Queue device exists but has been
+					 * removed from configuration.
+					 */
+					device_unregister(dev);
+					put_device(dev);
+				}
+				continue;
+			}
+			rc = ap_query_queue(qid, &depth, &type, &functions);
+			if (dev) {
+				spin_lock_bh(&aq->lock);
+				if (rc == -ENODEV ||
+				    /* adapter reconfiguration */
+				    (ac && ac->functions != functions))
+					aq->state = AP_STATE_BORKED;
+				borked = aq->state == AP_STATE_BORKED;
+				spin_unlock_bh(&aq->lock);
+				if (borked)	/* Remove broken device */
+					device_unregister(dev);
+				put_device(dev);
+				if (!borked) {
+					domains++;
+					continue;
+				}
+			}
+			if (rc)
+				continue;
+			/* new queue device needed */
+			if (!ac) {
+				/* but first create the card device */
+				ac = ap_card_create(id, depth,
+						    type, functions);
+				if (!ac)
+					continue;
+				ac->ap_dev.device.bus = &ap_bus_type;
+				ac->ap_dev.device.parent = ap_root_device;
+				dev_set_name(&ac->ap_dev.device,
+					     "card%02x", id);
+				/* Register card with AP bus */
+				rc = device_register(&ac->ap_dev.device);
+				if (rc) {
+					put_device(&ac->ap_dev.device);
+					ac = NULL;
+					break;
+				}
+				/* get it and thus adjust reference counter */
+				get_device(&ac->ap_dev.device);
+				/* Add card device to card list */
+				spin_lock_bh(&ap_list_lock);
+				list_add(&ac->list, &ap_card_list);
+				spin_unlock_bh(&ap_list_lock);
+			}
+			/* now create the new queue device */
+			aq = ap_queue_create(qid, type);
+			if (!aq)
+				continue;
+			aq->card = ac;
+			aq->ap_dev.device.bus = &ap_bus_type;
+			aq->ap_dev.device.parent = &ac->ap_dev.device;
+			dev_set_name(&aq->ap_dev.device,
+				     "%02x.%04x", id, dom);
+			/* Add queue device to card queue list */
+			spin_lock_bh(&ap_list_lock);
+			list_add(&aq->list, &ac->queues);
+			spin_unlock_bh(&ap_list_lock);
+			/* Start with a device reset */
+			spin_lock_bh(&aq->lock);
+			ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+			spin_unlock_bh(&aq->lock);
+			/* Register device */
+			rc = device_register(&aq->ap_dev.device);
+			if (rc) {
+				spin_lock_bh(&ap_list_lock);
+				list_del_init(&aq->list);
+				spin_unlock_bh(&ap_list_lock);
+				put_device(&aq->ap_dev.device);
+				continue;
+			}
+			domains++;
+		} /* end domain loop */
+		if (ac) {
+			/* remove card dev if there are no queue devices */
+			if (!domains)
+				device_unregister(&ac->ap_dev.device);
+			put_device(&ac->ap_dev.device);
 		}
-	}
-	spin_unlock_bh(&ap_domain_lock);
+	} /* end device loop */
 out:
 	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
 }
@@ -1787,7 +1228,15 @@ void ap_module_exit(void)
 	del_timer_sync(&ap_config_timer);
 	hrtimer_cancel(&ap_poll_timer);
 	tasklet_kill(&ap_tasklet);
-	bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_devices_unregister);
+
+	/* first remove queue devices */
+	bus_for_each_dev(&ap_bus_type, NULL, NULL,
+			 __ap_queue_devices_unregister);
+	/* now remove the card devices */
+	bus_for_each_dev(&ap_bus_type, NULL, NULL,
+			 __ap_card_devices_unregister);
+
+	/* remove bus attributes */
 	for (i = 0; ap_bus_attrs[i]; i++)
 		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
 	unregister_pm_notifier(&ap_power_notifier);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index fd66d2c450d5..54b17e142792 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -27,7 +27,6 @@
 #define _AP_BUS_H_
 
 #include <linux/device.h>
-#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 
 #define AP_DEVICES 64		/* Number of AP devices. */
@@ -38,14 +37,17 @@
 
 extern int ap_domain_index;
 
+extern spinlock_t ap_list_lock;
+extern struct list_head ap_card_list;
+
 /**
  * The ap_qid_t identifier of an ap queue. It contains a
- * 6 bit device index and a 4 bit queue index (domain).
+ * 6 bit card index and a 4 bit queue index (domain).
  */
 typedef unsigned int ap_qid_t;
 
-#define AP_MKQID(_device, _queue) (((_device) & 63) << 8 | ((_queue) & 255))
-#define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63)
+#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255))
+#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63)
 #define AP_QID_QUEUE(_qid) ((_qid) & 255)
 
 /**
@@ -55,7 +57,7 @@ typedef unsigned int ap_qid_t;
  * @queue_full: Is 1 if the queue is full
  * @pad: A 4 bit pad
  * @int_enabled: Shows if interrupts are enabled for the AP
- * @response_conde: Holds the 8 bit response code
+ * @response_code: Holds the 8 bit response code
  * @pad2: A 16 bit pad
  *
  * The ap queue status word is returned by all three AP functions
@@ -167,7 +169,8 @@ struct ap_driver {
 
 	int (*probe)(struct ap_device *);
 	void (*remove)(struct ap_device *);
-	int request_timeout;		/* request timeout in jiffies */
+	void (*suspend)(struct ap_device *);
+	void (*resume)(struct ap_device *);
 };
 
 #define to_ap_drv(x) container_of((x), struct ap_driver, driver)
@@ -175,38 +178,50 @@ struct ap_driver {
 int ap_driver_register(struct ap_driver *, struct module *, char *);
 void ap_driver_unregister(struct ap_driver *);
 
-typedef enum ap_wait (ap_func_t)(struct ap_device *ap_dev);
-
 struct ap_device {
 	struct device device;
 	struct ap_driver *drv;		/* Pointer to AP device driver. */
-	spinlock_t lock;		/* Per device lock. */
-	struct list_head list;		/* private list of all AP devices. */
+	int device_type;		/* AP device type. */
+};
 
-	enum ap_state state;		/* State of the AP device. */
+#define to_ap_dev(x) container_of((x), struct ap_device, device)
 
-	ap_qid_t qid;			/* AP queue id. */
-	int queue_depth;		/* AP queue depth.*/
-	int device_type;		/* AP device type. */
+struct ap_card {
+	struct ap_device ap_dev;
+	struct list_head list;		/* Private list of AP cards. */
+	struct list_head queues;	/* List of assoc. AP queues */
+	void *private;			/* ap driver private pointer. */
 	int raw_hwtype;			/* AP raw hardware type. */
 	unsigned int functions;		/* AP device function bitfield. */
-	struct timer_list timeout;	/* Timer for request timeouts. */
+	int queue_depth;		/* AP queue depth.*/
+	int id;				/* AP card number. */
+};
+
+#define to_ap_card(x) container_of((x), struct ap_card, ap_dev.device)
 
+struct ap_queue {
+	struct ap_device ap_dev;
+	struct list_head list;		/* Private list of AP queues. */
+	struct ap_card *card;		/* Ptr to assoc. AP card. */
+	spinlock_t lock;		/* Per device lock. */
+	void *private;			/* ap driver private pointer. */
+	ap_qid_t qid;			/* AP queue id. */
 	int interrupt;			/* indicate if interrupts are enabled */
 	int queue_count;		/* # messages currently on AP queue. */
-
-	struct list_head pendingq;	/* List of message sent to AP queue. */
+	enum ap_state state;		/* State of the AP device. */
 	int pendingq_count;		/* # requests on pendingq list. */
-	struct list_head requestq;	/* List of message yet to be sent. */
 	int requestq_count;		/* # requests on requestq list. */
 	int total_request_count;	/* # requests ever for this AP device. */
-
+	int request_timeout;		/* Request timout in jiffies. */
+	struct timer_list timeout;	/* Timer for request timeouts. */
+	struct list_head pendingq;	/* List of message sent to AP queue. */
+	struct list_head requestq;	/* List of message yet to be sent. */
 	struct ap_message *reply;	/* Per device reply message. */
-
-	void *private;			/* ap driver private pointer. */
 };
 
-#define to_ap_dev(x) container_of((x), struct ap_device, device)
+#define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device)
+
+typedef enum ap_wait (ap_func_t)(struct ap_queue *queue);
 
 struct ap_message {
 	struct list_head list;		/* Request queueing. */
@@ -218,7 +233,7 @@ struct ap_message {
 	void *private;			/* ap driver private pointer. */
 	unsigned int special:1;		/* Used for special commands. */
 	/* receive is called from tasklet context */
-	void (*receive)(struct ap_device *, struct ap_message *,
+	void (*receive)(struct ap_queue *, struct ap_message *,
 			struct ap_message *);
 };
 
@@ -233,10 +248,6 @@ struct ap_config_info {
 	unsigned char reserved4[16];
 } __packed;
 
-#define AP_DEVICE(dt)					\
-	.dev_type=(dt),					\
-	.match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE,
-
 /**
  * ap_init_message() - Initialize ap_message.
  * Initialize a message before using. Otherwise this might result in
@@ -251,6 +262,12 @@ static inline void ap_init_message(struct ap_message *ap_msg)
 	ap_msg->receive = NULL;
 }
 
+#define for_each_ap_card(_ac) \
+	list_for_each_entry(_ac, &ap_card_list, list)
+
+#define for_each_ap_queue(_aq, _ac) \
+	list_for_each_entry(_aq, &(_ac)->queues, list)
+
 /*
  * Note: don't use ap_send/ap_recv after using ap_queue_message
  * for the first time. Otherwise the ap message queue will get
@@ -259,11 +276,26 @@ static inline void ap_init_message(struct ap_message *ap_msg)
 int ap_send(ap_qid_t, unsigned long long, void *, size_t);
 int ap_recv(ap_qid_t, unsigned long long *, void *, size_t);
 
-void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
-void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
-void ap_flush_queue(struct ap_device *ap_dev);
+enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event);
+enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event);
+
+void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg);
+void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg);
+void ap_flush_queue(struct ap_queue *aq);
+
+void *ap_airq_ptr(void);
+void ap_wait(enum ap_wait wait);
+void ap_request_timeout(unsigned long data);
 void ap_bus_force_rescan(void);
-void ap_device_init_reply(struct ap_device *ap_dev, struct ap_message *ap_msg);
+
+void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
+struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type);
+void ap_queue_remove(struct ap_queue *aq);
+void ap_queue_suspend(struct ap_device *ap_dev);
+void ap_queue_resume(struct ap_device *ap_dev);
+
+struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
+			       unsigned int device_functions);
 
 int ap_module_init(void);
 void ap_module_exit(void);
diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c
new file mode 100644
index 000000000000..731dc0dbfb75
--- /dev/null
+++ b/drivers/s390/crypto/ap_card.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * Adjunct processor bus, card related code.
+ */
+
+#define KMSG_COMPONENT "ap"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+
+#include "ap_bus.h"
+#include "ap_asm.h"
+
+/*
+ * AP card related attributes.
+ */
+static ssize_t ap_hwtype_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ac->ap_dev.device_type);
+}
+
+static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL);
+
+static ssize_t ap_raw_hwtype_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ac->raw_hwtype);
+}
+
+static DEVICE_ATTR(raw_hwtype, 0444, ap_raw_hwtype_show, NULL);
+
+static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ac->queue_depth);
+}
+
+static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL);
+
+static ssize_t ap_functions_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+
+	return snprintf(buf, PAGE_SIZE, "0x%08X\n", ac->functions);
+}
+
+static DEVICE_ATTR(ap_functions, 0444, ap_functions_show, NULL);
+
+static ssize_t ap_request_count_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+	struct ap_queue *aq;
+	unsigned int req_cnt;
+
+	req_cnt = 0;
+	spin_lock_bh(&ap_list_lock);
+	for_each_ap_queue(aq, ac)
+		req_cnt += aq->total_request_count;
+	spin_unlock_bh(&ap_list_lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
+}
+
+static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+
+static ssize_t ap_requestq_count_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+	struct ap_queue *aq;
+	unsigned int reqq_cnt;
+
+	reqq_cnt = 0;
+	spin_lock_bh(&ap_list_lock);
+	for_each_ap_queue(aq, ac)
+		reqq_cnt += aq->requestq_count;
+	spin_unlock_bh(&ap_list_lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt);
+}
+
+static DEVICE_ATTR(requestq_count, 0444, ap_requestq_count_show, NULL);
+
+static ssize_t ap_pendingq_count_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ap_card *ac = to_ap_card(dev);
+	struct ap_queue *aq;
+	unsigned int penq_cnt;
+
+	penq_cnt = 0;
+	spin_lock_bh(&ap_list_lock);
+	for_each_ap_queue(aq, ac)
+		penq_cnt += aq->pendingq_count;
+	spin_unlock_bh(&ap_list_lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", penq_cnt);
+}
+
+static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
+
+static ssize_t ap_modalias_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type);
+}
+
+static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
+
+static struct attribute *ap_card_dev_attrs[] = {
+	&dev_attr_hwtype.attr,
+	&dev_attr_raw_hwtype.attr,
+	&dev_attr_depth.attr,
+	&dev_attr_ap_functions.attr,
+	&dev_attr_request_count.attr,
+	&dev_attr_requestq_count.attr,
+	&dev_attr_pendingq_count.attr,
+	&dev_attr_modalias.attr,
+	NULL
+};
+
+static struct attribute_group ap_card_dev_attr_group = {
+	.attrs = ap_card_dev_attrs
+};
+
+static const struct attribute_group *ap_card_dev_attr_groups[] = {
+	&ap_card_dev_attr_group,
+	NULL
+};
+
+struct device_type ap_card_type = {
+	.name = "ap_card",
+	.groups = ap_card_dev_attr_groups,
+};
+
+static void ap_card_device_release(struct device *dev)
+{
+	kfree(to_ap_card(dev));
+}
+
+struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
+			       unsigned int functions)
+{
+	struct ap_card *ac;
+
+	ac = kzalloc(sizeof(*ac), GFP_KERNEL);
+	if (!ac)
+		return NULL;
+	INIT_LIST_HEAD(&ac->queues);
+	ac->ap_dev.device.release = ap_card_device_release;
+	ac->ap_dev.device.type = &ap_card_type;
+	ac->ap_dev.device_type = device_type;
+	/* CEX6 toleration: map to CEX5 */
+	if (device_type == AP_DEVICE_TYPE_CEX6)
+		ac->ap_dev.device_type = AP_DEVICE_TYPE_CEX5;
+	ac->raw_hwtype = device_type;
+	ac->queue_depth = queue_depth;
+	ac->functions = functions;
+	ac->id = id;
+	return ac;
+}
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
new file mode 100644
index 000000000000..8f95a071b670
--- /dev/null
+++ b/drivers/s390/crypto/ap_queue.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * Adjunct processor bus, queue related code.
+ */
+
+#define KMSG_COMPONENT "ap"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+
+#include "ap_bus.h"
+#include "ap_asm.h"
+
+/**
+ * ap_queue_enable_interruption(): Enable interruption on an AP queue.
+ * @qid: The AP queue number
+ * @ind: the notification indicator byte
+ *
+ * Enables interruption on AP queue via ap_aqic(). Based on the return
+ * value it waits a while and tests the AP queue if interrupts
+ * have been switched on using ap_test_queue().
+ */
+static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind)
+{
+	struct ap_queue_status status;
+
+	status = ap_aqic(aq->qid, ind);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+	case AP_RESPONSE_OTHERWISE_CHANGED:
+		return 0;
+	case AP_RESPONSE_Q_NOT_AVAIL:
+	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
+	case AP_RESPONSE_INVALID_ADDRESS:
+		pr_err("Registering adapter interrupts for AP device %02x.%04x failed\n",
+		       AP_QID_CARD(aq->qid),
+		       AP_QID_QUEUE(aq->qid));
+		return -EOPNOTSUPP;
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+	case AP_RESPONSE_BUSY:
+	default:
+		return -EBUSY;
+	}
+}
+
+/**
+ * __ap_send(): Send message to adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: The program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ * @special: Special Bit
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status
+__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length,
+	  unsigned int special)
+{
+	if (special == 1)
+		qid |= 0x400000UL;
+	return ap_nqap(qid, psmid, msg, length);
+}
+
+int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	status = __ap_send(qid, psmid, msg, length, 0);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_Q_FULL:
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		return -EBUSY;
+	case AP_RESPONSE_REQ_FAC_NOT_INST:
+		return -EINVAL;
+	default:	/* Device is gone. */
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_send);
+
+int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	if (msg == NULL)
+		return -EINVAL;
+	status = ap_dqap(qid, psmid, msg, length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (status.queue_empty)
+			return -ENOENT;
+		return -EBUSY;
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		return -EBUSY;
+	default:
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_recv);
+
+/* State machine definitions and helpers */
+
+static enum ap_wait ap_sm_nop(struct ap_queue *aq)
+{
+	return AP_WAIT_NONE;
+}
+
+/**
+ * ap_sm_recv(): Receive pending reply messages from an AP queue but do
+ *	not change the state of the device.
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	status = ap_dqap(aq->qid, &aq->reply->psmid,
+			 aq->reply->message, aq->reply->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		aq->queue_count--;
+		if (aq->queue_count > 0)
+			mod_timer(&aq->timeout,
+				  jiffies + aq->request_timeout);
+		list_for_each_entry(ap_msg, &aq->pendingq, list) {
+			if (ap_msg->psmid != aq->reply->psmid)
+				continue;
+			list_del_init(&ap_msg->list);
+			aq->pendingq_count--;
+			ap_msg->receive(aq, ap_msg, aq->reply);
+			break;
+		}
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (!status.queue_empty || aq->queue_count <= 0)
+			break;
+		/* The card shouldn't forget requests but who knows. */
+		aq->queue_count = 0;
+		list_splice_init(&aq->pendingq, &aq->requestq);
+		aq->requestq_count += aq->pendingq_count;
+		aq->pendingq_count = 0;
+		break;
+	default:
+		break;
+	}
+	return status;
+}
+
+/**
+ * ap_sm_read(): Receive pending reply messages from an AP queue.
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_read(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+
+	if (!aq->reply)
+		return AP_WAIT_NONE;
+	status = ap_sm_recv(aq);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (aq->queue_count > 0) {
+			aq->state = AP_STATE_WORKING;
+			return AP_WAIT_AGAIN;
+		}
+		aq->state = AP_STATE_IDLE;
+		return AP_WAIT_NONE;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (aq->queue_count > 0)
+			return AP_WAIT_INTERRUPT;
+		aq->state = AP_STATE_IDLE;
+		return AP_WAIT_NONE;
+	default:
+		aq->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_suspend_read(): Receive pending reply messages from an AP queue
+ * without changing the device state in between. In suspend mode we don't
+ * allow sending new requests, therefore just fetch pending replies.
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_WAIT_NONE or AP_WAIT_AGAIN
+ */
+static enum ap_wait ap_sm_suspend_read(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+
+	if (!aq->reply)
+		return AP_WAIT_NONE;
+	status = ap_sm_recv(aq);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (aq->queue_count > 0)
+			return AP_WAIT_AGAIN;
+		/* fall through */
+	default:
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_write(): Send messages from the request queue to an AP queue.
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_write(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (aq->requestq_count <= 0)
+		return AP_WAIT_NONE;
+	/* Start the next request on the queue. */
+	ap_msg = list_entry(aq->requestq.next, struct ap_message, list);
+	status = __ap_send(aq->qid, ap_msg->psmid,
+			   ap_msg->message, ap_msg->length, ap_msg->special);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		aq->queue_count++;
+		if (aq->queue_count == 1)
+			mod_timer(&aq->timeout, jiffies + aq->request_timeout);
+		list_move_tail(&ap_msg->list, &aq->pendingq);
+		aq->requestq_count--;
+		aq->pendingq_count++;
+		if (aq->queue_count < aq->card->queue_depth) {
+			aq->state = AP_STATE_WORKING;
+			return AP_WAIT_AGAIN;
+		}
+		/* fall through */
+	case AP_RESPONSE_Q_FULL:
+		aq->state = AP_STATE_QUEUE_FULL;
+		return AP_WAIT_INTERRUPT;
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		aq->state = AP_STATE_RESET_WAIT;
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_MESSAGE_TOO_BIG:
+	case AP_RESPONSE_REQ_FAC_NOT_INST:
+		list_del_init(&ap_msg->list);
+		aq->requestq_count--;
+		ap_msg->rc = -EINVAL;
+		ap_msg->receive(aq, ap_msg, NULL);
+		return AP_WAIT_AGAIN;
+	default:
+		aq->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_read_write(): Send and receive messages to/from an AP queue.
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_read_write(struct ap_queue *aq)
+{
+	return min(ap_sm_read(aq), ap_sm_write(aq));
+}
+
+/**
+ * ap_sm_reset(): Reset an AP queue.
+ * @qid: The AP queue number
+ *
+ * Submit the Reset command to an AP queue.
+ */
+static enum ap_wait ap_sm_reset(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+
+	status = ap_rapq(aq->qid);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		aq->state = AP_STATE_RESET_WAIT;
+		aq->interrupt = AP_INTR_DISABLED;
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_BUSY:
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_Q_NOT_AVAIL:
+	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
+	default:
+		aq->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_reset_wait(): Test queue for completion of the reset operation
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
+ */
+static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+	void *lsi_ptr;
+
+	if (aq->queue_count > 0 && aq->reply)
+		/* Try to read a completed message and get the status */
+		status = ap_sm_recv(aq);
+	else
+		/* Get the status with TAPQ */
+		status = ap_tapq(aq->qid, NULL);
+
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		lsi_ptr = ap_airq_ptr();
+		if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0)
+			aq->state = AP_STATE_SETIRQ_WAIT;
+		else
+			aq->state = (aq->queue_count > 0) ?
+				AP_STATE_WORKING : AP_STATE_IDLE;
+		return AP_WAIT_AGAIN;
+	case AP_RESPONSE_BUSY:
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_Q_NOT_AVAIL:
+	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
+	default:
+		aq->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_setirq_wait(): Test queue for completion of the irq enablement
+ * @aq: pointer to the AP queue
+ *
+ * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
+ */
+static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq)
+{
+	struct ap_queue_status status;
+
+	if (aq->queue_count > 0 && aq->reply)
+		/* Try to read a completed message and get the status */
+		status = ap_sm_recv(aq);
+	else
+		/* Get the status with TAPQ */
+		status = ap_tapq(aq->qid, NULL);
+
+	if (status.int_enabled == 1) {
+		/* Irqs are now enabled */
+		aq->interrupt = AP_INTR_ENABLED;
+		aq->state = (aq->queue_count > 0) ?
+			AP_STATE_WORKING : AP_STATE_IDLE;
+	}
+
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (aq->queue_count > 0)
+			return AP_WAIT_AGAIN;
+		/* fallthrough */
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		return AP_WAIT_TIMEOUT;
+	default:
+		aq->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/*
+ * AP state machine jump table
+ */
+static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
+	[AP_STATE_RESET_START] = {
+		[AP_EVENT_POLL] = ap_sm_reset,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_RESET_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_reset_wait,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_SETIRQ_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_setirq_wait,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_IDLE] = {
+		[AP_EVENT_POLL] = ap_sm_write,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_WORKING] = {
+		[AP_EVENT_POLL] = ap_sm_read_write,
+		[AP_EVENT_TIMEOUT] = ap_sm_reset,
+	},
+	[AP_STATE_QUEUE_FULL] = {
+		[AP_EVENT_POLL] = ap_sm_read,
+		[AP_EVENT_TIMEOUT] = ap_sm_reset,
+	},
+	[AP_STATE_SUSPEND_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_suspend_read,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_BORKED] = {
+		[AP_EVENT_POLL] = ap_sm_nop,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+};
+
+enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event)
+{
+	return ap_jumptable[aq->state][event](aq);
+}
+
+enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event)
+{
+	enum ap_wait wait;
+
+	while ((wait = ap_sm_event(aq, event)) == AP_WAIT_AGAIN)
+		;
+	return wait;
+}
+
+/*
+ * Power management for queue devices
+ */
+void ap_queue_suspend(struct ap_device *ap_dev)
+{
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+
+	/* Poll on the device until all requests are finished. */
+	spin_lock_bh(&aq->lock);
+	aq->state = AP_STATE_SUSPEND_WAIT;
+	while (ap_sm_event(aq, AP_EVENT_POLL) != AP_WAIT_NONE)
+		;
+	aq->state = AP_STATE_BORKED;
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_queue_suspend);
+
+void ap_queue_resume(struct ap_device *ap_dev)
+{
+}
+EXPORT_SYMBOL(ap_queue_resume);
+
+/*
+ * AP queue related attributes.
+ */
+static ssize_t ap_request_count_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct ap_queue *aq = to_ap_queue(dev);
+	unsigned int req_cnt;
+
+	spin_lock_bh(&aq->lock);
+	req_cnt = aq->total_request_count;
+	spin_unlock_bh(&aq->lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
+}
+
+static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+
+static ssize_t ap_requestq_count_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ap_queue *aq = to_ap_queue(dev);
+	unsigned int reqq_cnt = 0;
+
+	spin_lock_bh(&aq->lock);
+	reqq_cnt = aq->requestq_count;
+	spin_unlock_bh(&aq->lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt);
+}
+
+static DEVICE_ATTR(requestq_count, 0444, ap_requestq_count_show, NULL);
+
+static ssize_t ap_pendingq_count_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ap_queue *aq = to_ap_queue(dev);
+	unsigned int penq_cnt = 0;
+
+	spin_lock_bh(&aq->lock);
+	penq_cnt = aq->pendingq_count;
+	spin_unlock_bh(&aq->lock);
+	return snprintf(buf, PAGE_SIZE, "%d\n", penq_cnt);
+}
+
+static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
+
+static ssize_t ap_reset_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ap_queue *aq = to_ap_queue(dev);
+	int rc = 0;
+
+	spin_lock_bh(&aq->lock);
+	switch (aq->state) {
+	case AP_STATE_RESET_START:
+	case AP_STATE_RESET_WAIT:
+		rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
+		break;
+	case AP_STATE_WORKING:
+	case AP_STATE_QUEUE_FULL:
+		rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
+		break;
+	default:
+		rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
+	}
+	spin_unlock_bh(&aq->lock);
+	return rc;
+}
+
+static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL);
+
+static ssize_t ap_interrupt_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct ap_queue *aq = to_ap_queue(dev);
+	int rc = 0;
+
+	spin_lock_bh(&aq->lock);
+	if (aq->state == AP_STATE_SETIRQ_WAIT)
+		rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
+	else if (aq->interrupt == AP_INTR_ENABLED)
+		rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
+	else
+		rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
+	spin_unlock_bh(&aq->lock);
+	return rc;
+}
+
+static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL);
+
+static struct attribute *ap_queue_dev_attrs[] = {
+	&dev_attr_request_count.attr,
+	&dev_attr_requestq_count.attr,
+	&dev_attr_pendingq_count.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_interrupt.attr,
+	NULL
+};
+
+static struct attribute_group ap_queue_dev_attr_group = {
+	.attrs = ap_queue_dev_attrs
+};
+
+static const struct attribute_group *ap_queue_dev_attr_groups[] = {
+	&ap_queue_dev_attr_group,
+	NULL
+};
+
+struct device_type ap_queue_type = {
+	.name = "ap_queue",
+	.groups = ap_queue_dev_attr_groups,
+};
+
+static void ap_queue_device_release(struct device *dev)
+{
+	kfree(to_ap_queue(dev));
+}
+
+struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
+{
+	struct ap_queue *aq;
+
+	aq = kzalloc(sizeof(*aq), GFP_KERNEL);
+	if (!aq)
+		return NULL;
+	aq->ap_dev.device.release = ap_queue_device_release;
+	aq->ap_dev.device.type = &ap_queue_type;
+	aq->ap_dev.device_type = device_type;
+	/* CEX6 toleration: map to CEX5 */
+	if (device_type == AP_DEVICE_TYPE_CEX6)
+		aq->ap_dev.device_type = AP_DEVICE_TYPE_CEX5;
+	aq->qid = qid;
+	aq->state = AP_STATE_RESET_START;
+	aq->interrupt = AP_INTR_DISABLED;
+	spin_lock_init(&aq->lock);
+	INIT_LIST_HEAD(&aq->pendingq);
+	INIT_LIST_HEAD(&aq->requestq);
+	setup_timer(&aq->timeout, ap_request_timeout, (unsigned long) aq);
+
+	return aq;
+}
+
+void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *reply)
+{
+	aq->reply = reply;
+
+	spin_lock_bh(&aq->lock);
+	ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_queue_init_reply);
+
+/**
+ * ap_queue_message(): Queue a request to an AP device.
+ * @aq: The AP device to queue the message to
+ * @ap_msg: The message that is to be added
+ */
+void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
+{
+	/* For asynchronous message handling a valid receive-callback
+	 * is required.
+	 */
+	BUG_ON(!ap_msg->receive);
+
+	spin_lock_bh(&aq->lock);
+	/* Queue the message. */
+	list_add_tail(&ap_msg->list, &aq->requestq);
+	aq->requestq_count++;
+	aq->total_request_count++;
+	/* Send/receive as many request from the queue as possible. */
+	ap_wait(ap_sm_event_loop(aq, AP_EVENT_POLL));
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_queue_message);
+
+/**
+ * ap_cancel_message(): Cancel a crypto request.
+ * @aq: The AP device that has the message queued
+ * @ap_msg: The message that is to be removed
+ *
+ * Cancel a crypto request. This is done by removing the request
+ * from the device pending or request queue. Note that the
+ * request stays on the AP queue. When it finishes the message
+ * reply will be discarded because the psmid can't be found.
+ */
+void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg)
+{
+	struct ap_message *tmp;
+
+	spin_lock_bh(&aq->lock);
+	if (!list_empty(&ap_msg->list)) {
+		list_for_each_entry(tmp, &aq->pendingq, list)
+			if (tmp->psmid == ap_msg->psmid) {
+				aq->pendingq_count--;
+				goto found;
+			}
+		aq->requestq_count--;
+found:
+		list_del_init(&ap_msg->list);
+	}
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_cancel_message);
+
+/**
+ * __ap_flush_queue(): Flush requests.
+ * @aq: Pointer to the AP queue
+ *
+ * Flush all requests from the request/pending queue of an AP device.
+ */
+static void __ap_flush_queue(struct ap_queue *aq)
+{
+	struct ap_message *ap_msg, *next;
+
+	list_for_each_entry_safe(ap_msg, next, &aq->pendingq, list) {
+		list_del_init(&ap_msg->list);
+		aq->pendingq_count--;
+		ap_msg->rc = -EAGAIN;
+		ap_msg->receive(aq, ap_msg, NULL);
+	}
+	list_for_each_entry_safe(ap_msg, next, &aq->requestq, list) {
+		list_del_init(&ap_msg->list);
+		aq->requestq_count--;
+		ap_msg->rc = -EAGAIN;
+		ap_msg->receive(aq, ap_msg, NULL);
+	}
+}
+
+void ap_flush_queue(struct ap_queue *aq)
+{
+	spin_lock_bh(&aq->lock);
+	__ap_flush_queue(aq);
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_flush_queue);
+
+void ap_queue_remove(struct ap_queue *aq)
+{
+	ap_flush_queue(aq);
+	del_timer_sync(&aq->timeout);
+}
+EXPORT_SYMBOL(ap_queue_remove);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 28913e540096..fd0ae8cd2bee 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -59,71 +59,22 @@ static int zcrypt_hwrng_seed = 1;
 module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, S_IRUSR|S_IRGRP);
 MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on).");
 
-static DEFINE_SPINLOCK(zcrypt_device_lock);
-static LIST_HEAD(zcrypt_device_list);
-static int zcrypt_device_count = 0;
+DEFINE_SPINLOCK(zcrypt_list_lock);
+LIST_HEAD(zcrypt_card_list);
+int zcrypt_device_count;
+
 static atomic_t zcrypt_open_count = ATOMIC_INIT(0);
 static atomic_t zcrypt_rescan_count = ATOMIC_INIT(0);
 
 atomic_t zcrypt_rescan_req = ATOMIC_INIT(0);
 EXPORT_SYMBOL(zcrypt_rescan_req);
 
-static int zcrypt_rng_device_add(void);
-static void zcrypt_rng_device_remove(void);
-
 static LIST_HEAD(zcrypt_ops_list);
 
-static debug_info_t *zcrypt_dbf_common;
-static debug_info_t *zcrypt_dbf_devices;
 static struct dentry *debugfs_root;
-
-/*
- * Device attributes common for all crypto devices.
- */
-static ssize_t zcrypt_type_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
-	return snprintf(buf, PAGE_SIZE, "%s\n", zdev->type_string);
-}
-
-static DEVICE_ATTR(type, 0444, zcrypt_type_show, NULL);
-
-static ssize_t zcrypt_online_show(struct device *dev,
-				  struct device_attribute *attr, char *buf)
-{
-	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
-	return snprintf(buf, PAGE_SIZE, "%d\n", zdev->online);
-}
-
-static ssize_t zcrypt_online_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t count)
-{
-	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
-	int online;
-
-	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
-		return -EINVAL;
-	zdev->online = online;
-	ZCRYPT_DBF_DEV(DBF_INFO, zdev, "dev%04xo%dman", zdev->ap_dev->qid,
-		       zdev->online);
-	if (!online)
-		ap_flush_queue(zdev->ap_dev);
-	return count;
-}
-
-static DEVICE_ATTR(online, 0644, zcrypt_online_show, zcrypt_online_store);
-
-static struct attribute * zcrypt_device_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_online.attr,
-	NULL,
-};
-
-static struct attribute_group zcrypt_device_attr_group = {
-	.attrs = zcrypt_device_attrs,
-};
+debug_info_t *zcrypt_dbf_common;
+debug_info_t *zcrypt_dbf_devices;
+debug_info_t *zcrypt_dbf_cards;
 
 /**
  * Process a rescan of the transport layer.
@@ -143,174 +94,6 @@ static inline int zcrypt_process_rescan(void)
 	return 0;
 }
 
-/**
- * __zcrypt_increase_preference(): Increase preference of a crypto device.
- * @zdev: Pointer the crypto device
- *
- * Move the device towards the head of the device list.
- * Need to be called while holding the zcrypt device list lock.
- * Note: cards with speed_rating of 0 are kept at the end of the list.
- */
-static void __zcrypt_increase_preference(struct zcrypt_device *zdev,
-				  unsigned int weight)
-{
-	struct zcrypt_device *tmp;
-	struct list_head *l;
-
-	zdev->load -= weight;
-	for (l = zdev->list.prev; l != &zcrypt_device_list; l = l->prev) {
-		tmp = list_entry(l, struct zcrypt_device, list);
-		if (tmp->load <= zdev->load)
-			break;
-	}
-	if (l == zdev->list.prev)
-		return;
-	/* Move zdev behind l */
-	list_move(&zdev->list, l);
-}
-
-/**
- * __zcrypt_decrease_preference(): Decrease preference of a crypto device.
- * @zdev: Pointer to a crypto device.
- *
- * Move the device towards the tail of the device list.
- * Need to be called while holding the zcrypt device list lock.
- * Note: cards with speed_rating of 0 are kept at the end of the list.
- */
-static void __zcrypt_decrease_preference(struct zcrypt_device *zdev,
-				  unsigned int weight)
-{
-	struct zcrypt_device *tmp;
-	struct list_head *l;
-
-	zdev->load += weight;
-	for (l = zdev->list.next; l != &zcrypt_device_list; l = l->next) {
-		tmp = list_entry(l, struct zcrypt_device, list);
-		if (tmp->load > zdev->load)
-			break;
-	}
-	if (l == zdev->list.next)
-		return;
-	/* Move zdev before l */
-	list_move_tail(&zdev->list, l);
-}
-
-static void zcrypt_device_release(struct kref *kref)
-{
-	struct zcrypt_device *zdev =
-		container_of(kref, struct zcrypt_device, refcount);
-	zcrypt_device_free(zdev);
-}
-
-void zcrypt_device_get(struct zcrypt_device *zdev)
-{
-	kref_get(&zdev->refcount);
-}
-EXPORT_SYMBOL(zcrypt_device_get);
-
-int zcrypt_device_put(struct zcrypt_device *zdev)
-{
-	return kref_put(&zdev->refcount, zcrypt_device_release);
-}
-EXPORT_SYMBOL(zcrypt_device_put);
-
-struct zcrypt_device *zcrypt_device_alloc(size_t max_response_size)
-{
-	struct zcrypt_device *zdev;
-
-	zdev = kzalloc(sizeof(struct zcrypt_device), GFP_KERNEL);
-	if (!zdev)
-		return NULL;
-	zdev->reply.message = kmalloc(max_response_size, GFP_KERNEL);
-	if (!zdev->reply.message)
-		goto out_free;
-	zdev->reply.length = max_response_size;
-	spin_lock_init(&zdev->lock);
-	INIT_LIST_HEAD(&zdev->list);
-	zdev->dbf_area = zcrypt_dbf_devices;
-	return zdev;
-
-out_free:
-	kfree(zdev);
-	return NULL;
-}
-EXPORT_SYMBOL(zcrypt_device_alloc);
-
-void zcrypt_device_free(struct zcrypt_device *zdev)
-{
-	kfree(zdev->reply.message);
-	kfree(zdev);
-}
-EXPORT_SYMBOL(zcrypt_device_free);
-
-/**
- * zcrypt_device_register() - Register a crypto device.
- * @zdev: Pointer to a crypto device
- *
- * Register a crypto device. Returns 0 if successful.
- */
-int zcrypt_device_register(struct zcrypt_device *zdev)
-{
-	int rc;
-
-	if (!zdev->ops)
-		return -ENODEV;
-	rc = sysfs_create_group(&zdev->ap_dev->device.kobj,
-				&zcrypt_device_attr_group);
-	if (rc)
-		goto out;
-	get_device(&zdev->ap_dev->device);
-	kref_init(&zdev->refcount);
-	spin_lock_bh(&zcrypt_device_lock);
-	zdev->online = 1;	/* New devices are online by default. */
-	ZCRYPT_DBF_DEV(DBF_INFO, zdev, "dev%04xo%dreg", zdev->ap_dev->qid,
-		       zdev->online);
-	list_add_tail(&zdev->list, &zcrypt_device_list);
-	__zcrypt_increase_preference(zdev, 0); /* sort devices acc. weight */
-	zcrypt_device_count++;
-	spin_unlock_bh(&zcrypt_device_lock);
-	if (zdev->ops->rng) {
-		rc = zcrypt_rng_device_add();
-		if (rc)
-			goto out_unregister;
-	}
-	return 0;
-
-out_unregister:
-	spin_lock_bh(&zcrypt_device_lock);
-	zcrypt_device_count--;
-	list_del_init(&zdev->list);
-	spin_unlock_bh(&zcrypt_device_lock);
-	sysfs_remove_group(&zdev->ap_dev->device.kobj,
-			   &zcrypt_device_attr_group);
-	put_device(&zdev->ap_dev->device);
-	zcrypt_device_put(zdev);
-out:
-	return rc;
-}
-EXPORT_SYMBOL(zcrypt_device_register);
-
-/**
- * zcrypt_device_unregister(): Unregister a crypto device.
- * @zdev: Pointer to crypto device
- *
- * Unregister a crypto device.
- */
-void zcrypt_device_unregister(struct zcrypt_device *zdev)
-{
-	if (zdev->ops->rng)
-		zcrypt_rng_device_remove();
-	spin_lock_bh(&zcrypt_device_lock);
-	zcrypt_device_count--;
-	list_del_init(&zdev->list);
-	spin_unlock_bh(&zcrypt_device_lock);
-	sysfs_remove_group(&zdev->ap_dev->device.kobj,
-			   &zcrypt_device_attr_group);
-	put_device(&zdev->ap_dev->device);
-	zcrypt_device_put(zdev);
-}
-EXPORT_SYMBOL(zcrypt_device_unregister);
-
 void zcrypt_msgtype_register(struct zcrypt_ops *zops)
 {
 	list_add_tail(&zops->list, &zcrypt_ops_list);
@@ -377,14 +160,44 @@ static int zcrypt_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
+						     struct zcrypt_queue *zq,
+						     unsigned int weight)
+{
+	if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner))
+		return NULL;
+	zcrypt_queue_get(zq);
+	get_device(&zq->queue->ap_dev.device);
+	atomic_add(weight, &zc->load);
+	atomic_add(weight, &zq->load);
+	zq->request_count++;
+	return zq;
+}
+
+static inline void zcrypt_drop_queue(struct zcrypt_card *zc,
+				     struct zcrypt_queue *zq,
+				     unsigned int weight)
+{
+	struct module *mod = zq->queue->ap_dev.drv->driver.owner;
+
+	zq->request_count--;
+	atomic_sub(weight, &zc->load);
+	atomic_sub(weight, &zq->load);
+	put_device(&zq->queue->ap_dev.device);
+	zcrypt_queue_put(zq);
+	module_put(mod);
+}
+
 /*
  * zcrypt ioctls.
  */
 static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex)
 {
-	struct zcrypt_device *zdev, *pref_zdev = NULL;
+	struct zcrypt_card *zc, *pref_zc;
+	struct zcrypt_queue *zq, *pref_zq;
+	unsigned int weight, pref_weight;
+	unsigned int func_code;
 	int rc;
-	unsigned int weight, func_code, pref_weight = 0;
 
 	if (mex->outputdatalength < mex->inputdatalength)
 		return -EINVAL;
@@ -399,59 +212,56 @@ static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex)
 	if (rc)
 		return rc;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		if (!zdev->online ||
-		    !zdev->ops->rsa_modexpo ||
-		    zdev->min_mod_size > mex->inputdatalength ||
-		    zdev->max_mod_size < mex->inputdatalength)
+	pref_zc = NULL;
+	pref_zq = NULL;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		/* Check for online accelarator and CCA cards */
+		if (!zc->online || !(zc->card->functions & 0x18000000))
 			continue;
-		weight = zdev->speed_rating[func_code];
-		if (!pref_zdev) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* Check for size limits */
+		if (zc->min_mod_size > mex->inputdatalength ||
+		    zc->max_mod_size < mex->inputdatalength)
 			continue;
-		}
-		if ((pref_zdev->load + pref_weight) > (zdev->load + weight)) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* get weight index of the card device	*/
+		weight = zc->speed_rating[func_code];
+		if (pref_zc && atomic_read(&zc->load) + weight >=
+		    atomic_read(&pref_zc->load) + pref_weight)
 			continue;
+		for_each_zcrypt_queue(zq, zc) {
+			/* check if device is online and eligible */
+			if (!zq->online)
+				continue;
+			if (pref_zq && atomic_read(&zq->load) + weight >=
+			    atomic_read(&pref_zq->load) + pref_weight)
+				continue;
+			pref_zc = zc;
+			pref_zq = zq;
+			pref_weight = weight;
 		}
-		if ((pref_zdev->load + pref_weight) <= zdev->load)
-			break; /* Load on remaining devices too high - abort */
 	}
+	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
 
-	if (!pref_zdev) {
-		spin_unlock_bh(&zcrypt_device_lock);
+	if (!pref_zq)
 		return -ENODEV;
-	}
-	__zcrypt_decrease_preference(pref_zdev, pref_weight);
-	zcrypt_device_get(pref_zdev);
-	get_device(&pref_zdev->ap_dev->device);
-	pref_zdev->request_count++;
-	if (try_module_get(pref_zdev->ap_dev->drv->driver.owner)) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		rc = -ENODEV;
-		rc = pref_zdev->ops->rsa_modexpo(pref_zdev, mex);
-		spin_lock_bh(&zcrypt_device_lock);
-		module_put(pref_zdev->ap_dev->drv->driver.owner);
-	} else
-		rc = -EAGAIN;
 
-	pref_zdev->request_count--;
-	__zcrypt_increase_preference(pref_zdev, pref_weight);
-	put_device(&pref_zdev->ap_dev->device);
-	zcrypt_device_put(pref_zdev);
-	spin_unlock_bh(&zcrypt_device_lock);
+	rc = pref_zq->ops->rsa_modexpo(pref_zq, mex);
+
+	spin_lock(&zcrypt_list_lock);
+	zcrypt_drop_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
 	return rc;
 }
 
 static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt)
 {
-	struct zcrypt_device *zdev, *pref_zdev = NULL;
-	unsigned long long z1, z2, z3;
-	int rc, copied;
-	unsigned int weight, func_code, pref_weight = 0;
+	struct zcrypt_card *zc, *pref_zc;
+	struct zcrypt_queue *zq, *pref_zq;
+	unsigned int weight, pref_weight;
+	unsigned int func_code;
+	int rc;
 
 	if (crt->outputdatalength < crt->inputdatalength)
 		return -EINVAL;
@@ -466,385 +276,388 @@ static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt)
 	if (rc)
 		return rc;
 
-	copied = 0;
- restart:
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		if (!zdev->online ||
-		    !zdev->ops->rsa_modexpo_crt ||
-		    zdev->min_mod_size > crt->inputdatalength ||
-		    zdev->max_mod_size < crt->inputdatalength)
+	pref_zc = NULL;
+	pref_zq = NULL;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		/* Check for online accelarator and CCA cards */
+		if (!zc->online || !(zc->card->functions & 0x18000000))
 			continue;
-		if (zdev->short_crt && crt->inputdatalength > 240) {
-			/*
-			 * Check inputdata for leading zeros for cards
-			 * that can't handle np_prime, bp_key, or
-			 * u_mult_inv > 128 bytes.
-			 */
-			if (copied == 0) {
-				unsigned int len;
-				spin_unlock_bh(&zcrypt_device_lock);
-				/* len is max 256 / 2 - 120 = 8
-				 * For bigger device just assume len of leading
-				 * 0s is 8 as stated in the requirements for
-				 * ica_rsa_modexpo_crt struct in zcrypt.h.
-				 */
-				if (crt->inputdatalength <= 256)
-					len = crt->inputdatalength / 2 - 120;
-				else
-					len = 8;
-				if (len > sizeof(z1))
-					return -EFAULT;
-				z1 = z2 = z3 = 0;
-				if (copy_from_user(&z1, crt->np_prime, len) ||
-				    copy_from_user(&z2, crt->bp_key, len) ||
-				    copy_from_user(&z3, crt->u_mult_inv, len))
-					return -EFAULT;
-				z1 = z2 = z3 = 0;
-				copied = 1;
-				/*
-				 * We have to restart device lookup -
-				 * the device list may have changed by now.
-				 */
-				goto restart;
-			}
-			if (z1 != 0ULL || z2 != 0ULL || z3 != 0ULL)
-				/* The device can't handle this request. */
-				continue;
-		}
-
-		weight = zdev->speed_rating[func_code];
-		if (!pref_zdev) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* Check for size limits */
+		if (zc->min_mod_size > crt->inputdatalength ||
+		    zc->max_mod_size < crt->inputdatalength)
 			continue;
-		}
-		if ((pref_zdev->load + pref_weight) > (zdev->load + weight)) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* get weight index of the card device	*/
+		weight = zc->speed_rating[func_code];
+		if (pref_zc && atomic_read(&zc->load) + weight >=
+		    atomic_read(&pref_zc->load) + pref_weight)
 			continue;
+		for_each_zcrypt_queue(zq, zc) {
+			/* check if device is online and eligible */
+			if (!zq->online)
+				continue;
+			if (pref_zq && atomic_read(&zq->load) + weight >=
+			    atomic_read(&pref_zq->load) + pref_weight)
+				continue;
+			pref_zc = zc;
+			pref_zq = zq;
+			pref_weight = weight;
 		}
-		if ((pref_zdev->load + pref_weight) <= zdev->load)
-			break; /* Load on remaining devices too high - abort */
 	}
-	if (!pref_zdev) {
-		spin_unlock_bh(&zcrypt_device_lock);
+	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
+	if (!pref_zq)
 		return -ENODEV;
-	}
-	__zcrypt_decrease_preference(pref_zdev, pref_weight);
-	zcrypt_device_get(pref_zdev);
-	get_device(&pref_zdev->ap_dev->device);
-	pref_zdev->request_count++;
-	if (try_module_get(pref_zdev->ap_dev->drv->driver.owner)) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		rc = pref_zdev->ops->rsa_modexpo_crt(pref_zdev, crt);
-		spin_lock_bh(&zcrypt_device_lock);
-		module_put(pref_zdev->ap_dev->drv->driver.owner);
-	} else
-		rc = -EAGAIN;
-	pref_zdev->request_count--;
-	__zcrypt_increase_preference(pref_zdev, pref_weight);
-	put_device(&pref_zdev->ap_dev->device);
-	zcrypt_device_put(pref_zdev);
-	spin_unlock_bh(&zcrypt_device_lock);
+
+	rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt);
+
+	spin_lock(&zcrypt_list_lock);
+	zcrypt_drop_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
 	return rc;
 }
 
 static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 {
-	struct zcrypt_device *zdev, *pref_zdev = NULL;
-	unsigned int weight = 0, func_code = 0, pref_weight = 0;
-	int rc;
+	struct zcrypt_card *zc, *pref_zc;
+	struct zcrypt_queue *zq, *pref_zq;
 	struct ap_message ap_msg;
+	unsigned int weight, pref_weight;
+	unsigned int func_code;
+	unsigned short *domain;
+	int rc;
 
-	rc = get_cprb_fc(xcRB, &ap_msg, &func_code);
+	rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain);
 	if (rc)
 		return rc;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		if (!zdev->online || !zdev->ops->send_cprb ||
-		   (zdev->ops->variant == MSGTYPE06_VARIANT_EP11) ||
-		   (xcRB->user_defined != AUTOSELECT &&
-		    AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined))
+	pref_zc = NULL;
+	pref_zq = NULL;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		/* Check for online CCA cards */
+		if (!zc->online || !(zc->card->functions & 0x10000000))
 			continue;
-
-		weight = speed_idx_cca(func_code) * zdev->speed_rating[SECKEY];
-		if (!pref_zdev) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* Check for user selected CCA card */
+		if (xcRB->user_defined != AUTOSELECT &&
+		    xcRB->user_defined != zc->card->id)
 			continue;
-		}
-		if ((pref_zdev->load + pref_weight) > (zdev->load + weight)) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* get weight index of the card device	*/
+		weight = speed_idx_cca(func_code) * zc->speed_rating[SECKEY];
+		if (pref_zc && atomic_read(&zc->load) + weight >=
+		    atomic_read(&pref_zc->load) + pref_weight)
 			continue;
+		for_each_zcrypt_queue(zq, zc) {
+			/* check if device is online and eligible */
+			if (!zq->online ||
+			    ((*domain != (unsigned short) AUTOSELECT) &&
+			     (*domain != AP_QID_QUEUE(zq->queue->qid))))
+				continue;
+			if (pref_zq && atomic_read(&zq->load) + weight >=
+			    atomic_read(&pref_zq->load) + pref_weight)
+				continue;
+			pref_zc = zc;
+			pref_zq = zq;
+			pref_weight = weight;
 		}
-		if ((pref_zdev->load + pref_weight) <= zdev->load)
-			break; /* Load on remaining devices too high - abort */
 	}
-	if (!pref_zdev) {
-		spin_unlock_bh(&zcrypt_device_lock);
+	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
+	if (!pref_zq)
 		return -ENODEV;
-	}
-	__zcrypt_decrease_preference(pref_zdev, pref_weight);
-	zcrypt_device_get(pref_zdev);
-	get_device(&pref_zdev->ap_dev->device);
-	pref_zdev->request_count++;
-	if (try_module_get(pref_zdev->ap_dev->drv->driver.owner)) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		rc = pref_zdev->ops->send_cprb(pref_zdev, xcRB, &ap_msg);
-		spin_lock_bh(&zcrypt_device_lock);
-		module_put(pref_zdev->ap_dev->drv->driver.owner);
-	} else
-		rc = -EAGAIN;
-	pref_zdev->request_count--;
-	__zcrypt_increase_preference(pref_zdev, pref_weight);
-	put_device(&pref_zdev->ap_dev->device);
-	zcrypt_device_put(pref_zdev);
-	spin_unlock_bh(&zcrypt_device_lock);
+
+	/* in case of auto select, provide the correct domain */
+	if (*domain == (unsigned short) AUTOSELECT)
+		*domain = AP_QID_QUEUE(pref_zq->queue->qid);
+
+	rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg);
+
+	spin_lock(&zcrypt_list_lock);
+	zcrypt_drop_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
 	return rc;
 }
 
-struct ep11_target_dev_list {
-	unsigned short		targets_num;
-	struct ep11_target_dev	*targets;
-};
-
-static bool is_desired_ep11dev(unsigned int dev_qid,
-			       struct ep11_target_dev_list dev_list)
+static bool is_desired_ep11_card(unsigned int dev_id,
+				 unsigned short target_num,
+				 struct ep11_target_dev *targets)
 {
-	int n;
+	while (target_num-- > 0) {
+		if (dev_id == targets->ap_id)
+			return true;
+		targets++;
+	}
+	return false;
+}
 
-	for (n = 0; n < dev_list.targets_num; n++, dev_list.targets++) {
-		if ((AP_QID_DEVICE(dev_qid) == dev_list.targets->ap_id) &&
-		    (AP_QID_QUEUE(dev_qid) == dev_list.targets->dom_id)) {
+static bool is_desired_ep11_queue(unsigned int dev_qid,
+				  unsigned short target_num,
+				  struct ep11_target_dev *targets)
+{
+	while (target_num-- > 0) {
+		if (AP_MKQID(targets->ap_id, targets->dom_id) == dev_qid)
 			return true;
-		}
+		targets++;
 	}
 	return false;
 }
 
 static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
 {
-	struct zcrypt_device *zdev, *pref_zdev = NULL;
+	struct zcrypt_card *zc, *pref_zc;
+	struct zcrypt_queue *zq, *pref_zq;
+	struct ep11_target_dev *targets;
+	unsigned short target_num;
+	unsigned int weight, pref_weight;
+	unsigned int func_code;
 	struct ap_message ap_msg;
-	unsigned int weight = 0, func_code = 0, pref_weight = 0;
-	bool autoselect = false;
 	int rc;
-	struct ep11_target_dev_list ep11_dev_list = {
-		.targets_num	=  0x00,
-		.targets	=  NULL,
-	};
 
-	ep11_dev_list.targets_num = (unsigned short) xcrb->targets_num;
+	target_num = (unsigned short) xcrb->targets_num;
 
 	/* empty list indicates autoselect (all available targets) */
-	if (ep11_dev_list.targets_num == 0)
-		autoselect = true;
-	else {
-		ep11_dev_list.targets = kcalloc((unsigned short)
-						xcrb->targets_num,
-						sizeof(struct ep11_target_dev),
-						GFP_KERNEL);
-		if (!ep11_dev_list.targets)
+	targets = NULL;
+	if (target_num != 0) {
+		struct ep11_target_dev __user *uptr;
+
+		targets = kcalloc(target_num, sizeof(*targets), GFP_KERNEL);
+		if (!targets)
 			return -ENOMEM;
 
-		if (copy_from_user(ep11_dev_list.targets,
-				   (struct ep11_target_dev __force __user *)
-				   xcrb->targets, xcrb->targets_num *
-				   sizeof(struct ep11_target_dev)))
+		uptr = (struct ep11_target_dev __force __user *) xcrb->targets;
+		if (copy_from_user(targets, uptr,
+				   target_num * sizeof(*targets)))
 			return -EFAULT;
 	}
 
 	rc = get_ep11cprb_fc(xcrb, &ap_msg, &func_code);
 	if (rc)
-		return rc;
+		goto out_free;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		/* check if device is eligible */
-		if (!zdev->online ||
-		    zdev->ops->variant != MSGTYPE06_VARIANT_EP11)
+	pref_zc = NULL;
+	pref_zq = NULL;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		/* Check for online EP11 cards */
+		if (!zc->online || !(zc->card->functions & 0x04000000))
 			continue;
-
-		/* check if device is selected as valid target */
-		if (!is_desired_ep11dev(zdev->ap_dev->qid, ep11_dev_list) &&
-		    !autoselect)
+		/* Check for user selected EP11 card */
+		if (targets &&
+		    !is_desired_ep11_card(zc->card->id, target_num, targets))
 			continue;
-
-		weight = speed_idx_ep11(func_code) * zdev->speed_rating[SECKEY];
-		if (!pref_zdev) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* get weight index of the card device	*/
+		weight = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY];
+		if (pref_zc && atomic_read(&zc->load) + weight >=
+		    atomic_read(&pref_zc->load) + pref_weight)
 			continue;
-		}
-		if ((pref_zdev->load + pref_weight) > (zdev->load + weight)) {
-			pref_zdev = zdev;
+		for_each_zcrypt_queue(zq, zc) {
+			/* check if device is online and eligible */
+			if (!zq->online ||
+			    (targets &&
+			     !is_desired_ep11_queue(zq->queue->qid,
+						    target_num, targets)))
+				continue;
+			if (pref_zq && atomic_read(&zq->load) + weight >=
+			    atomic_read(&pref_zq->load) + pref_weight)
+				continue;
+			pref_zc = zc;
+			pref_zq = zq;
 			pref_weight = weight;
-			continue;
 		}
-		if ((pref_zdev->load + pref_weight) <= zdev->load)
-			break; /* Load on remaining devices too high - abort */
-	}
-	if (!pref_zdev) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		return -ENODEV;
 	}
+	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
 
-	zcrypt_device_get(pref_zdev);
-	get_device(&pref_zdev->ap_dev->device);
-	pref_zdev->request_count++;
-	if (try_module_get(pref_zdev->ap_dev->drv->driver.owner)) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		rc = pref_zdev->ops->send_ep11_cprb(pref_zdev, xcrb, &ap_msg);
-		spin_lock_bh(&zcrypt_device_lock);
-		module_put(pref_zdev->ap_dev->drv->driver.owner);
-	} else {
-		rc = -EAGAIN;
+	if (!pref_zq) {
+		rc = -ENODEV;
+		goto out_free;
 	}
-	pref_zdev->request_count--;
-	put_device(&pref_zdev->ap_dev->device);
-	zcrypt_device_put(pref_zdev);
-	spin_unlock_bh(&zcrypt_device_lock);
+
+	rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg);
+
+	spin_lock(&zcrypt_list_lock);
+	zcrypt_drop_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
+out_free:
+	kfree(targets);
 	return rc;
 }
 
 static long zcrypt_rng(char *buffer)
 {
-	struct zcrypt_device *zdev, *pref_zdev = NULL;
+	struct zcrypt_card *zc, *pref_zc;
+	struct zcrypt_queue *zq, *pref_zq;
+	unsigned int weight, pref_weight;
+	unsigned int func_code;
 	struct ap_message ap_msg;
-	unsigned int weight = 0, func_code = 0, pref_weight = 0;
+	unsigned int domain;
 	int rc;
 
-	rc = get_rng_fc(&ap_msg, &func_code);
+	rc = get_rng_fc(&ap_msg, &func_code, &domain);
 	if (rc)
 		return rc;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		if (!zdev->online || !zdev->ops->rng)
+	pref_zc = NULL;
+	pref_zq = NULL;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		/* Check for online CCA cards */
+		if (!zc->online || !(zc->card->functions & 0x10000000))
 			continue;
-
-		weight = zdev->speed_rating[func_code];
-		if (!pref_zdev) {
-			pref_zdev = zdev;
-			pref_weight = weight;
+		/* get weight index of the card device	*/
+		weight = zc->speed_rating[func_code];
+		if (pref_zc && atomic_read(&zc->load) + weight >=
+		    atomic_read(&pref_zc->load) + pref_weight)
 			continue;
-		}
-		if ((pref_zdev->load + pref_weight) > (zdev->load + weight)) {
-			pref_zdev = zdev;
+		for_each_zcrypt_queue(zq, zc) {
+			/* check if device is online and eligible */
+			if (!zq->online)
+				continue;
+			if (pref_zq && atomic_read(&zq->load) + weight >=
+			    atomic_read(&pref_zq->load) + pref_weight)
+				continue;
+			pref_zc = zc;
+			pref_zq = zq;
 			pref_weight = weight;
-			continue;
 		}
-		if ((pref_zdev->load + pref_weight) <= zdev->load)
-			break; /* Load on remaining devices too high - abort */
 	}
-	if (!pref_zdev) {
-		spin_unlock_bh(&zcrypt_device_lock);
+	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
+
+	if (!pref_zq)
 		return -ENODEV;
-	}
 
-	zcrypt_device_get(pref_zdev);
-	get_device(&pref_zdev->ap_dev->device);
-	pref_zdev->request_count++;
-	if (try_module_get(pref_zdev->ap_dev->drv->driver.owner)) {
-		spin_unlock_bh(&zcrypt_device_lock);
-		rc = pref_zdev->ops->rng(pref_zdev, buffer, &ap_msg);
-		spin_lock_bh(&zcrypt_device_lock);
-		module_put(pref_zdev->ap_dev->drv->driver.owner);
-	} else
-		rc = -EAGAIN;
-	pref_zdev->request_count--;
-	put_device(&pref_zdev->ap_dev->device);
-	zcrypt_device_put(pref_zdev);
-	spin_unlock_bh(&zcrypt_device_lock);
+	rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
+
+	spin_lock(&zcrypt_list_lock);
+	zcrypt_drop_queue(pref_zc, pref_zq, weight);
+	spin_unlock(&zcrypt_list_lock);
 	return rc;
 }
 
 static void zcrypt_status_mask(char status[AP_DEVICES])
 {
-	struct zcrypt_device *zdev;
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
 
 	memset(status, 0, sizeof(char) * AP_DEVICES);
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list)
-		status[AP_QID_DEVICE(zdev->ap_dev->qid)] =
-			zdev->online ? zdev->user_space_type : 0x0d;
-	spin_unlock_bh(&zcrypt_device_lock);
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			status[AP_QID_CARD(zq->queue->qid)] =
+				zc->online ? zc->user_space_type : 0x0d;
+		}
+	}
+	spin_unlock(&zcrypt_list_lock);
 }
 
 static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES])
 {
-	struct zcrypt_device *zdev;
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
 
 	memset(qdepth, 0, sizeof(char)	* AP_DEVICES);
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		spin_lock(&zdev->ap_dev->lock);
-		qdepth[AP_QID_DEVICE(zdev->ap_dev->qid)] =
-			zdev->ap_dev->pendingq_count +
-			zdev->ap_dev->requestq_count;
-		spin_unlock(&zdev->ap_dev->lock);
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			spin_lock(&zq->queue->lock);
+			qdepth[AP_QID_CARD(zq->queue->qid)] =
+				zq->queue->pendingq_count +
+				zq->queue->requestq_count;
+			spin_unlock(&zq->queue->lock);
+		}
 	}
-	spin_unlock_bh(&zcrypt_device_lock);
+	spin_unlock(&zcrypt_list_lock);
 }
 
 static void zcrypt_perdev_reqcnt(int reqcnt[AP_DEVICES])
 {
-	struct zcrypt_device *zdev;
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
 
 	memset(reqcnt, 0, sizeof(int) * AP_DEVICES);
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		spin_lock(&zdev->ap_dev->lock);
-		reqcnt[AP_QID_DEVICE(zdev->ap_dev->qid)] =
-			zdev->ap_dev->total_request_count;
-		spin_unlock(&zdev->ap_dev->lock);
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			spin_lock(&zq->queue->lock);
+			reqcnt[AP_QID_CARD(zq->queue->qid)] =
+				zq->queue->total_request_count;
+			spin_unlock(&zq->queue->lock);
+		}
 	}
-	spin_unlock_bh(&zcrypt_device_lock);
+	spin_unlock(&zcrypt_list_lock);
 }
 
 static int zcrypt_pendingq_count(void)
 {
-	struct zcrypt_device *zdev;
-	int pendingq_count = 0;
-
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		spin_lock(&zdev->ap_dev->lock);
-		pendingq_count += zdev->ap_dev->pendingq_count;
-		spin_unlock(&zdev->ap_dev->lock);
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
+	int pendingq_count;
+
+	pendingq_count = 0;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			spin_lock(&zq->queue->lock);
+			pendingq_count += zq->queue->pendingq_count;
+			spin_unlock(&zq->queue->lock);
+		}
 	}
-	spin_unlock_bh(&zcrypt_device_lock);
+	spin_unlock(&zcrypt_list_lock);
 	return pendingq_count;
 }
 
 static int zcrypt_requestq_count(void)
 {
-	struct zcrypt_device *zdev;
-	int requestq_count = 0;
-
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list) {
-		spin_lock(&zdev->ap_dev->lock);
-		requestq_count += zdev->ap_dev->requestq_count;
-		spin_unlock(&zdev->ap_dev->lock);
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
+	int requestq_count;
+
+	requestq_count = 0;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			spin_lock(&zq->queue->lock);
+			requestq_count += zq->queue->requestq_count;
+			spin_unlock(&zq->queue->lock);
+		}
 	}
-	spin_unlock_bh(&zcrypt_device_lock);
+	spin_unlock(&zcrypt_list_lock);
 	return requestq_count;
 }
 
 static int zcrypt_count_type(int type)
 {
-	struct zcrypt_device *zdev;
-	int device_count = 0;
-
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list)
-		if (zdev->user_space_type == type)
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
+	int device_count;
+
+	device_count = 0;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		if (zc->card->id != type)
+			continue;
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
 			device_count++;
-	spin_unlock_bh(&zcrypt_device_lock);
+		}
+	}
+	spin_unlock(&zcrypt_list_lock);
 	return device_count;
 }
 
@@ -1313,29 +1126,36 @@ static int zcrypt_proc_open(struct inode *inode, struct file *file)
 
 static void zcrypt_disable_card(int index)
 {
-	struct zcrypt_device *zdev;
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list)
-		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
-			zdev->online = 0;
-			ap_flush_queue(zdev->ap_dev);
-			break;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			zq->online = 0;
+			ap_flush_queue(zq->queue);
 		}
-	spin_unlock_bh(&zcrypt_device_lock);
+	}
+	spin_unlock(&zcrypt_list_lock);
 }
 
 static void zcrypt_enable_card(int index)
 {
-	struct zcrypt_device *zdev;
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
 
-	spin_lock_bh(&zcrypt_device_lock);
-	list_for_each_entry(zdev, &zcrypt_device_list, list)
-		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
-			zdev->online = 1;
-			break;
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+				continue;
+			zq->online = 1;
+			ap_flush_queue(zq->queue);
 		}
-	spin_unlock_bh(&zcrypt_device_lock);
+	}
+	spin_unlock(&zcrypt_list_lock);
 }
 
 static ssize_t zcrypt_proc_write(struct file *file, const char __user *buffer,
@@ -1433,7 +1253,7 @@ static struct hwrng zcrypt_rng_dev = {
 	.quality	= 990,
 };
 
-static int zcrypt_rng_device_add(void)
+int zcrypt_rng_device_add(void)
 {
 	int rc = 0;
 
@@ -1463,7 +1283,7 @@ out:
 	return rc;
 }
 
-static void zcrypt_rng_device_remove(void)
+void zcrypt_rng_device_remove(void)
 {
 	mutex_lock(&zcrypt_rng_mutex);
 	zcrypt_rng_device_count--;
@@ -1486,6 +1306,10 @@ int __init zcrypt_debug_init(void)
 	debug_register_view(zcrypt_dbf_devices, &debug_hex_ascii_view);
 	debug_set_level(zcrypt_dbf_devices, DBF_ERR);
 
+	zcrypt_dbf_cards = debug_register("zcrypt_cards", 1, 1, 16);
+	debug_register_view(zcrypt_dbf_cards, &debug_hex_ascii_view);
+	debug_set_level(zcrypt_dbf_cards, DBF_ERR);
+
 	return 0;
 }
 
@@ -1517,7 +1341,8 @@ int __init zcrypt_api_init(void)
 		goto out;
 
 	/* Set up the proc file system */
-	zcrypt_entry = proc_create("driver/z90crypt", 0644, NULL, &zcrypt_proc_fops);
+	zcrypt_entry = proc_create("driver/z90crypt", 0644, NULL,
+				   &zcrypt_proc_fops);
 	if (!zcrypt_entry) {
 		rc = -ENOMEM;
 		goto out_misc;
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 3d0d1e25d751..4fb892b99f41 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -88,7 +88,7 @@ struct ica_z90_status {
  * Identifier for Crypto Request Performance Index
  */
 enum crypto_ops {
-	MEX_1K = 0,
+	MEX_1K,
 	MEX_2K,
 	MEX_4K,
 	CRT_1K,
@@ -99,43 +99,56 @@ enum crypto_ops {
 	NUM_OPS
 };
 
-struct zcrypt_device;
+struct zcrypt_queue;
 
 struct zcrypt_ops {
-	long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *);
-	long (*rsa_modexpo_crt)(struct zcrypt_device *,
+	long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *);
+	long (*rsa_modexpo_crt)(struct zcrypt_queue *,
 				struct ica_rsa_modexpo_crt *);
-	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *,
+	long (*send_cprb)(struct zcrypt_queue *, struct ica_xcRB *,
 			  struct ap_message *);
-	long (*send_ep11_cprb)(struct zcrypt_device *, struct ep11_urb *,
+	long (*send_ep11_cprb)(struct zcrypt_queue *, struct ep11_urb *,
 			       struct ap_message *);
-	long (*rng)(struct zcrypt_device *, char *, struct ap_message *);
+	long (*rng)(struct zcrypt_queue *, char *, struct ap_message *);
 	struct list_head list;		/* zcrypt ops list. */
 	struct module *owner;
 	int variant;
 	char name[128];
 };
 
-struct zcrypt_device {
+struct zcrypt_card {
 	struct list_head list;		/* Device list. */
-	spinlock_t lock;		/* Per device lock. */
+	struct list_head zqueues;	/* List of zcrypt queues */
 	struct kref refcount;		/* device refcounting */
-	struct ap_device *ap_dev;	/* The "real" ap device. */
-	struct zcrypt_ops *ops;		/* Crypto operations. */
+	struct ap_card *card;		/* The "real" ap card device. */
 	int online;			/* User online/offline */
 
 	int user_space_type;		/* User space device id. */
 	char *type_string;		/* User space device name. */
 	int min_mod_size;		/* Min number of bits. */
 	int max_mod_size;		/* Max number of bits. */
-	int short_crt;			/* Card has crt length restriction. */
+	int max_exp_bit_length;
 	int speed_rating[NUM_OPS];	/* Speed idx of crypto ops. */
-	int load;			/* Utilization of the crypto device */
+	atomic_t load;			/* Utilization of the crypto device */
+
+	int request_count;		/* # current requests. */
+
+	debug_info_t *dbf_area;		/* debugging */
+};
+
+struct zcrypt_queue {
+	struct list_head list;		/* Device list. */
+	struct kref refcount;		/* device refcounting */
+	struct zcrypt_card *zcard;
+	struct zcrypt_ops *ops;		/* Crypto operations. */
+	struct ap_queue *queue;		/* The "real" ap queue device. */
+	int online;			/* User online/offline */
+
+	atomic_t load;			/* Utilization of the crypto device */
 
 	int request_count;		/* # current requests. */
 
 	struct ap_message reply;	/* Per-device reply structure. */
-	int max_exp_bit_length;
 
 	debug_info_t *dbf_area;		/* debugging */
 };
@@ -143,12 +156,43 @@ struct zcrypt_device {
 /* transport layer rescanning */
 extern atomic_t zcrypt_rescan_req;
 
-struct zcrypt_device *zcrypt_device_alloc(size_t);
-void zcrypt_device_free(struct zcrypt_device *);
-void zcrypt_device_get(struct zcrypt_device *);
-int zcrypt_device_put(struct zcrypt_device *);
-int zcrypt_device_register(struct zcrypt_device *);
-void zcrypt_device_unregister(struct zcrypt_device *);
+extern spinlock_t zcrypt_list_lock;
+extern int zcrypt_device_count;
+extern struct list_head zcrypt_card_list;
+
+extern debug_info_t *zcrypt_dbf_common;
+extern debug_info_t *zcrypt_dbf_devices;
+extern debug_info_t *zcrypt_dbf_cards;
+
+#define for_each_zcrypt_card(_zc) \
+	list_for_each_entry(_zc, &zcrypt_card_list, list)
+
+#define for_each_zcrypt_queue(_zq, _zc) \
+	list_for_each_entry(_zq, &(_zc)->zqueues, list)
+
+struct zcrypt_card *zcrypt_card_alloc(void);
+void zcrypt_card_free(struct zcrypt_card *);
+void zcrypt_card_get(struct zcrypt_card *);
+int zcrypt_card_put(struct zcrypt_card *);
+int zcrypt_card_register(struct zcrypt_card *);
+void zcrypt_card_unregister(struct zcrypt_card *);
+struct zcrypt_card *zcrypt_card_get_best(unsigned int *,
+					 unsigned int, unsigned int);
+void zcrypt_card_put_best(struct zcrypt_card *, unsigned int);
+
+struct zcrypt_queue *zcrypt_queue_alloc(size_t);
+void zcrypt_queue_free(struct zcrypt_queue *);
+void zcrypt_queue_get(struct zcrypt_queue *);
+int zcrypt_queue_put(struct zcrypt_queue *);
+int zcrypt_queue_register(struct zcrypt_queue *);
+void zcrypt_queue_unregister(struct zcrypt_queue *);
+void zcrypt_queue_force_online(struct zcrypt_queue *, int);
+struct zcrypt_queue *zcrypt_queue_get_best(unsigned int, unsigned int);
+void  zcrypt_queue_put_best(struct zcrypt_queue *, unsigned int);
+
+int zcrypt_rng_device_add(void);
+void zcrypt_rng_device_remove(void);
+
 void zcrypt_msgtype_register(struct zcrypt_ops *);
 void zcrypt_msgtype_unregister(struct zcrypt_ops *);
 struct zcrypt_ops *zcrypt_msgtype(unsigned char *, int);
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
new file mode 100644
index 000000000000..57873d775e95
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -0,0 +1,181 @@
+/*
+ *  zcrypt 2.1.0
+ *
+ *  Copyright IBM Corp. 2001, 2012
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *  MSGTYPE restruct:		  Holger Dengler <hd@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/compat.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/uaccess.h>
+#include <linux/hw_random.h>
+#include <linux/debugfs.h>
+#include <asm/debug.h>
+
+#include "zcrypt_debug.h"
+#include "zcrypt_api.h"
+
+#include "zcrypt_msgtype6.h"
+#include "zcrypt_msgtype50.h"
+
+/*
+ * Device attributes common for all crypto card devices.
+ */
+
+static ssize_t zcrypt_card_type_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct zcrypt_card *zc = to_ap_card(dev)->private;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", zc->type_string);
+}
+
+static DEVICE_ATTR(type, 0444, zcrypt_card_type_show, NULL);
+
+static ssize_t zcrypt_card_online_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct zcrypt_card *zc = to_ap_card(dev)->private;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", zc->online);
+}
+
+static ssize_t zcrypt_card_online_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct zcrypt_card *zc = to_ap_card(dev)->private;
+	struct zcrypt_queue *zq;
+	int online, id;
+
+	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
+		return -EINVAL;
+
+	zc->online = online;
+	id = zc->card->id;
+	ZCRYPT_DBF_DEV(DBF_INFO, zc, "card%02xo%dman", id, online);
+	spin_lock(&zcrypt_list_lock);
+	list_for_each_entry(zq, &zc->zqueues, list)
+		zcrypt_queue_force_online(zq, online);
+	spin_unlock(&zcrypt_list_lock);
+	return count;
+}
+
+static DEVICE_ATTR(online, 0644, zcrypt_card_online_show,
+		   zcrypt_card_online_store);
+
+static struct attribute *zcrypt_card_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_online.attr,
+	NULL,
+};
+
+static struct attribute_group zcrypt_card_attr_group = {
+	.attrs = zcrypt_card_attrs,
+};
+
+struct zcrypt_card *zcrypt_card_alloc(void)
+{
+	struct zcrypt_card *zc;
+
+	zc = kzalloc(sizeof(struct zcrypt_card), GFP_KERNEL);
+	if (!zc)
+		return NULL;
+	INIT_LIST_HEAD(&zc->list);
+	INIT_LIST_HEAD(&zc->zqueues);
+	zc->dbf_area = zcrypt_dbf_cards;
+	kref_init(&zc->refcount);
+	return zc;
+}
+EXPORT_SYMBOL(zcrypt_card_alloc);
+
+void zcrypt_card_free(struct zcrypt_card *zc)
+{
+	kfree(zc);
+}
+EXPORT_SYMBOL(zcrypt_card_free);
+
+static void zcrypt_card_release(struct kref *kref)
+{
+	struct zcrypt_card *zdev =
+		container_of(kref, struct zcrypt_card, refcount);
+	zcrypt_card_free(zdev);
+}
+
+void zcrypt_card_get(struct zcrypt_card *zc)
+{
+	kref_get(&zc->refcount);
+}
+EXPORT_SYMBOL(zcrypt_card_get);
+
+int zcrypt_card_put(struct zcrypt_card *zc)
+{
+	return kref_put(&zc->refcount, zcrypt_card_release);
+}
+EXPORT_SYMBOL(zcrypt_card_put);
+
+/**
+ * zcrypt_card_register() - Register a crypto card device.
+ * @zc: Pointer to a crypto card device
+ *
+ * Register a crypto card device. Returns 0 if successful.
+ */
+int zcrypt_card_register(struct zcrypt_card *zc)
+{
+	int rc;
+
+	rc = sysfs_create_group(&zc->card->ap_dev.device.kobj,
+				&zcrypt_card_attr_group);
+	if (rc)
+		return rc;
+
+	spin_lock(&zcrypt_list_lock);
+	list_add_tail(&zc->list, &zcrypt_card_list);
+	spin_unlock(&zcrypt_list_lock);
+
+	zc->online = 1;
+	return rc;
+}
+EXPORT_SYMBOL(zcrypt_card_register);
+
+/**
+ * zcrypt_card_unregister(): Unregister a crypto card device.
+ * @zc: Pointer to crypto card device
+ *
+ * Unregister a crypto card device.
+ */
+void zcrypt_card_unregister(struct zcrypt_card *zc)
+{
+	spin_lock(&zcrypt_list_lock);
+	list_del_init(&zc->list);
+	spin_unlock(&zcrypt_list_lock);
+	sysfs_remove_group(&zc->card->ap_dev.device.kobj,
+			   &zcrypt_card_attr_group);
+}
+EXPORT_SYMBOL(zcrypt_card_unregister);
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index 4bb13eadd0f1..c7d48a18199e 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/atomic.h>
 #include <asm/uaccess.h>
+#include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
 #include "zcrypt_api.h"
@@ -54,108 +55,195 @@
 #define CEX2A_CLEANUP_TIME	(15*HZ)
 #define CEX3A_CLEANUP_TIME	CEX2A_CLEANUP_TIME
 
-static struct ap_device_id zcrypt_cex2a_ids[] = {
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2A) },
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX3A) },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_ids);
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("CEX2A Cryptographic Coprocessor device driver, " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-static int zcrypt_cex2a_probe(struct ap_device *ap_dev);
-static void zcrypt_cex2a_remove(struct ap_device *ap_dev);
+static struct ap_device_id zcrypt_cex2a_card_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_CEX2A,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX3A,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ /* end of list */ },
+};
+
+MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_card_ids);
 
-static struct ap_driver zcrypt_cex2a_driver = {
-	.probe = zcrypt_cex2a_probe,
-	.remove = zcrypt_cex2a_remove,
-	.ids = zcrypt_cex2a_ids,
-	.request_timeout = CEX2A_CLEANUP_TIME,
+static struct ap_device_id zcrypt_cex2a_queue_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_CEX2A,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX3A,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ /* end of list */ },
 };
 
+MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids);
+
 /**
- * Probe function for CEX2A cards. It always accepts the AP device
- * since the bus_match already checked the hardware type.
+ * Probe function for CEX2A card devices. It always accepts the AP device
+ * since the bus_match already checked the card type.
  * @ap_dev: pointer to the AP device.
  */
-static int zcrypt_cex2a_probe(struct ap_device *ap_dev)
+static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev = NULL;
-	int CEX2A_SPEED_IDX[] = { 800, 1000, 2000,  900, 1200, 2400, 0};
-	int CEX3A_SPEED_IDX[] = { 400,	500, 1000,  450,  550, 1200, 0};
+	/*
+	 * Normalized speed ratings per crypto adapter
+	 * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
+	 */
+	static const int CEX2A_SPEED_IDX[] = {
+		800, 1000, 2000,  900, 1200, 2400, 0, 0};
+	static const int CEX3A_SPEED_IDX[] = {
+		400,  500, 1000,  450,	550, 1200, 0, 0};
+
+	struct ap_card *ac = to_ap_card(&ap_dev->device);
+	struct zcrypt_card *zc;
 	int rc = 0;
 
+	zc = zcrypt_card_alloc();
+	if (!zc)
+		return -ENOMEM;
+	zc->card = ac;
+	ac->private = zc;
+
+	if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) {
+		zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
+		zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
+		memcpy(zc->speed_rating, CEX2A_SPEED_IDX,
+		       sizeof(CEX2A_SPEED_IDX));
+		zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
+		zc->type_string = "CEX2A";
+		zc->user_space_type = ZCRYPT_CEX2A;
+	} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX3A) {
+		zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
+		zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
+		zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
+		if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
+		    ap_test_bit(&ac->functions, AP_FUNC_CRT4K)) {
+			zc->max_mod_size = CEX3A_MAX_MOD_SIZE;
+			zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE;
+		}
+		memcpy(zc->speed_rating, CEX3A_SPEED_IDX,
+		       sizeof(CEX3A_SPEED_IDX));
+		zc->type_string = "CEX3A";
+		zc->user_space_type = ZCRYPT_CEX3A;
+	} else {
+		zcrypt_card_free(zc);
+		return -ENODEV;
+	}
+	zc->online = 1;
+
+	rc = zcrypt_card_register(zc);
+	if (rc) {
+		ac->private = NULL;
+		zcrypt_card_free(zc);
+	}
+
+	return rc;
+}
+
+/**
+ * This is called to remove the CEX2A card driver information
+ * if an AP card device is removed.
+ */
+static void zcrypt_cex2a_card_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
+
+	if (zc)
+		zcrypt_card_unregister(zc);
+}
+
+static struct ap_driver zcrypt_cex2a_card_driver = {
+	.probe = zcrypt_cex2a_card_probe,
+	.remove = zcrypt_cex2a_card_remove,
+	.ids = zcrypt_cex2a_card_ids,
+};
+
+/**
+ * Probe function for CEX2A queue devices. It always accepts the AP device
+ * since the bus_match already checked the queue type.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
+{
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq = NULL;
+	int rc;
+
 	switch (ap_dev->device_type) {
 	case AP_DEVICE_TYPE_CEX2A:
-		zdev = zcrypt_device_alloc(CEX2A_MAX_RESPONSE_SIZE);
-		if (!zdev)
+		zq = zcrypt_queue_alloc(CEX2A_MAX_RESPONSE_SIZE);
+		if (!zq)
 			return -ENOMEM;
-		zdev->user_space_type = ZCRYPT_CEX2A;
-		zdev->type_string = "CEX2A";
-		zdev->min_mod_size = CEX2A_MIN_MOD_SIZE;
-		zdev->max_mod_size = CEX2A_MAX_MOD_SIZE;
-		zdev->short_crt = 1;
-		memcpy(zdev->speed_rating, CEX2A_SPEED_IDX,
-		       sizeof(CEX2A_SPEED_IDX));
-		zdev->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
 		break;
 	case AP_DEVICE_TYPE_CEX3A:
-		zdev = zcrypt_device_alloc(CEX3A_MAX_RESPONSE_SIZE);
-		if (!zdev)
+		zq = zcrypt_queue_alloc(CEX3A_MAX_RESPONSE_SIZE);
+		if (!zq)
 			return -ENOMEM;
-		zdev->user_space_type = ZCRYPT_CEX3A;
-		zdev->type_string = "CEX3A";
-		zdev->min_mod_size = CEX2A_MIN_MOD_SIZE;
-		zdev->max_mod_size = CEX2A_MAX_MOD_SIZE;
-		zdev->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
-		if (ap_test_bit(&ap_dev->functions, AP_FUNC_MEX4K) &&
-		    ap_test_bit(&ap_dev->functions, AP_FUNC_CRT4K)) {
-			zdev->max_mod_size = CEX3A_MAX_MOD_SIZE;
-			zdev->max_exp_bit_length = CEX3A_MAX_MOD_SIZE;
-		}
-		zdev->short_crt = 1;
-		memcpy(zdev->speed_rating, CEX3A_SPEED_IDX,
-		       sizeof(CEX3A_SPEED_IDX));
 		break;
 	}
-	if (!zdev)
+	if (!zq)
 		return -ENODEV;
-	zdev->ops = zcrypt_msgtype(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT);
-	zdev->ap_dev = ap_dev;
-	zdev->online = 1;
-	zdev->load = zdev->speed_rating[0];
-	ap_device_init_reply(ap_dev, &zdev->reply);
-	ap_dev->private = zdev;
-	rc = zcrypt_device_register(zdev);
+	zq->ops = zcrypt_msgtype(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT);
+	zq->queue = aq;
+	zq->online = 1;
+	atomic_set(&zq->load, 0);
+	ap_queue_init_reply(aq, &zq->reply);
+	aq->request_timeout = CEX2A_CLEANUP_TIME,
+	aq->private = zq;
+	rc = zcrypt_queue_register(zq);
 	if (rc) {
-		ap_dev->private = NULL;
-		zcrypt_device_free(zdev);
+		aq->private = NULL;
+		zcrypt_queue_free(zq);
 	}
+
 	return rc;
 }
 
 /**
- * This is called to remove the extended CEX2A driver information
- * if an AP device is removed.
+ * This is called to remove the CEX2A queue driver information
+ * if an AP queue device is removed.
  */
-static void zcrypt_cex2a_remove(struct ap_device *ap_dev)
+static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev = ap_dev->private;
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq = aq->private;
 
-	zcrypt_device_unregister(zdev);
+	ap_queue_remove(aq);
+	if (zq)
+		zcrypt_queue_unregister(zq);
 }
 
+static struct ap_driver zcrypt_cex2a_queue_driver = {
+	.probe = zcrypt_cex2a_queue_probe,
+	.remove = zcrypt_cex2a_queue_remove,
+	.suspend = ap_queue_suspend,
+	.resume = ap_queue_resume,
+	.ids = zcrypt_cex2a_queue_ids,
+};
+
 int __init zcrypt_cex2a_init(void)
 {
-	return ap_driver_register(&zcrypt_cex2a_driver, THIS_MODULE, "cex2a");
+	int rc;
+
+	rc = ap_driver_register(&zcrypt_cex2a_card_driver,
+				THIS_MODULE, "cex2acard");
+	if (rc)
+		return rc;
+
+	rc = ap_driver_register(&zcrypt_cex2a_queue_driver,
+				THIS_MODULE, "cex2aqueue");
+	if (rc)
+		ap_driver_unregister(&zcrypt_cex2a_card_driver);
+
+	return rc;
 }
 
 void __exit zcrypt_cex2a_exit(void)
 {
-	ap_driver_unregister(&zcrypt_cex2a_driver);
+	ap_driver_unregister(&zcrypt_cex2a_queue_driver);
+	ap_driver_unregister(&zcrypt_cex2a_card_driver);
 }
 
 module_init(zcrypt_cex2a_init);
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index ff28ad543c30..4e91163d70a6 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -9,6 +9,7 @@
 #include <linux/err.h>
 #include <linux/atomic.h>
 #include <linux/uaccess.h>
+#include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
 #include "zcrypt_api.h"
@@ -34,160 +35,246 @@
  */
 #define CEX4_CLEANUP_TIME	(900*HZ)
 
-static struct ap_device_id zcrypt_cex4_ids[] = {
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX4)  },
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX5)  },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex4_ids);
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("CEX4 Cryptographic Card device driver, " \
 		   "Copyright IBM Corp. 2012");
 MODULE_LICENSE("GPL");
 
-static int zcrypt_cex4_probe(struct ap_device *ap_dev);
-static void zcrypt_cex4_remove(struct ap_device *ap_dev);
+static struct ap_device_id zcrypt_cex4_card_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_CEX4,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX5,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ /* end of list */ },
+};
+
+MODULE_DEVICE_TABLE(ap, zcrypt_cex4_card_ids);
 
-static struct ap_driver zcrypt_cex4_driver = {
-	.probe = zcrypt_cex4_probe,
-	.remove = zcrypt_cex4_remove,
-	.ids = zcrypt_cex4_ids,
-	.request_timeout = CEX4_CLEANUP_TIME,
+static struct ap_device_id zcrypt_cex4_queue_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_CEX4,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX5,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ /* end of list */ },
 };
 
+MODULE_DEVICE_TABLE(ap, zcrypt_cex4_queue_ids);
+
 /**
- * Probe function for CEX4 cards. It always accepts the AP device
+ * Probe function for CEX4 card device. It always accepts the AP device
  * since the bus_match already checked the hardware type.
  * @ap_dev: pointer to the AP device.
  */
-static int zcrypt_cex4_probe(struct ap_device *ap_dev)
+static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev = NULL;
 	/*
 	 * Normalized speed ratings per crypto adapter
 	 * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
 	 */
-	int CEX4A_SPEED_IDX[] = {  5,  6,    59,  20, 115,  581,  0,  0};
-	int CEX5A_SPEED_IDX[] = {  3,  3,     6,   8,  32,  218,  0,  0};
-	int CEX4C_SPEED_IDX[] = { 24,  25,   82,  41, 138, 1111, 79,  8};
-	int CEX5C_SPEED_IDX[] = { 10,  14,   23,  17,  45,  242, 63,  4};
-	int CEX4P_SPEED_IDX[] = {142, 198, 1852, 203, 331, 1563,  0,  8};
-	int CEX5P_SPEED_IDX[] = { 49,  67,  131,  52,  85,  287,  0,  4};
+	static const int CEX4A_SPEED_IDX[] = {
+		5,  6,	  59,  20, 115,  581,  0,  0};
+	static const int CEX5A_SPEED_IDX[] = {
+		3,  3,	   6,	8,  32,  218,  0,  0};
+	static const int CEX4C_SPEED_IDX[] = {
+		24,  25,   82,	41, 138, 1111, 79,  8};
+	static const int CEX5C_SPEED_IDX[] = {
+		10,  14,   23,	17,  45,  242, 63,  4};
+	static const int CEX4P_SPEED_IDX[] = {
+		142, 198, 1852, 203, 331, 1563,  0,  8};
+	static const int CEX5P_SPEED_IDX[] = {
+		49,  67,  131,	52,  85,  287,	0,  4};
+
+	struct ap_card *ac = to_ap_card(&ap_dev->device);
+	struct zcrypt_card *zc;
 	int rc = 0;
 
-	switch (ap_dev->device_type) {
-	case AP_DEVICE_TYPE_CEX4:
-	case AP_DEVICE_TYPE_CEX5:
-		if (ap_test_bit(&ap_dev->functions, AP_FUNC_ACCEL)) {
-			zdev = zcrypt_device_alloc(CEX4A_MAX_MESSAGE_SIZE);
-			if (!zdev)
-				return -ENOMEM;
-			if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) {
-				zdev->type_string = "CEX4A";
-				memcpy(zdev->speed_rating, CEX4A_SPEED_IDX,
-				       sizeof(CEX4A_SPEED_IDX));
-			} else {
-				zdev->type_string = "CEX5A";
-				memcpy(zdev->speed_rating, CEX5A_SPEED_IDX,
-				       sizeof(CEX5A_SPEED_IDX));
-			}
-			zdev->user_space_type = ZCRYPT_CEX3A;
-			zdev->min_mod_size = CEX4A_MIN_MOD_SIZE;
-			if (ap_test_bit(&ap_dev->functions, AP_FUNC_MEX4K) &&
-			    ap_test_bit(&ap_dev->functions, AP_FUNC_CRT4K)) {
-				zdev->max_mod_size =
-					CEX4A_MAX_MOD_SIZE_4K;
-				zdev->max_exp_bit_length =
-					CEX4A_MAX_MOD_SIZE_4K;
-			} else {
-				zdev->max_mod_size =
-					CEX4A_MAX_MOD_SIZE_2K;
-				zdev->max_exp_bit_length =
-					CEX4A_MAX_MOD_SIZE_2K;
-			}
-			zdev->short_crt = 1;
-			zdev->ops = zcrypt_msgtype(MSGTYPE50_NAME,
-						   MSGTYPE50_VARIANT_DEFAULT);
-		} else if (ap_test_bit(&ap_dev->functions, AP_FUNC_COPRO)) {
-			zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE);
-			if (!zdev)
-				return -ENOMEM;
-			if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) {
-				zdev->type_string = "CEX4C";
-				memcpy(zdev->speed_rating, CEX4C_SPEED_IDX,
-				       sizeof(CEX4C_SPEED_IDX));
-			} else {
-				zdev->type_string = "CEX5C";
-				memcpy(zdev->speed_rating, CEX5C_SPEED_IDX,
-				       sizeof(CEX5C_SPEED_IDX));
-			}
-			zdev->user_space_type = ZCRYPT_CEX3C;
-			zdev->min_mod_size = CEX4C_MIN_MOD_SIZE;
-			zdev->max_mod_size = CEX4C_MAX_MOD_SIZE;
-			zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
-			zdev->short_crt = 0;
-			zdev->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-						   MSGTYPE06_VARIANT_DEFAULT);
-		} else if (ap_test_bit(&ap_dev->functions, AP_FUNC_EP11)) {
-			zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE);
-			if (!zdev)
-				return -ENOMEM;
-			if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) {
-				zdev->type_string = "CEX4P";
-				memcpy(zdev->speed_rating, CEX4P_SPEED_IDX,
-				       sizeof(CEX4P_SPEED_IDX));
-			} else {
-				zdev->type_string = "CEX5P";
-				memcpy(zdev->speed_rating, CEX5P_SPEED_IDX,
-				       sizeof(CEX5P_SPEED_IDX));
-			}
-			zdev->user_space_type = ZCRYPT_CEX4;
-			zdev->min_mod_size = CEX4C_MIN_MOD_SIZE;
-			zdev->max_mod_size = CEX4C_MAX_MOD_SIZE;
-			zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
-			zdev->short_crt = 0;
-			zdev->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-						   MSGTYPE06_VARIANT_EP11);
+	zc = zcrypt_card_alloc();
+	if (!zc)
+		return -ENOMEM;
+	zc->card = ac;
+	ac->private = zc;
+	if (ap_test_bit(&ac->functions, AP_FUNC_ACCEL)) {
+		if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
+			zc->type_string = "CEX4A";
+			zc->user_space_type = ZCRYPT_CEX4;
+			memcpy(zc->speed_rating, CEX4A_SPEED_IDX,
+			       sizeof(CEX4A_SPEED_IDX));
+		} else {
+			zc->type_string = "CEX5A";
+			zc->user_space_type = ZCRYPT_CEX5;
+			memcpy(zc->speed_rating, CEX5A_SPEED_IDX,
+			       sizeof(CEX5A_SPEED_IDX));
 		}
-		break;
-	}
-	if (!zdev)
+		zc->min_mod_size = CEX4A_MIN_MOD_SIZE;
+		if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
+		    ap_test_bit(&ac->functions, AP_FUNC_CRT4K)) {
+			zc->max_mod_size = CEX4A_MAX_MOD_SIZE_4K;
+			zc->max_exp_bit_length =
+				CEX4A_MAX_MOD_SIZE_4K;
+		} else {
+			zc->max_mod_size = CEX4A_MAX_MOD_SIZE_2K;
+			zc->max_exp_bit_length =
+				CEX4A_MAX_MOD_SIZE_2K;
+		}
+	} else if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) {
+		if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
+			zc->type_string = "CEX4C";
+			/* wrong user space type, must be CEX4
+			 * just keep it for cca compatibility
+			 */
+			zc->user_space_type = ZCRYPT_CEX3C;
+			memcpy(zc->speed_rating, CEX4C_SPEED_IDX,
+			       sizeof(CEX4C_SPEED_IDX));
+		} else {
+			zc->type_string = "CEX5C";
+			/* wrong user space type, must be CEX5
+			 * just keep it for cca compatibility
+			 */
+			zc->user_space_type = ZCRYPT_CEX3C;
+			memcpy(zc->speed_rating, CEX5C_SPEED_IDX,
+			       sizeof(CEX5C_SPEED_IDX));
+		}
+		zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
+		zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
+		zc->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
+	} else if (ap_test_bit(&ac->functions, AP_FUNC_EP11)) {
+		if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
+			zc->type_string = "CEX4P";
+			zc->user_space_type = ZCRYPT_CEX4;
+			memcpy(zc->speed_rating, CEX4P_SPEED_IDX,
+			       sizeof(CEX4P_SPEED_IDX));
+		} else {
+			zc->type_string = "CEX5P";
+			zc->user_space_type = ZCRYPT_CEX5;
+			memcpy(zc->speed_rating, CEX5P_SPEED_IDX,
+			       sizeof(CEX5P_SPEED_IDX));
+		}
+		zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
+		zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
+		zc->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
+	} else {
+		zcrypt_card_free(zc);
 		return -ENODEV;
-	zdev->ap_dev = ap_dev;
-	zdev->online = 1;
-	zdev->load = zdev->speed_rating[0];
-	ap_device_init_reply(ap_dev, &zdev->reply);
-	ap_dev->private = zdev;
-	rc = zcrypt_device_register(zdev);
+	}
+	zc->online = 1;
+
+	rc = zcrypt_card_register(zc);
 	if (rc) {
-		ap_dev->private = NULL;
-		zcrypt_device_free(zdev);
+		ac->private = NULL;
+		zcrypt_card_free(zc);
 	}
+
 	return rc;
 }
 
 /**
- * This is called to remove the extended CEX4 driver information
- * if an AP device is removed.
+ * This is called to remove the CEX4 card driver information
+ * if an AP card device is removed.
  */
-static void zcrypt_cex4_remove(struct ap_device *ap_dev)
+static void zcrypt_cex4_card_remove(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev = ap_dev->private;
+	struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
 
-	if (zdev) {
-		zcrypt_device_unregister(zdev);
+	if (zc)
+		zcrypt_card_unregister(zc);
+}
+
+static struct ap_driver zcrypt_cex4_card_driver = {
+	.probe = zcrypt_cex4_card_probe,
+	.remove = zcrypt_cex4_card_remove,
+	.ids = zcrypt_cex4_card_ids,
+};
+
+/**
+ * Probe function for CEX4 queue device. It always accepts the AP device
+ * since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
+{
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq;
+	int rc;
+
+	if (ap_test_bit(&aq->card->functions, AP_FUNC_ACCEL)) {
+		zq = zcrypt_queue_alloc(CEX4A_MAX_MESSAGE_SIZE);
+		if (!zq)
+			return -ENOMEM;
+		zq->ops = zcrypt_msgtype(MSGTYPE50_NAME,
+					 MSGTYPE50_VARIANT_DEFAULT);
+	} else if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) {
+		zq = zcrypt_queue_alloc(CEX4C_MAX_MESSAGE_SIZE);
+		if (!zq)
+			return -ENOMEM;
+		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
+					 MSGTYPE06_VARIANT_DEFAULT);
+	} else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11)) {
+		zq = zcrypt_queue_alloc(CEX4C_MAX_MESSAGE_SIZE);
+		if (!zq)
+			return -ENOMEM;
+		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
+					 MSGTYPE06_VARIANT_EP11);
+	} else {
+		return -ENODEV;
 	}
+	zq->queue = aq;
+	zq->online = 1;
+	atomic_set(&zq->load, 0);
+	ap_queue_init_reply(aq, &zq->reply);
+	aq->request_timeout = CEX4_CLEANUP_TIME,
+	aq->private = zq;
+	rc = zcrypt_queue_register(zq);
+	if (rc) {
+		aq->private = NULL;
+		zcrypt_queue_free(zq);
+	}
+
+	return rc;
 }
 
+/**
+ * This is called to remove the CEX4 queue driver information
+ * if an AP queue device is removed.
+ */
+static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev)
+{
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq = aq->private;
+
+	ap_queue_remove(aq);
+	if (zq)
+		zcrypt_queue_unregister(zq);
+}
+
+static struct ap_driver zcrypt_cex4_queue_driver = {
+	.probe = zcrypt_cex4_queue_probe,
+	.remove = zcrypt_cex4_queue_remove,
+	.suspend = ap_queue_suspend,
+	.resume = ap_queue_resume,
+	.ids = zcrypt_cex4_queue_ids,
+};
+
 int __init zcrypt_cex4_init(void)
 {
-	return ap_driver_register(&zcrypt_cex4_driver, THIS_MODULE, "cex4");
+	int rc;
+
+	rc = ap_driver_register(&zcrypt_cex4_card_driver,
+				THIS_MODULE, "cex4card");
+	if (rc)
+		return rc;
+
+	rc = ap_driver_register(&zcrypt_cex4_queue_driver,
+				THIS_MODULE, "cex4queue");
+	if (rc)
+		ap_driver_unregister(&zcrypt_cex4_card_driver);
+
+	return rc;
 }
 
 void __exit zcrypt_cex4_exit(void)
 {
-	ap_driver_unregister(&zcrypt_cex4_driver);
+	ap_driver_unregister(&zcrypt_cex4_queue_driver);
+	ap_driver_unregister(&zcrypt_cex4_card_driver);
 }
 
 module_init(zcrypt_cex4_init);
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index de1b6c1d172c..09247331e9fb 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -87,7 +87,7 @@ struct error_hdr {
 #define REP88_ERROR_OPERAND	     0x84	/* CEX2A	*/
 #define REP88_ERROR_OPERAND_EVEN_MOD 0x85	/* CEX2A	*/
 
-static inline int convert_error(struct zcrypt_device *zdev,
+static inline int convert_error(struct zcrypt_queue *zq,
 				struct ap_message *reply)
 {
 	struct error_hdr *ehdr = reply->message;
@@ -110,11 +110,13 @@ static inline int convert_error(struct zcrypt_device *zdev,
 		 * and then repeat the request.
 		 */
 		atomic_set(&zcrypt_rescan_req, 1);
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%drc%d",
+			AP_QID_CARD(zq->queue->qid),
+			AP_QID_QUEUE(zq->queue->qid), zq->online,
 			ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
@@ -122,19 +124,23 @@ static inline int convert_error(struct zcrypt_device *zdev,
 	//   REP88_ERROR_MODULE_FAILURE		// '10' CEX2A
 		/* If a card fails disable it and repeat the request. */
 		atomic_set(&zcrypt_rescan_req, 1);
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%drc%d",
+			AP_QID_CARD(zq->queue->qid),
+			AP_QID_QUEUE(zq->queue->qid), zq->online,
 			ehdr->reply_code);
 		return -EAGAIN;
 	default:
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%drc%d",
+			AP_QID_CARD(zq->queue->qid),
+			AP_QID_QUEUE(zq->queue->qid), zq->online,
 			ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index fb97479af3f8..a9873b436115 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -53,9 +53,6 @@ MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-static void zcrypt_cex2a_receive(struct ap_device *, struct ap_message *,
-				 struct ap_message *);
-
 /**
  * The type 50 message family is associated with a CEX2A card.
  *
@@ -208,13 +205,13 @@ unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode)
 /**
  * Convert a ICAMEX message to a type50 MEX message.
  *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
+ * @zq: crypto queue pointer
+ * @ap_msg: crypto request pointer
  * @mex: pointer to user input data
  *
  * Returns 0 on success or -EFAULT.
  */
-static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_device *zdev,
+static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
 				       struct ap_message *ap_msg,
 				       struct ica_rsa_modexpo *mex)
 {
@@ -266,13 +263,13 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_device *zdev,
 /**
  * Convert a ICACRT message to a type50 CRT message.
  *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
+ * @zq: crypto queue pointer
+ * @ap_msg: crypto request pointer
  * @crt: pointer to user input data
  *
  * Returns 0 on success or -EFAULT.
  */
-static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev,
+static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
 				       struct ap_message *ap_msg,
 				       struct ica_rsa_modexpo_crt *crt)
 {
@@ -315,7 +312,7 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev,
 		u = crb2->u + sizeof(crb2->u) - short_len;
 		inp = crb2->message + sizeof(crb2->message) - mod_len;
 	} else if ((mod_len <= 512) &&	/* up to 4096 bit key size */
-		   (zdev->max_mod_size == CEX3A_MAX_MOD_SIZE)) { /* >= CEX3A */
+		   (zq->zcard->max_mod_size == CEX3A_MAX_MOD_SIZE)) {
 		struct type50_crb3_msg *crb3 = ap_msg->message;
 		memset(crb3, 0, sizeof(*crb3));
 		ap_msg->length = sizeof(*crb3);
@@ -349,14 +346,14 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev,
 /**
  * Copy results from a type 80 reply message back to user space.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @reply: reply AP message.
  * @data: pointer to user output data
  * @length: size of user output data
  *
  * Returns 0 on success or -EFAULT.
  */
-static int convert_type80(struct zcrypt_device *zdev,
+static int convert_type80(struct zcrypt_queue *zq,
 			  struct ap_message *reply,
 			  char __user *outputdata,
 			  unsigned int outputdatalength)
@@ -366,16 +363,18 @@ static int convert_type80(struct zcrypt_device *zdev,
 
 	if (t80h->len < sizeof(*t80h) + outputdatalength) {
 		/* The result is too short, the CEX2A card may not do that.. */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       AP_QID_DEVICE(zdev->ap_dev->qid),
-			       zdev->online, t80h->code);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%drc%d",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online, t80h->code);
 
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
-	if (zdev->user_space_type == ZCRYPT_CEX2A)
+	if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
 		BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
 	else
 		BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
@@ -385,7 +384,7 @@ static int convert_type80(struct zcrypt_device *zdev,
 	return 0;
 }
 
-static int convert_response(struct zcrypt_device *zdev,
+static int convert_response(struct zcrypt_queue *zq,
 			    struct ap_message *reply,
 			    char __user *outputdata,
 			    unsigned int outputdatalength)
@@ -394,16 +393,19 @@ static int convert_response(struct zcrypt_device *zdev,
 	switch (((unsigned char *) reply->message)[1]) {
 	case TYPE82_RSP_CODE:
 	case TYPE88_RSP_CODE:
-		return convert_error(zdev, reply);
+		return convert_error(zq, reply);
 	case TYPE80_RSP_CODE:
-		return convert_type80(zdev, reply,
+		return convert_type80(zq, reply,
 				      outputdata, outputdatalength);
 	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%dfail",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -412,11 +414,11 @@ static int convert_response(struct zcrypt_device *zdev,
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
- * @ap_dev: pointer to the AP device
+ * @aq: pointer to the AP device
  * @msg: pointer to the AP message
  * @reply: pointer to the AP reply message
  */
-static void zcrypt_cex2a_receive(struct ap_device *ap_dev,
+static void zcrypt_cex2a_receive(struct ap_queue *aq,
 				 struct ap_message *msg,
 				 struct ap_message *reply)
 {
@@ -432,7 +434,7 @@ static void zcrypt_cex2a_receive(struct ap_device *ap_dev,
 		goto out;	/* ap_msg->rc indicates the error */
 	t80h = reply->message;
 	if (t80h->type == TYPE80_RSP_CODE) {
-		if (ap_dev->device_type == AP_DEVICE_TYPE_CEX2A)
+		if (aq->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A)
 			length = min_t(int,
 				       CEX2A_MAX_RESPONSE_SIZE, t80h->len);
 		else
@@ -450,11 +452,11 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
 /**
  * The request distributor calls this function if it picked the CEX2A
  * device to handle a modexpo request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  CEX2A device to the request distributor
  * @mex: pointer to the modexpo request buffer
  */
-static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
+static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
 				 struct ica_rsa_modexpo *mex)
 {
 	struct ap_message ap_msg;
@@ -462,7 +464,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
 	int rc;
 
 	ap_init_message(&ap_msg);
-	if (zdev->user_space_type == ZCRYPT_CEX2A)
+	if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
 		ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE,
 					 GFP_KERNEL);
 	else
@@ -474,20 +476,20 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg.private = &work;
-	rc = ICAMEX_msg_to_type50MEX_msg(zdev, &ap_msg, mex);
+	rc = ICAMEX_msg_to_type50MEX_msg(zq, &ap_msg, mex);
 	if (rc)
 		goto out_free;
 	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
+	ap_queue_message(zq->queue, &ap_msg);
 	rc = wait_for_completion_interruptible(&work);
 	if (rc == 0) {
 		rc = ap_msg.rc;
 		if (rc == 0)
-			rc = convert_response(zdev, &ap_msg, mex->outputdata,
+			rc = convert_response(zq, &ap_msg, mex->outputdata,
 					      mex->outputdatalength);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		ap_cancel_message(zq->queue, &ap_msg);
 out_free:
 	kfree(ap_msg.message);
 	return rc;
@@ -496,11 +498,11 @@ out_free:
 /**
  * The request distributor calls this function if it picked the CEX2A
  * device to handle a modexpo_crt request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  CEX2A device to the request distributor
  * @crt: pointer to the modexpoc_crt request buffer
  */
-static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
+static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
 				     struct ica_rsa_modexpo_crt *crt)
 {
 	struct ap_message ap_msg;
@@ -508,7 +510,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
 	int rc;
 
 	ap_init_message(&ap_msg);
-	if (zdev->user_space_type == ZCRYPT_CEX2A)
+	if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
 		ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE,
 					 GFP_KERNEL);
 	else
@@ -520,20 +522,20 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg.private = &work;
-	rc = ICACRT_msg_to_type50CRT_msg(zdev, &ap_msg, crt);
+	rc = ICACRT_msg_to_type50CRT_msg(zq, &ap_msg, crt);
 	if (rc)
 		goto out_free;
 	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
+	ap_queue_message(zq->queue, &ap_msg);
 	rc = wait_for_completion_interruptible(&work);
 	if (rc == 0) {
 		rc = ap_msg.rc;
 		if (rc == 0)
-			rc = convert_response(zdev, &ap_msg, crt->outputdata,
+			rc = convert_response(zq, &ap_msg, crt->outputdata,
 					      crt->outputdatalength);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		ap_cancel_message(zq->queue, &ap_msg);
 out_free:
 	kfree(ap_msg.message);
 	return rc;
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 957a88d5768b..c6cfb9acec19 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -60,9 +60,6 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-static void zcrypt_msgtype6_receive(struct ap_device *, struct ap_message *,
-				 struct ap_message *);
-
 /**
  * CPRB
  *	  Note that all shorts, ints and longs are little-endian.
@@ -258,13 +255,13 @@ int speed_idx_ep11(int req_type)
 /**
  * Convert a ICAMEX message to a type6 MEX message.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @ap_msg: pointer to AP message
  * @mex: pointer to user input data
  *
  * Returns 0 on success or -EFAULT.
  */
-static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
+static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
 				       struct ap_message *ap_msg,
 				       struct ica_rsa_modexpo *mex)
 {
@@ -279,11 +276,6 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
 		.ulen		= 10,
 		.only_rule	= {'M', 'R', 'P', ' ', ' ', ' ', ' ', ' '}
 	};
-	static struct function_and_rules_block static_pke_fnr_MCL2 = {
-		.function_code	= {'P', 'K'},
-		.ulen		= 10,
-		.only_rule	= {'Z', 'E', 'R', 'O', '-', 'P', 'A', 'D'}
-	};
 	struct {
 		struct type6_hdr hdr;
 		struct CPRBX cprbx;
@@ -310,11 +302,10 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
 	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
 
 	msg->cprbx = static_cprbx;
-	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
 	msg->cprbx.rpl_msgbl = msg->hdr.FromCardLen1;
 
-	msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ?
-		static_pke_fnr_MCL2 : static_pke_fnr;
+	msg->fr = static_pke_fnr;
 
 	msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx);
 
@@ -325,13 +316,13 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
 /**
  * Convert a ICACRT message to a type6 CRT message.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @ap_msg: pointer to AP message
  * @crt: pointer to user input data
  *
  * Returns 0 on success or -EFAULT.
  */
-static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
+static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
 				       struct ap_message *ap_msg,
 				       struct ica_rsa_modexpo_crt *crt)
 {
@@ -347,11 +338,6 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
 		.only_rule	= {'Z', 'E', 'R', 'O', '-', 'P', 'A', 'D'}
 	};
 
-	static struct function_and_rules_block static_pkd_fnr_MCL2 = {
-		.function_code	= {'P', 'D'},
-		.ulen		= 10,
-		.only_rule	= {'P', 'K', 'C', 'S', '-', '1', '.', '2'}
-	};
 	struct {
 		struct type6_hdr hdr;
 		struct CPRBX cprbx;
@@ -378,12 +364,11 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
 	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
 
 	msg->cprbx = static_cprbx;
-	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
 	msg->cprbx.req_parml = msg->cprbx.rpl_msgbl =
 		size - sizeof(msg->hdr) - sizeof(msg->cprbx);
 
-	msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ?
-		static_pkd_fnr_MCL2 : static_pkd_fnr;
+	msg->fr = static_pkd_fnr;
 
 	ap_msg->length = size;
 	return 0;
@@ -392,7 +377,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
 /**
  * Convert a XCRB message to a type6 CPRB message.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @ap_msg: pointer to AP message
  * @xcRB: pointer to user input data
  *
@@ -405,7 +390,8 @@ struct type86_fmt2_msg {
 
 static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
 				      struct ica_xcRB *xcRB,
-				      unsigned int *fcode)
+				      unsigned int *fcode,
+				      unsigned short **dom)
 {
 	static struct type6_hdr static_type6_hdrX = {
 		.type		=  0x06,
@@ -486,6 +472,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
 	       sizeof(msg->hdr.function_code));
 
 	*fcode = (msg->hdr.function_code[0] << 8) | msg->hdr.function_code[1];
+	*dom = (unsigned short *)&msg->cprbx.domain;
 
 	if (memcmp(function_code, "US", 2) == 0)
 		ap_msg->special = 1;
@@ -497,6 +484,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
 	    copy_from_user(req_data, xcRB->request_data_address,
 		xcRB->request_data_length))
 		return -EFAULT;
+
 	return 0;
 }
 
@@ -504,6 +492,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
 				       struct ep11_urb *xcRB,
 				       unsigned int *fcode)
 {
+	unsigned int lfmt;
 	static struct type6_hdr static_type6_ep11_hdr = {
 		.type		=  0x06,
 		.rqid		= {0x00, 0x01},
@@ -556,14 +545,30 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
 		return -EFAULT;
 	}
 
-	*fcode = speed_idx_ep11(payload_hdr->func_val & 0xFFFF);
+	if ((msg->pld_lenfmt & 0x80) == 0x80) { /*ext.len.fmt 2 or 3*/
+		switch (msg->pld_lenfmt & 0x03) {
+		case 1:
+			lfmt = 2;
+			break;
+		case 2:
+			lfmt = 3;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		lfmt = 1; /* length format #1 */
+	}
+	payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
+	*fcode = payload_hdr->func_val & 0xFFFF;
+
 	return 0;
 }
 
 /**
  * Copy results from a type 86 ICA reply message back to user space.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @reply: reply AP message.
  * @data: pointer to user output data
  * @length: size of user output data
@@ -585,7 +590,7 @@ struct type86_ep11_reply {
 	struct ep11_cprb cprbx;
 } __packed;
 
-static int convert_type86_ica(struct zcrypt_device *zdev,
+static int convert_type86_ica(struct zcrypt_queue *zq,
 			  struct ap_message *reply,
 			  char __user *outputdata,
 			  unsigned int outputdatalength)
@@ -640,18 +645,22 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 		if (service_rc == 8 && service_rs == 770)
 			return -EINVAL;
 		if (service_rc == 8 && service_rs == 783) {
-			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			zq->zcard->min_mod_size =
+				PCIXCC_MIN_MOD_SIZE_OLD;
 			return -EAGAIN;
 		}
 		if (service_rc == 12 && service_rs == 769)
 			return -EINVAL;
 		if (service_rc == 8 && service_rs == 72)
 			return -EINVAL;
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%drc%d",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online,
 			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -688,13 +697,13 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 /**
  * Copy results from a type 86 XCRB reply message back to user space.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @reply: reply AP message.
  * @xcRB: pointer to XCRB
  *
  * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
  */
-static int convert_type86_xcrb(struct zcrypt_device *zdev,
+static int convert_type86_xcrb(struct zcrypt_queue *zq,
 			       struct ap_message *reply,
 			       struct ica_xcRB *xcRB)
 {
@@ -719,13 +728,13 @@ static int convert_type86_xcrb(struct zcrypt_device *zdev,
 /**
  * Copy results from a type 86 EP11 XCRB reply message back to user space.
  *
- * @zdev: crypto device pointer
+ * @zq: crypto device pointer
  * @reply: reply AP message.
  * @xcRB: pointer to EP11 user request block
  *
  * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
  */
-static int convert_type86_ep11_xcrb(struct zcrypt_device *zdev,
+static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq,
 				    struct ap_message *reply,
 				    struct ep11_urb *xcRB)
 {
@@ -743,7 +752,7 @@ static int convert_type86_ep11_xcrb(struct zcrypt_device *zdev,
 	return 0;
 }
 
-static int convert_type86_rng(struct zcrypt_device *zdev,
+static int convert_type86_rng(struct zcrypt_queue *zq,
 			  struct ap_message *reply,
 			  char *buffer)
 {
@@ -760,7 +769,7 @@ static int convert_type86_rng(struct zcrypt_device *zdev,
 	return msg->fmt2.count2;
 }
 
-static int convert_response_ica(struct zcrypt_device *zdev,
+static int convert_response_ica(struct zcrypt_queue *zq,
 			    struct ap_message *reply,
 			    char __user *outputdata,
 			    unsigned int outputdatalength)
@@ -771,35 +780,38 @@ static int convert_response_ica(struct zcrypt_device *zdev,
 	switch (((unsigned char *) reply->message)[1]) {
 	case TYPE82_RSP_CODE:
 	case TYPE88_RSP_CODE:
-		return convert_error(zdev, reply);
+		return convert_error(zq, reply);
 	case TYPE86_RSP_CODE:
 		if (msg->cprbx.ccp_rtcode &&
 		   (msg->cprbx.ccp_rscode == 0x14f) &&
 		   (outputdatalength > 256)) {
-			if (zdev->max_exp_bit_length <= 17) {
-				zdev->max_exp_bit_length = 17;
+			if (zq->zcard->max_exp_bit_length <= 17) {
+				zq->zcard->max_exp_bit_length = 17;
 				return -EAGAIN;
 			} else
 				return -EINVAL;
 		}
 		if (msg->hdr.reply_code)
-			return convert_error(zdev, reply);
+			return convert_error(zq, reply);
 		if (msg->cprbx.cprb_ver_id == 0x02)
-			return convert_type86_ica(zdev, reply,
+			return convert_type86_ica(zq, reply,
 						  outputdata, outputdatalength);
 		/* Fall through, no break, incorrect cprb version is an unknown
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%dfail",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
 
-static int convert_response_xcrb(struct zcrypt_device *zdev,
+static int convert_response_xcrb(struct zcrypt_queue *zq,
 			    struct ap_message *reply,
 			    struct ica_xcRB *xcRB)
 {
@@ -810,28 +822,31 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
 	case TYPE82_RSP_CODE:
 	case TYPE88_RSP_CODE:
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
-		return convert_error(zdev, reply);
+		return convert_error(zq, reply);
 	case TYPE86_RSP_CODE:
 		if (msg->hdr.reply_code) {
 			memcpy(&(xcRB->status), msg->fmt2.apfs, sizeof(u32));
-			return convert_error(zdev, reply);
+			return convert_error(zq, reply);
 		}
 		if (msg->cprbx.cprb_ver_id == 0x02)
-			return convert_type86_xcrb(zdev, reply, xcRB);
+			return convert_type86_xcrb(zq, reply, xcRB);
 		/* Fall through, no break, incorrect cprb version is an unknown
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%dfail",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
 
-static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
+static int convert_response_ep11_xcrb(struct zcrypt_queue *zq,
 	struct ap_message *reply, struct ep11_urb *xcRB)
 {
 	struct type86_ep11_reply *msg = reply->message;
@@ -840,24 +855,27 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
 	switch (((unsigned char *)reply->message)[1]) {
 	case TYPE82_RSP_CODE:
 	case TYPE87_RSP_CODE:
-		return convert_error(zdev, reply);
+		return convert_error(zq, reply);
 	case TYPE86_RSP_CODE:
 		if (msg->hdr.reply_code)
-			return convert_error(zdev, reply);
+			return convert_error(zq, reply);
 		if (msg->cprbx.cprb_ver_id == 0x04)
-			return convert_type86_ep11_xcrb(zdev, reply, xcRB);
+			return convert_type86_ep11_xcrb(zq, reply, xcRB);
 	/* Fall through, no break, incorrect cprb version is an unknown resp.*/
 	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%dfail",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online);
 		return -EAGAIN; /* repeat the request on a different device. */
 	}
 }
 
-static int convert_response_rng(struct zcrypt_device *zdev,
+static int convert_response_rng(struct zcrypt_queue *zq,
 				 struct ap_message *reply,
 				 char *data)
 {
@@ -871,15 +889,18 @@ static int convert_response_rng(struct zcrypt_device *zdev,
 		if (msg->hdr.reply_code)
 			return -EINVAL;
 		if (msg->cprbx.cprb_ver_id == 0x02)
-			return convert_type86_rng(zdev, reply, data);
+			return convert_type86_rng(zq, reply, data);
 		/* Fall through, no break, incorrect cprb version is an unknown
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       AP_QID_DEVICE(zdev->ap_dev->qid));
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
+		zq->online = 0;
+		pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid));
+		ZCRYPT_DBF_DEV(DBF_ERR, zq, "dev%02x%04xo%dfail",
+			       AP_QID_CARD(zq->queue->qid),
+			       AP_QID_QUEUE(zq->queue->qid),
+			       zq->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -888,11 +909,11 @@ static int convert_response_rng(struct zcrypt_device *zdev,
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
- * @ap_dev: pointer to the AP device
+ * @aq: pointer to the AP queue
  * @msg: pointer to the AP message
  * @reply: pointer to the AP reply message
  */
-static void zcrypt_msgtype6_receive(struct ap_device *ap_dev,
+static void zcrypt_msgtype6_receive(struct ap_queue *aq,
 				  struct ap_message *msg,
 				  struct ap_message *reply)
 {
@@ -937,11 +958,11 @@ out:
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
- * @ap_dev: pointer to the AP device
+ * @aq: pointer to the AP queue
  * @msg: pointer to the AP message
  * @reply: pointer to the AP reply message
  */
-static void zcrypt_msgtype6_receive_ep11(struct ap_device *ap_dev,
+static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq,
 					 struct ap_message *msg,
 					 struct ap_message *reply)
 {
@@ -981,11 +1002,11 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to handle a modexpo request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  PCIXCC/CEX2C device to the request distributor
  * @mex: pointer to the modexpo request buffer
  */
-static long zcrypt_msgtype6_modexpo(struct zcrypt_device *zdev,
+static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
 				  struct ica_rsa_modexpo *mex)
 {
 	struct ap_message ap_msg;
@@ -1002,21 +1023,21 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_device *zdev,
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg.private = &resp_type;
-	rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex);
+	rc = ICAMEX_msg_to_type6MEX_msgX(zq, &ap_msg, mex);
 	if (rc)
 		goto out_free;
 	init_completion(&resp_type.work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
+	ap_queue_message(zq->queue, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
 	if (rc == 0) {
 		rc = ap_msg.rc;
 		if (rc == 0)
-			rc = convert_response_ica(zdev, &ap_msg,
+			rc = convert_response_ica(zq, &ap_msg,
 						  mex->outputdata,
 						  mex->outputdatalength);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		ap_cancel_message(zq->queue, &ap_msg);
 out_free:
 	free_page((unsigned long) ap_msg.message);
 	return rc;
@@ -1025,11 +1046,11 @@ out_free:
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to handle a modexpo_crt request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  PCIXCC/CEX2C device to the request distributor
  * @crt: pointer to the modexpoc_crt request buffer
  */
-static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_device *zdev,
+static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
 				      struct ica_rsa_modexpo_crt *crt)
 {
 	struct ap_message ap_msg;
@@ -1046,21 +1067,22 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_device *zdev,
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg.private = &resp_type;
-	rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt);
+	rc = ICACRT_msg_to_type6CRT_msgX(zq, &ap_msg, crt);
 	if (rc)
 		goto out_free;
 	init_completion(&resp_type.work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
+	ap_queue_message(zq->queue, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
 	if (rc == 0) {
 		rc = ap_msg.rc;
 		if (rc == 0)
-			rc = convert_response_ica(zdev, &ap_msg,
+			rc = convert_response_ica(zq, &ap_msg,
 						  crt->outputdata,
 						  crt->outputdatalength);
-	} else
+	} else {
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		ap_cancel_message(zq->queue, &ap_msg);
+	}
 out_free:
 	free_page((unsigned long) ap_msg.message);
 	return rc;
@@ -1068,7 +1090,7 @@ out_free:
 
 unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
 				struct ap_message *ap_msg,
-				int *func_code)
+				unsigned int *func_code, unsigned short **dom)
 {
 	struct response_type resp_type = {
 		.type = PCIXCC_RESPONSE_TYPE_XCRB,
@@ -1088,7 +1110,7 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
 		return -ENOMEM;
 	}
 	memcpy(ap_msg->private, &resp_type, sizeof(resp_type));
-	rc = XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code);
+	rc = XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom);
 	if (rc) {
 		kzfree(ap_msg->message);
 		kzfree(ap_msg->private);
@@ -1099,11 +1121,11 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to handle a send_cprb request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  PCIXCC/CEX2C device to the request distributor
  * @xcRB: pointer to the send_cprb request buffer
  */
-static long zcrypt_msgtype6_send_cprb(struct zcrypt_device *zdev,
+static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq,
 				    struct ica_xcRB *xcRB,
 				    struct ap_message *ap_msg)
 {
@@ -1111,15 +1133,15 @@ static long zcrypt_msgtype6_send_cprb(struct zcrypt_device *zdev,
 	struct response_type *rtype = (struct response_type *)(ap_msg->private);
 
 	init_completion(&rtype->work);
-	ap_queue_message(zdev->ap_dev, ap_msg);
+	ap_queue_message(zq->queue, ap_msg);
 	rc = wait_for_completion_interruptible(&rtype->work);
 	if (rc == 0) {
 		rc = ap_msg->rc;
 		if (rc == 0)
-			rc = convert_response_xcrb(zdev, ap_msg, xcRB);
+			rc = convert_response_xcrb(zq, ap_msg, xcRB);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, ap_msg);
+		ap_cancel_message(zq->queue, ap_msg);
 
 	kzfree(ap_msg->message);
 	kzfree(ap_msg->private);
@@ -1128,7 +1150,7 @@ static long zcrypt_msgtype6_send_cprb(struct zcrypt_device *zdev,
 
 unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
 				    struct ap_message *ap_msg,
-				    int *func_code)
+				    unsigned int *func_code)
 {
 	struct response_type resp_type = {
 		.type = PCIXCC_RESPONSE_TYPE_EP11,
@@ -1159,11 +1181,11 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
 /**
  * The request distributor calls this function if it picked the CEX4P
  * device to handle a send_ep11_cprb request.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  CEX4P device to the request distributor
  * @xcRB: pointer to the ep11 user request block
  */
-static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev,
+static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq,
 					   struct ep11_urb *xcrb,
 					   struct ap_message *ap_msg)
 {
@@ -1196,7 +1218,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev,
 	 */
 	if (!((msg->cprbx.flags & 0x80) == 0x80)) {
 		msg->cprbx.target_id = (unsigned int)
-					AP_QID_QUEUE(zdev->ap_dev->qid);
+					AP_QID_QUEUE(zq->queue->qid);
 
 		if ((msg->pld_lenfmt & 0x80) == 0x80) { /*ext.len.fmt 2 or 3*/
 			switch (msg->pld_lenfmt & 0x03) {
@@ -1214,26 +1236,27 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev,
 		}
 		payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
 		payload_hdr->dom_val = (unsigned int)
-					AP_QID_QUEUE(zdev->ap_dev->qid);
+					AP_QID_QUEUE(zq->queue->qid);
 	}
 
 	init_completion(&rtype->work);
-	ap_queue_message(zdev->ap_dev, ap_msg);
+	ap_queue_message(zq->queue, ap_msg);
 	rc = wait_for_completion_interruptible(&rtype->work);
 	if (rc == 0) {
 		rc = ap_msg->rc;
 		if (rc == 0)
-			rc = convert_response_ep11_xcrb(zdev, ap_msg, xcrb);
+			rc = convert_response_ep11_xcrb(zq, ap_msg, xcrb);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, ap_msg);
+		ap_cancel_message(zq->queue, ap_msg);
 
 	kzfree(ap_msg->message);
 	kzfree(ap_msg->private);
 	return rc;
 }
 
-unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code)
+unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code,
+						   unsigned int *domain)
 {
 	struct response_type resp_type = {
 		.type = PCIXCC_RESPONSE_TYPE_XCRB,
@@ -1253,7 +1276,7 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code)
 	}
 	memcpy(ap_msg->private, &resp_type, sizeof(resp_type));
 
-	rng_type6CPRB_msgX(ap_msg, ZCRYPT_RNG_BUFFER_SIZE);
+	rng_type6CPRB_msgX(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain);
 
 	*func_code = HWRNG;
 	return 0;
@@ -1262,11 +1285,11 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code)
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to generate random data.
- * @zdev: pointer to zcrypt_device structure that identifies the
+ * @zq: pointer to zcrypt_queue structure that identifies the
  *	  PCIXCC/CEX2C device to the request distributor
  * @buffer: pointer to a memory page to return random data
  */
-static long zcrypt_msgtype6_rng(struct zcrypt_device *zdev,
+static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
 				char *buffer, struct ap_message *ap_msg)
 {
 	struct {
@@ -1281,18 +1304,18 @@ static long zcrypt_msgtype6_rng(struct zcrypt_device *zdev,
 	struct response_type *rtype = (struct response_type *)(ap_msg->private);
 	int rc;
 
-	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
 
 	init_completion(&rtype->work);
-	ap_queue_message(zdev->ap_dev, ap_msg);
+	ap_queue_message(zq->queue, ap_msg);
 	rc = wait_for_completion_interruptible(&rtype->work);
 	if (rc == 0) {
 		rc = ap_msg->rc;
 		if (rc == 0)
-			rc = convert_response_rng(zdev, ap_msg, buffer);
+			rc = convert_response_rng(zq, ap_msg, buffer);
 	} else
 		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, ap_msg);
+		ap_cancel_message(zq->queue, ap_msg);
 
 	kzfree(ap_msg->message);
 	kzfree(ap_msg->private);
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h
index a360dbe3c7d2..7a0d5b57821f 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.h
+++ b/drivers/s390/crypto/zcrypt_msgtype6.h
@@ -116,9 +116,11 @@ struct type86_fmt2_ext {
 	unsigned int	  offset4;	/* 0x00000000			*/
 } __packed;
 
-unsigned int get_cprb_fc(struct ica_xcRB *, struct ap_message *, int *);
-unsigned int get_ep11cprb_fc(struct ep11_urb *, struct ap_message *, int *);
-unsigned int get_rng_fc(struct ap_message *, int *);
+unsigned int get_cprb_fc(struct ica_xcRB *, struct ap_message *,
+			 unsigned int *, unsigned short **);
+unsigned int get_ep11cprb_fc(struct ep11_urb *, struct ap_message *,
+			     unsigned int *);
+unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *);
 
 #define LOW	10
 #define MEDIUM	100
@@ -134,7 +136,8 @@ int speed_idx_ep11(int);
  * @ap_msg: pointer to AP message
  */
 static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
-				      unsigned int random_number_length)
+				      unsigned int random_number_length,
+				      unsigned int *domain)
 {
 	struct {
 		struct type6_hdr hdr;
@@ -172,6 +175,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
 	msg->verb_length = 0x02;
 	msg->key_length = 0x02;
 	ap_msg->length = sizeof(*msg);
+	*domain = (unsigned short)msg->cprbx.domain;
 }
 
 void zcrypt_msgtype6_init(void);
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
index 43de39c74944..26ceaa696765 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.c
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -32,6 +32,7 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <asm/uaccess.h>
+#include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
 #include "zcrypt_api.h"
@@ -62,142 +63,34 @@ struct response_type {
 #define PCIXCC_RESPONSE_TYPE_ICA  0
 #define PCIXCC_RESPONSE_TYPE_XCRB 1
 
-static struct ap_device_id zcrypt_pcixcc_ids[] = {
-	{ AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) },
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2C) },
-	{ AP_DEVICE(AP_DEVICE_TYPE_CEX3C) },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_ids);
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("PCIXCC Cryptographic Coprocessor device driver, " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-static int zcrypt_pcixcc_probe(struct ap_device *ap_dev);
-static void zcrypt_pcixcc_remove(struct ap_device *ap_dev);
-
-static struct ap_driver zcrypt_pcixcc_driver = {
-	.probe = zcrypt_pcixcc_probe,
-	.remove = zcrypt_pcixcc_remove,
-	.ids = zcrypt_pcixcc_ids,
-	.request_timeout = PCIXCC_CLEANUP_TIME,
+static struct ap_device_id zcrypt_pcixcc_card_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_PCIXCC,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX2C,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX3C,
+	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+	{ /* end of list */ },
 };
 
-/**
- * Micro-code detection function. Its sends a message to a pcixcc card
- * to find out the microcode level.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_pcixcc_mcl(struct ap_device *ap_dev)
-{
-	static unsigned char msg[] = {
-		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,
-		0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,
-		0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20,
-		0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,
-		0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,
-		0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,
-		0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD,
-		0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,
-		0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22,
-		0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,
-		0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54,
-		0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,
-		0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00,
-		0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,
-		0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C,
-		0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,
-		0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9,
-		0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,
-		0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5,
-		0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,
-		0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01,
-		0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,
-		0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91,
-		0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,
-		0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C,
-		0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,
-		0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96,
-		0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,
-		0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47,
-		0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,
-		0xF1,0x3D,0x93,0x53
-	};
-	unsigned long long psmid;
-	struct CPRBX *cprbx;
-	char *reply;
-	int rc, i;
-
-	reply = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!reply)
-		return -ENOMEM;
+MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_card_ids);
 
-	rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, msg, sizeof(msg));
-	if (rc)
-		goto out_free;
-
-	/* Wait for the test message to complete. */
-	for (i = 0; i < 6; i++) {
-		msleep(300);
-		rc = ap_recv(ap_dev->qid, &psmid, reply, 4096);
-		if (rc == 0 && psmid == 0x0102030405060708ULL)
-			break;
-	}
-
-	if (i >= 6) {
-		/* Got no answer. */
-		rc = -ENODEV;
-		goto out_free;
-	}
+static struct ap_device_id zcrypt_pcixcc_queue_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_PCIXCC,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX2C,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX3C,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ /* end of list */ },
+};
 
-	cprbx = (struct CPRBX *) (reply + 48);
-	if (cprbx->ccp_rtcode == 8 && cprbx->ccp_rscode == 33)
-		rc = ZCRYPT_PCIXCC_MCL2;
-	else
-		rc = ZCRYPT_PCIXCC_MCL3;
-out_free:
-	free_page((unsigned long) reply);
-	return rc;
-}
+MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_queue_ids);
 
 /**
  * Large random number detection function. Its sends a message to a pcixcc
@@ -206,10 +99,11 @@ out_free:
  *
  * Returns 1 if large random numbers are supported, 0 if not and < 0 on error.
  */
-static int zcrypt_pcixcc_rng_supported(struct ap_device *ap_dev)
+static int zcrypt_pcixcc_rng_supported(struct ap_queue *aq)
 {
 	struct ap_message ap_msg;
 	unsigned long long psmid;
+	unsigned int domain;
 	struct {
 		struct type86_hdr hdr;
 		struct type86_fmt2_ext fmt2;
@@ -231,12 +125,12 @@ static int zcrypt_pcixcc_rng_supported(struct ap_device *ap_dev)
 	if (!ap_msg.message)
 		return -ENOMEM;
 
-	rng_type6CPRB_msgX(&ap_msg, 4);
+	rng_type6CPRB_msgX(&ap_msg, 4, &domain);
 
 	msg = ap_msg.message;
-	msg->cprbx.domain = AP_QID_QUEUE(ap_dev->qid);
+	msg->cprbx.domain = AP_QID_QUEUE(aq->qid);
 
-	rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, ap_msg.message,
+	rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.message,
 		     ap_msg.length);
 	if (rc)
 		goto out_free;
@@ -244,7 +138,7 @@ static int zcrypt_pcixcc_rng_supported(struct ap_device *ap_dev)
 	/* Wait for the test message to complete. */
 	for (i = 0; i < 2 * HZ; i++) {
 		msleep(1000 / HZ);
-		rc = ap_recv(ap_dev->qid, &psmid, ap_msg.message, 4096);
+		rc = ap_recv(aq->qid, &psmid, ap_msg.message, 4096);
 		if (rc == 0 && psmid == 0x0102030405060708ULL)
 			break;
 	}
@@ -266,120 +160,168 @@ out_free:
 }
 
 /**
- * Probe function for PCIXCC/CEX2C cards. It always accepts the AP device
- * since the bus_match already checked the hardware type. The PCIXCC
- * cards come in two flavours: micro code level 2 and micro code level 3.
- * This is checked by sending a test message to the device.
- * @ap_dev: pointer to the AP device.
+ * Probe function for PCIXCC/CEX2C card devices. It always accepts the
+ * AP device since the bus_match already checked the hardware type. The
+ * PCIXCC cards come in two flavours: micro code level 2 and micro code
+ * level 3. This is checked by sending a test message to the device.
+ * @ap_dev: pointer to the AP card device.
  */
-static int zcrypt_pcixcc_probe(struct ap_device *ap_dev)
+static int zcrypt_pcixcc_card_probe(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev;
 	/*
 	 * Normalized speed ratings per crypto adapter
 	 * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
 	 */
-	int PCIXCC_MCL2_SPEED_IDX[] = {10, 10, 10, 10, 10, 10, 10, 10};
-	int PCIXCC_MCL3_SPEED_IDX[] = { 8,  8,	8,  8,	8,  8,	8,  8};
-	int CEX2C_SPEED_IDX[] = {1000, 1400, 2400, 1100, 1500, 2600, 100, 12};
-	int CEX3C_SPEED_IDX[] = { 500,	700, 1400,  550,  800, 1500,  80, 10};
+	static const int CEX2C_SPEED_IDX[] = {
+		1000, 1400, 2400, 1100, 1500, 2600, 100, 12};
+	static const int CEX3C_SPEED_IDX[] = {
+		500,  700, 1400,  550,	800, 1500,  80, 10};
+
+	struct ap_card *ac = to_ap_card(&ap_dev->device);
+	struct zcrypt_card *zc;
 	int rc = 0;
 
-	zdev = zcrypt_device_alloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE);
-	if (!zdev)
+	zc = zcrypt_card_alloc();
+	if (!zc)
 		return -ENOMEM;
-	zdev->ap_dev = ap_dev;
-	zdev->online = 1;
-	switch (ap_dev->device_type) {
-	case AP_DEVICE_TYPE_PCIXCC:
-		rc = zcrypt_pcixcc_mcl(ap_dev);
-		if (rc < 0) {
-			zcrypt_device_free(zdev);
-			return rc;
-		}
-		zdev->user_space_type = rc;
-		if (rc == ZCRYPT_PCIXCC_MCL2) {
-			zdev->type_string = "PCIXCC_MCL2";
-			memcpy(zdev->speed_rating, PCIXCC_MCL2_SPEED_IDX,
-			       sizeof(PCIXCC_MCL2_SPEED_IDX));
-			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
-			zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
-			zdev->max_exp_bit_length = PCIXCC_MAX_MOD_SIZE;
-		} else {
-			zdev->type_string = "PCIXCC_MCL3";
-			memcpy(zdev->speed_rating, PCIXCC_MCL3_SPEED_IDX,
-			       sizeof(PCIXCC_MCL3_SPEED_IDX));
-			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
-			zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
-			zdev->max_exp_bit_length = PCIXCC_MAX_MOD_SIZE;
-		}
-		break;
+	zc->card = ac;
+	ac->private = zc;
+	switch (ac->ap_dev.device_type) {
 	case AP_DEVICE_TYPE_CEX2C:
-		zdev->user_space_type = ZCRYPT_CEX2C;
-		zdev->type_string = "CEX2C";
-		memcpy(zdev->speed_rating, CEX2C_SPEED_IDX,
+		zc->user_space_type = ZCRYPT_CEX2C;
+		zc->type_string = "CEX2C";
+		memcpy(zc->speed_rating, CEX2C_SPEED_IDX,
 		       sizeof(CEX2C_SPEED_IDX));
-		zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
-		zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
-		zdev->max_exp_bit_length = PCIXCC_MAX_MOD_SIZE;
+		zc->min_mod_size = PCIXCC_MIN_MOD_SIZE;
+		zc->max_mod_size = PCIXCC_MAX_MOD_SIZE;
+		zc->max_exp_bit_length = PCIXCC_MAX_MOD_SIZE;
 		break;
 	case AP_DEVICE_TYPE_CEX3C:
-		zdev->user_space_type = ZCRYPT_CEX3C;
-		zdev->type_string = "CEX3C";
-		memcpy(zdev->speed_rating, CEX3C_SPEED_IDX,
+		zc->user_space_type = ZCRYPT_CEX3C;
+		zc->type_string = "CEX3C";
+		memcpy(zc->speed_rating, CEX3C_SPEED_IDX,
 		       sizeof(CEX3C_SPEED_IDX));
-		zdev->min_mod_size = CEX3C_MIN_MOD_SIZE;
-		zdev->max_mod_size = CEX3C_MAX_MOD_SIZE;
-		zdev->max_exp_bit_length = CEX3C_MAX_MOD_SIZE;
+		zc->min_mod_size = CEX3C_MIN_MOD_SIZE;
+		zc->max_mod_size = CEX3C_MAX_MOD_SIZE;
+		zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE;
 		break;
 	default:
-		goto out_free;
+		zcrypt_card_free(zc);
+		return -ENODEV;
 	}
-	zdev->load = zdev->speed_rating[0];
+	zc->online = 1;
 
-	rc = zcrypt_pcixcc_rng_supported(ap_dev);
+	rc = zcrypt_card_register(zc);
+	if (rc) {
+		ac->private = NULL;
+		zcrypt_card_free(zc);
+	}
+
+	return rc;
+}
+
+/**
+ * This is called to remove the PCIXCC/CEX2C card driver information
+ * if an AP card device is removed.
+ */
+static void zcrypt_pcixcc_card_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
+
+	if (zc)
+		zcrypt_card_unregister(zc);
+}
+
+static struct ap_driver zcrypt_pcixcc_card_driver = {
+	.probe = zcrypt_pcixcc_card_probe,
+	.remove = zcrypt_pcixcc_card_remove,
+	.ids = zcrypt_pcixcc_card_ids,
+};
+
+/**
+ * Probe function for PCIXCC/CEX2C queue devices. It always accepts the
+ * AP device since the bus_match already checked the hardware type. The
+ * PCIXCC cards come in two flavours: micro code level 2 and micro code
+ * level 3. This is checked by sending a test message to the device.
+ * @ap_dev: pointer to the AP card device.
+ */
+static int zcrypt_pcixcc_queue_probe(struct ap_device *ap_dev)
+{
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq;
+	int rc;
+
+	zq = zcrypt_queue_alloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE);
+	if (!zq)
+		return -ENOMEM;
+	zq->queue = aq;
+	zq->online = 1;
+	atomic_set(&zq->load, 0);
+	rc = zcrypt_pcixcc_rng_supported(aq);
 	if (rc < 0) {
-		zcrypt_device_free(zdev);
+		zcrypt_queue_free(zq);
 		return rc;
 	}
 	if (rc)
-		zdev->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-					   MSGTYPE06_VARIANT_DEFAULT);
+		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
+					 MSGTYPE06_VARIANT_DEFAULT);
 	else
-		zdev->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-					   MSGTYPE06_VARIANT_NORNG);
-	ap_device_init_reply(ap_dev, &zdev->reply);
-	ap_dev->private = zdev;
-	rc = zcrypt_device_register(zdev);
-	if (rc)
-		goto out_free;
-	return 0;
-
- out_free:
-	ap_dev->private = NULL;
-	zcrypt_device_free(zdev);
+		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
+					 MSGTYPE06_VARIANT_NORNG);
+	ap_queue_init_reply(aq, &zq->reply);
+	aq->request_timeout = PCIXCC_CLEANUP_TIME,
+	aq->private = zq;
+	rc = zcrypt_queue_register(zq);
+	if (rc) {
+		aq->private = NULL;
+		zcrypt_queue_free(zq);
+	}
 	return rc;
 }
 
 /**
- * This is called to remove the extended PCIXCC/CEX2C driver information
- * if an AP device is removed.
+ * This is called to remove the PCIXCC/CEX2C queue driver information
+ * if an AP queue device is removed.
  */
-static void zcrypt_pcixcc_remove(struct ap_device *ap_dev)
+static void zcrypt_pcixcc_queue_remove(struct ap_device *ap_dev)
 {
-	struct zcrypt_device *zdev = ap_dev->private;
+	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
+	struct zcrypt_queue *zq = aq->private;
 
-	zcrypt_device_unregister(zdev);
+	ap_queue_remove(aq);
+	if (zq)
+		zcrypt_queue_unregister(zq);
 }
 
+static struct ap_driver zcrypt_pcixcc_queue_driver = {
+	.probe = zcrypt_pcixcc_queue_probe,
+	.remove = zcrypt_pcixcc_queue_remove,
+	.suspend = ap_queue_suspend,
+	.resume = ap_queue_resume,
+	.ids = zcrypt_pcixcc_queue_ids,
+};
+
 int __init zcrypt_pcixcc_init(void)
 {
-	return ap_driver_register(&zcrypt_pcixcc_driver, THIS_MODULE, "pcixcc");
+	int rc;
+
+	rc = ap_driver_register(&zcrypt_pcixcc_card_driver,
+				THIS_MODULE, "pcixcccard");
+	if (rc)
+		return rc;
+
+	rc = ap_driver_register(&zcrypt_pcixcc_queue_driver,
+				THIS_MODULE, "pcixccqueue");
+	if (rc)
+		ap_driver_unregister(&zcrypt_pcixcc_card_driver);
+
+	return rc;
 }
 
 void zcrypt_pcixcc_exit(void)
 {
-	ap_driver_unregister(&zcrypt_pcixcc_driver);
+	ap_driver_unregister(&zcrypt_pcixcc_queue_driver);
+	ap_driver_unregister(&zcrypt_pcixcc_card_driver);
 }
 
 module_init(zcrypt_pcixcc_init);
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
new file mode 100644
index 000000000000..c70b2528d9a3
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -0,0 +1,221 @@
+/*
+ *  zcrypt 2.1.0
+ *
+ *  Copyright IBM Corp. 2001, 2012
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *  MSGTYPE restruct:		  Holger Dengler <hd@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/compat.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/uaccess.h>
+#include <linux/hw_random.h>
+#include <linux/debugfs.h>
+#include <asm/debug.h>
+
+#include "zcrypt_debug.h"
+#include "zcrypt_api.h"
+
+#include "zcrypt_msgtype6.h"
+#include "zcrypt_msgtype50.h"
+
+/*
+ * Device attributes common for all crypto queue devices.
+ */
+
+static ssize_t zcrypt_queue_online_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", zq->online);
+}
+
+static ssize_t zcrypt_queue_online_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+	struct zcrypt_card *zc = zq->zcard;
+	int online;
+
+	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
+		return -EINVAL;
+
+	if (online && !zc->online)
+		return -EINVAL;
+	zq->online = online;
+	ZCRYPT_DBF_DEV(DBF_INFO, zq, "dev%02x%04xo%dman",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid), online);
+	if (!online)
+		ap_flush_queue(zq->queue);
+	return count;
+}
+
+static DEVICE_ATTR(online, 0644, zcrypt_queue_online_show,
+		   zcrypt_queue_online_store);
+
+static struct attribute *zcrypt_queue_attrs[] = {
+	&dev_attr_online.attr,
+	NULL,
+};
+
+static struct attribute_group zcrypt_queue_attr_group = {
+	.attrs = zcrypt_queue_attrs,
+};
+
+void zcrypt_queue_force_online(struct zcrypt_queue *zq, int online)
+{
+	zq->online = online;
+	if (!online)
+		ap_flush_queue(zq->queue);
+}
+
+struct zcrypt_queue *zcrypt_queue_alloc(size_t max_response_size)
+{
+	struct zcrypt_queue *zq;
+
+	zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL);
+	if (!zq)
+		return NULL;
+	zq->reply.message = kmalloc(max_response_size, GFP_KERNEL);
+	if (!zq->reply.message)
+		goto out_free;
+	zq->reply.length = max_response_size;
+	INIT_LIST_HEAD(&zq->list);
+	zq->dbf_area = zcrypt_dbf_devices;
+	kref_init(&zq->refcount);
+	return zq;
+
+out_free:
+	kfree(zq);
+	return NULL;
+}
+EXPORT_SYMBOL(zcrypt_queue_alloc);
+
+void zcrypt_queue_free(struct zcrypt_queue *zq)
+{
+	kfree(zq->reply.message);
+	kfree(zq);
+}
+EXPORT_SYMBOL(zcrypt_queue_free);
+
+static void zcrypt_queue_release(struct kref *kref)
+{
+	struct zcrypt_queue *zq =
+		container_of(kref, struct zcrypt_queue, refcount);
+	zcrypt_queue_free(zq);
+}
+
+void zcrypt_queue_get(struct zcrypt_queue *zq)
+{
+	kref_get(&zq->refcount);
+}
+EXPORT_SYMBOL(zcrypt_queue_get);
+
+int zcrypt_queue_put(struct zcrypt_queue *zq)
+{
+	return kref_put(&zq->refcount, zcrypt_queue_release);
+}
+EXPORT_SYMBOL(zcrypt_queue_put);
+
+/**
+ * zcrypt_queue_register() - Register a crypto queue device.
+ * @zq: Pointer to a crypto queue device
+ *
+ * Register a crypto queue device. Returns 0 if successful.
+ */
+int zcrypt_queue_register(struct zcrypt_queue *zq)
+{
+	struct zcrypt_card *zc;
+	int rc;
+
+	spin_lock(&zcrypt_list_lock);
+	zc = zq->queue->card->private;
+	zcrypt_card_get(zc);
+	zq->zcard = zc;
+	zq->online = 1;	/* New devices are online by default. */
+	ZCRYPT_DBF_DEV(DBF_INFO, zq, "dev%02x%04xo%dreg",
+		       AP_QID_CARD(zq->queue->qid),
+		       AP_QID_QUEUE(zq->queue->qid),
+		       zq->online);
+	list_add_tail(&zq->list, &zc->zqueues);
+	zcrypt_device_count++;
+	spin_unlock(&zcrypt_list_lock);
+
+	rc = sysfs_create_group(&zq->queue->ap_dev.device.kobj,
+				&zcrypt_queue_attr_group);
+	if (rc)
+		goto out;
+	get_device(&zq->queue->ap_dev.device);
+
+	if (zq->ops->rng) {
+		rc = zcrypt_rng_device_add();
+		if (rc)
+			goto out_unregister;
+	}
+	return 0;
+
+out_unregister:
+	sysfs_remove_group(&zq->queue->ap_dev.device.kobj,
+			   &zcrypt_queue_attr_group);
+	put_device(&zq->queue->ap_dev.device);
+out:
+	spin_lock(&zcrypt_list_lock);
+	list_del_init(&zq->list);
+	spin_unlock(&zcrypt_list_lock);
+	zcrypt_card_put(zc);
+	return rc;
+}
+EXPORT_SYMBOL(zcrypt_queue_register);
+
+/**
+ * zcrypt_queue_unregister(): Unregister a crypto queue device.
+ * @zq: Pointer to crypto queue device
+ *
+ * Unregister a crypto queue device.
+ */
+void zcrypt_queue_unregister(struct zcrypt_queue *zq)
+{
+	struct zcrypt_card *zc;
+
+	zc = zq->zcard;
+	spin_lock(&zcrypt_list_lock);
+	list_del_init(&zq->list);
+	zcrypt_device_count--;
+	spin_unlock(&zcrypt_list_lock);
+	zcrypt_card_put(zc);
+	if (zq->ops->rng)
+		zcrypt_rng_device_remove();
+	sysfs_remove_group(&zq->queue->ap_dev.device.kobj,
+			   &zcrypt_queue_attr_group);
+	put_device(&zq->queue->ap_dev.device);
+	zcrypt_queue_put(zq);
+}
+EXPORT_SYMBOL(zcrypt_queue_unregister);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index ed84c07f6a51..8a57f0b1242d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -175,7 +175,8 @@ struct ap_device_id {
 	kernel_ulong_t driver_info;
 };
 
-#define AP_DEVICE_ID_MATCH_DEVICE_TYPE		0x01
+#define AP_DEVICE_ID_MATCH_CARD_TYPE		0x01
+#define AP_DEVICE_ID_MATCH_QUEUE_TYPE		0x02
 
 /* s390 css bus devices (subchannels) */
 struct css_device_id {
-- 
cgit v1.2.3


From c297eb42690b904fb5b78dd9ad001bafe25f49ec Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 2 Dec 2016 14:01:55 +0100
Subject: libceph: always signal completion when done

r_safe_completion is currently, and has always been, signaled only if
on-disk ack was requested.  It's there for fsync and syncfs, which wait
for in-flight writes to flush - all data write requests set ONDISK.

However, the pool perm check code introduced in 4.2 sends a write
request with only ACK set.  An unfortunately timed syncfs can then hang
forever: r_safe_completion won't be signaled because only an unsafe
reply was requested.

We could patch ceph_osdc_sync() to skip !ONDISK write requests, but
that is somewhat incomplete and yet another special case.  Instead,
rename this completion to r_done_completion and always signal it when
the OSD client is done with the request, whether unsafe, safe, or
error.  This is a bit cleaner and helps with the cancellation code.

Reported-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c                  |  2 +-
 include/linux/ceph/osd_client.h |  2 +-
 net/ceph/osd_client.c           | 25 +++++++++++--------------
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 12ce2b562d14..f633165f3fdc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -864,7 +864,7 @@ void ceph_sync_write_wait(struct inode *inode)
 
 		dout("sync_write_wait on tid %llu (until %llu)\n",
 		     req->r_tid, last_tid);
-		wait_for_completion(&req->r_safe_completion);
+		wait_for_completion(&req->r_done_completion);
 		ceph_osdc_put_request(req);
 
 		spin_lock(&ci->i_unsafe_lock);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index a8e66344bacc..03a6653d329a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -176,7 +176,7 @@ struct ceph_osd_request {
 	struct kref       r_kref;
 	bool              r_mempool;
 	struct completion r_completion;
-	struct completion r_safe_completion;  /* fsync waiter */
+	struct completion r_done_completion;  /* fsync waiter */
 	ceph_osdc_callback_t r_callback;
 	ceph_osdc_unsafe_callback_t r_unsafe_callback;
 	struct list_head  r_unsafe_item;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5d812a26f05a..5a8e8670ea59 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req)
 
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
-	init_completion(&req->r_safe_completion);
+	init_completion(&req->r_done_completion);
 	RB_CLEAR_NODE(&req->r_node);
 	RB_CLEAR_NODE(&req->r_mc_node);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -1772,7 +1772,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
 	req->r_result = err;
 	__finish_request(req);
 	__complete_request(req);
-	complete_all(&req->r_safe_completion);
+	complete_all(&req->r_done_completion);
 	ceph_osdc_put_request(req);
 }
 
@@ -1797,7 +1797,9 @@ static void cancel_request(struct ceph_osd_request *req)
 	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
 	cancel_map_check(req);
-	finish_request(req);
+	__finish_request(req);
+	complete_all(&req->r_done_completion);
+	ceph_osdc_put_request(req);
 }
 
 static void check_pool_dne(struct ceph_osd_request *req)
@@ -2808,12 +2810,12 @@ static bool done_request(const struct ceph_osd_request *req,
  * ->r_unsafe_callback is set?	yes			no
  *
  * first reply is OK (needed	r_cb/r_completion,	r_cb/r_completion,
- * any or needed/got safe)	r_safe_completion	r_safe_completion
+ * any or needed/got safe)	r_done_completion	r_done_completion
  *
  * first reply is unsafe	r_unsafe_cb(true)	(nothing)
  *
  * when we get the safe reply	r_unsafe_cb(false),	r_cb/r_completion,
- *				r_safe_completion	r_safe_completion
+ *				r_done_completion	r_done_completion
  */
 static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
 {
@@ -2934,8 +2936,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
 			dout("req %p tid %llu cb\n", req, req->r_tid);
 			__complete_request(req);
 		}
-		if (m.flags & CEPH_OSD_FLAG_ONDISK)
-			complete_all(&req->r_safe_completion);
+		complete_all(&req->r_done_completion);
 		ceph_osdc_put_request(req);
 	} else {
 		if (req->r_unsafe_callback) {
@@ -3471,9 +3472,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
- * Unregister a registered request.  The request is not completed (i.e.
- * no callbacks or wakeups) - higher layers are supposed to know what
- * they are canceling.
+ * Unregister a registered request.  The request is not completed:
+ * ->r_result isn't set and __complete_request() isn't called.
  */
 void ceph_osdc_cancel_request(struct ceph_osd_request *req)
 {
@@ -3500,9 +3500,6 @@ static int wait_request_timeout(struct ceph_osd_request *req,
 	if (left <= 0) {
 		left = left ?: -ETIMEDOUT;
 		ceph_osdc_cancel_request(req);
-
-		/* kludge - need to to wake ceph_osdc_sync() */
-		complete_all(&req->r_safe_completion);
 	} else {
 		left = req->r_result; /* completed */
 	}
@@ -3549,7 +3546,7 @@ again:
 			up_read(&osdc->lock);
 			dout("%s waiting on req %p tid %llu last_tid %llu\n",
 			     __func__, req, req->r_tid, last_tid);
-			wait_for_completion(&req->r_safe_completion);
+			wait_for_completion(&req->r_done_completion);
 			ceph_osdc_put_request(req);
 			goto again;
 		}
-- 
cgit v1.2.3


From c7be96af89d4b53211862d8599b2430e8900ed92 Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hpe.com>
Date: Wed, 14 Dec 2016 15:04:10 -0800
Subject: signals: avoid unnecessary taking of sighand->siglock

When running certain database workload on a high-end system with many
CPUs, it was found that spinlock contention in the sigprocmask syscalls
became a significant portion of the overall CPU cycles as shown below.

  9.30%  9.30%  905387  dataserver  /proc/kcore 0x7fff8163f4d2
  [k] _raw_spin_lock_irq
            |
            ---_raw_spin_lock_irq
               |
               |--99.34%-- __set_current_blocked
               |          sigprocmask
               |          sys_rt_sigprocmask
               |          system_call_fastpath
               |          |
               |          |--50.63%-- __swapcontext
               |          |          |
               |          |          |--99.91%-- upsleepgeneric
               |          |
               |          |--49.36%-- __setcontext
               |          |          ktskRun

Looking further into the swapcontext function in glibc, it was found that
the function always call sigprocmask() without checking if there are
changes in the signal mask.

A check was added to the __set_current_blocked() function to avoid taking
the sighand->siglock spinlock if there is no change in the signal mask.
This will prevent unneeded spinlock contention when many threads are
trying to call sigprocmask().

With this patch applied, the spinlock contention in sigprocmask() was
gone.

Link: http://lkml.kernel.org/r/1474979209-11867-1-git-send-email-Waiman.Long@hpe.com
Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stas Sergeev <stsp@list.ru>
Cc: Scott J Norton <scott.norton@hpe.com>
Cc: Douglas Hatch <doug.hatch@hpe.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 17 +++++++++++++++++
 kernel/signal.c        |  7 +++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index b63f63eaa39c..5308304993be 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -97,6 +97,23 @@ static inline int sigisemptyset(sigset_t *set)
 	}
 }
 
+static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2)
+{
+	switch (_NSIG_WORDS) {
+	case 4:
+		return	(set1->sig[3] == set2->sig[3]) &&
+			(set1->sig[2] == set2->sig[2]) &&
+			(set1->sig[1] == set2->sig[1]) &&
+			(set1->sig[0] == set2->sig[0]);
+	case 2:
+		return	(set1->sig[1] == set2->sig[1]) &&
+			(set1->sig[0] == set2->sig[0]);
+	case 1:
+		return	set1->sig[0] == set2->sig[0];
+	}
+	return 0;
+}
+
 #define sigmask(sig)	(1UL << ((sig) - 1))
 
 #ifndef __HAVE_ARCH_SIG_SETOPS
diff --git a/kernel/signal.c b/kernel/signal.c
index 29a410780aa9..ae60996fedff 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2491,6 +2491,13 @@ void __set_current_blocked(const sigset_t *newset)
 {
 	struct task_struct *tsk = current;
 
+	/*
+	 * In case the signal mask hasn't changed, there is nothing we need
+	 * to do. The current->blocked shouldn't be modified by other task.
+	 */
+	if (sigequalsets(&tsk->blocked, newset))
+		return;
+
 	spin_lock_irq(&tsk->sighand->siglock);
 	__set_task_blocked(tsk, newset);
 	spin_unlock_irq(&tsk->sighand->siglock);
-- 
cgit v1.2.3


From 69f58384791ac6da4165ce8e6defd6f408f4afdf Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Wed, 14 Dec 2016 15:04:16 -0800
Subject: Revert "kdump, vmcoreinfo: report memory sections virtual addresses"

This reverts commit 0549a3c02efb ("kdump, vmcoreinfo: report memory
sections virtual addresses").

Commit 0549a3c02efb tells the userspace utility makedumpfile the
randomized base address of these memmory sections when mm kaslr is
enabled.  However the following patch "kexec: export the value of
phys_base instead of symbol address" makes makedumpfile not need these
addresses any more.

Besides we should use VMCOREINFO_NUMBER to export the value of the
variable so that we can use the existing number_table mechanism of
Makedumpfile to fetch it.  So revert it now.  If needed we can add it
later.

http://lists.infradead.org/pipermail/kexec/2016-October/017540.html
Link: http://lkml.kernel.org/r/1478568596-30060-1-git-send-email-bhe@redhat.com
Signed-off-by: Baoquan He <bhe@redhat.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Xunlei Pang <xlpang@redhat.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Eugene Surovegin <surovegin@google.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Atsushi Kumagai <ats-kumagai@wm.jp.nec.com>
Cc: Dave Anderson <anderson@redhat.com>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/machine_kexec_64.c | 3 ---
 include/linux/kexec.h              | 6 ------
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 8c1f218926d7..5a294e48b185 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -337,9 +337,6 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
 			      kaslr_offset());
-	VMCOREINFO_PAGE_OFFSET(PAGE_OFFSET);
-	VMCOREINFO_VMALLOC_START(VMALLOC_START);
-	VMCOREINFO_VMEMMAP_START(VMEMMAP_START);
 }
 
 /* arch-dependent functionality related to kexec file-based syscall */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 406c33dcae13..d7437777baaa 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -259,12 +259,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-#define VMCOREINFO_PAGE_OFFSET(value) \
-	vmcoreinfo_append_str("PAGE_OFFSET=%lx\n", (unsigned long)value)
-#define VMCOREINFO_VMALLOC_START(value) \
-	vmcoreinfo_append_str("VMALLOC_START=%lx\n", (unsigned long)value)
-#define VMCOREINFO_VMEMMAP_START(value) \
-	vmcoreinfo_append_str("VMEMMAP_START=%lx\n", (unsigned long)value)
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
-- 
cgit v1.2.3


From 0495c3d367944e4af053983ff3cdf256b567b053 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 14 Dec 2016 15:05:23 -0800
Subject: dma: add calls for dma_map_page_attrs and dma_unmap_page_attrs

Add support for mapping and unmapping a page with attributes.

The primary use for this is currently to allow for us to pass the
DMA_ATTR_SKIP_CPU_SYNC attribute when mapping and unmapping a page.  On
some architectures such as ARM the synchronization has significant
overhead and if we are already taking care of the sync_for_cpu and
sync_for_device from the driver there isn't much need to handle this in
the map/unmap calls as well.

Link: http://lkml.kernel.org/r/20161110113601.76501.46095.stgit@ahduyck-blue-test.jf.intel.com
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 08528afdf58b..10c5a17b1f51 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -243,29 +243,33 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
 		ops->unmap_sg(dev, sg, nents, dir, attrs);
 }
 
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      size_t offset, size_t size,
-				      enum dma_data_direction dir)
+static inline dma_addr_t dma_map_page_attrs(struct device *dev,
+					    struct page *page,
+					    size_t offset, size_t size,
+					    enum dma_data_direction dir,
+					    unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	kmemcheck_mark_initialized(page_address(page) + offset, size);
 	BUG_ON(!valid_dma_direction(dir));
-	addr = ops->map_page(dev, page, offset, size, dir, 0);
+	addr = ops->map_page(dev, page, offset, size, dir, attrs);
 	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
 
 	return addr;
 }
 
-static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
-				  size_t size, enum dma_data_direction dir)
+static inline void dma_unmap_page_attrs(struct device *dev,
+					dma_addr_t addr, size_t size,
+					enum dma_data_direction dir,
+					unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_page)
-		ops->unmap_page(dev, addr, size, dir, 0);
+		ops->unmap_page(dev, addr, size, dir, attrs);
 	debug_dma_unmap_page(dev, addr, size, dir, false);
 }
 
@@ -385,6 +389,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
 #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
 #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
+#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
+#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
 
 extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 			   void *cpu_addr, dma_addr_t dma_addr, size_t size);
-- 
cgit v1.2.3


From 44fdffd70504c15b617686753dfdf9eb0ddf3729 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 14 Dec 2016 15:05:26 -0800
Subject: mm: add support for releasing multiple instances of a page

Add a function that allows us to batch free a page that has multiple
references outstanding.  Specifically this function can be used to drop
a page being used in the page frag alloc cache.  With this drivers can
make use of functionality similar to the page frag alloc cache without
having to do any workarounds for the fact that there is no function that
frees multiple references.

Link: http://lkml.kernel.org/r/20161110113606.76501.70752.stgit@ahduyck-blue-test.jf.intel.com
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hans-Christian Noren Egtvedt <egtvedt@samfundet.no>
Cc: Helge Deller <deller@gmx.de>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Keguang Zhang <keguang.zhang@gmail.com>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Tobias Klauser <tklauser@distanz.ch>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  2 ++
 mm/page_alloc.c     | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f8041f9de31e..4175dca4ac39 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -506,6 +506,8 @@ extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
 struct page_frag_cache;
+extern void __page_frag_drain(struct page *page, unsigned int order,
+			      unsigned int count);
 extern void *__alloc_page_frag(struct page_frag_cache *nc,
 			       unsigned int fragsz, gfp_t gfp_mask);
 extern void __free_page_frag(void *addr);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f64e7bcb43b7..2c6d5f64feca 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3925,6 +3925,20 @@ static struct page *__page_frag_refill(struct page_frag_cache *nc,
 	return page;
 }
 
+void __page_frag_drain(struct page *page, unsigned int order,
+		       unsigned int count)
+{
+	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
+
+	if (page_ref_sub_and_test(page, count)) {
+		if (order == 0)
+			free_hot_cold_page(page, false);
+		else
+			__free_pages_ok(page, order);
+	}
+}
+EXPORT_SYMBOL(__page_frag_drain);
+
 void *__alloc_page_frag(struct page_frag_cache *nc,
 			unsigned int fragsz, gfp_t gfp_mask)
 {
-- 
cgit v1.2.3


From d1bd8ead126668a2d6c42d97cc3664e95b3fa1dc Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 14 Dec 2016 15:05:52 -0800
Subject: kdb: remove unused kdb_event handling

kdb_event state variable is only set but never checked in the kernel
code.

http://www.spinics.net/lists/kdb/msg01733.html suggests that this
variable affected WARN_CONSOLE_UNLOCKED() in the original
implementation.  But this check never went upstream.

The semantic is unclear and racy.  The value is updated after the
kdb_printf_lock is acquired and after it is released.  It should be
symmetric at minimum.  The value should be manipulated either inside or
outside the locked area.

Fortunately, it seems that the original function is gone and we could
simply remove the state variable.

Link: http://lkml.kernel.org/r/1480412276-16690-2-git-send-email-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kdb.h         | 1 -
 kernel/debug/kdb/kdb_io.c   | 2 --
 kernel/debug/kdb/kdb_main.c | 1 -
 3 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 410decacff8f..eb706188dc23 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -77,7 +77,6 @@ extern int kdb_poll_idx;
  * number whenever the kernel debugger is entered.
  */
 extern int kdb_initial_cpu;
-extern atomic_t kdb_event;
 
 /* Types and messages used for dynamically added kdb shell commands */
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 98c9011eac78..46f477bebe0c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -576,7 +576,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 		KDB_STATE_SET(PRINTF_LOCK);
 		spin_lock_irqsave(&kdb_printf_lock, flags);
 		got_printf_lock = 1;
-		atomic_inc(&kdb_event);
 	} else {
 		__acquire(kdb_printf_lock);
 	}
@@ -851,7 +850,6 @@ kdb_print_out:
 		got_printf_lock = 0;
 		spin_unlock_irqrestore(&kdb_printf_lock, flags);
 		KDB_STATE_CLEAR(PRINTF_LOCK);
-		atomic_dec(&kdb_event);
 	} else {
 		__release(kdb_printf_lock);
 	}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 2a20c0dfdafc..ca183919d302 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -60,7 +60,6 @@ int kdb_grep_trailing;
  * Kernel debugger state flags
  */
 int kdb_flags;
-atomic_t kdb_event;
 
 /*
  * kdb_lock protects updates to kdb_initial_cpu.  Used to
-- 
cgit v1.2.3


From 34aaff40b42148b23dcde40152480e25c7d2d759 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 14 Dec 2016 15:05:58 -0800
Subject: kdb: call vkdb_printf() from vprintk_default() only when wanted

kdb_trap_printk allows to pass normal printk() messages to kdb via
vkdb_printk().  For example, it is used to get backtrace using the
classic show_stack(), see kdb_show_stack().

vkdb_printf() tries to avoid a potential infinite loop by disabling the
trap.  But this approach is racy, for example:

CPU1					CPU2

vkdb_printf()
  // assume that kdb_trap_printk == 0
  saved_trap_printk = kdb_trap_printk;
  kdb_trap_printk = 0;

					kdb_show_stack()
					  kdb_trap_printk++;

Problem1: Now, a nested printk() on CPU0 calls vkdb_printf()
	  even when it should have been disabled. It will not
	  cause a deadlock but...

   // using the outdated saved value: 0
   kdb_trap_printk = saved_trap_printk;

					  kdb_trap_printk--;

Problem2: Now, kdb_trap_printk == -1 and will stay like this.
   It means that all messages will get passed to kdb from
   now on.

This patch removes the racy saved_trap_printk handling.  Instead, the
recursion is prevented by a check for the locked CPU.

The solution is still kind of racy.  A non-related printk(), from
another process, might get trapped by vkdb_printf().  And the wanted
printk() might not get trapped because kdb_printf_cpu is assigned.  But
this problem existed even with the original code.

A proper solution would be to get_cpu() before setting kdb_trap_printk
and trap messages only from this CPU.  I am not sure if it is worth the
effort, though.

In fact, the race is very theoretical.  When kdb is running any of the
commands that use kdb_trap_printk there is a single active CPU and the
other CPUs should be in a holding pen inside kgdb_cpu_enter().

The only time this is violated is when there is a timeout waiting for
the other CPUs to report to the holding pen.

Finally, note that the situation is a bit schizophrenic.  vkdb_printf()
explicitly allows recursion but only from KDB code that calls
kdb_printf() directly.  On the other hand, the generic printk()
recursion is not allowed because it might cause an infinite loop.  This
is why we could not hide the decision inside vkdb_printf() easily.

Link: http://lkml.kernel.org/r/1480412276-16690-4-git-send-email-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kdb.h       | 1 +
 kernel/debug/kdb/kdb_io.c | 9 ++-------
 kernel/printk/printk.c    | 3 ++-
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index eb706188dc23..68bd88223417 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -161,6 +161,7 @@ enum kdb_msgsrc {
 };
 
 extern int kdb_trap_printk;
+extern int kdb_printf_cpu;
 extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
 				      va_list args);
 extern __printf(1, 2) int kdb_printf(const char *, ...);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index daa76154fb1b..e74be38245ad 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -30,6 +30,7 @@
 char kdb_prompt_str[CMD_BUFLEN];
 
 int kdb_trap_printk;
+int kdb_printf_cpu = -1;
 
 static int kgdb_transition_check(char *buffer)
 {
@@ -554,24 +555,19 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 	int linecount;
 	int colcount;
 	int logging, saved_loglevel = 0;
-	int saved_trap_printk;
 	int retlen = 0;
 	int fnd, len;
 	int this_cpu, old_cpu;
-	static int kdb_printf_cpu = -1;
 	char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
 	char *moreprompt = "more> ";
 	struct console *c = console_drivers;
 	unsigned long uninitialized_var(flags);
 
-	local_irq_save(flags);
-	saved_trap_printk = kdb_trap_printk;
-	kdb_trap_printk = 0;
-
 	/* Serialize kdb_printf if multiple cpus try to write at once.
 	 * But if any cpu goes recursive in kdb, just print the output,
 	 * even if it is interleaved with any other text.
 	 */
+	local_irq_save(flags);
 	this_cpu = smp_processor_id();
 	for (;;) {
 		old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu);
@@ -849,7 +845,6 @@ kdb_print_out:
 		console_loglevel = saved_loglevel;
 	/* kdb_printf_cpu locked the code above. */
 	smp_store_release(&kdb_printf_cpu, old_cpu);
-	kdb_trap_printk = saved_trap_printk;
 	local_irq_restore(flags);
 	return retlen;
 }
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 577f2288d19f..a3ce35e0fa1e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args)
 	int r;
 
 #ifdef CONFIG_KGDB_KDB
-	if (unlikely(kdb_trap_printk)) {
+	/* Allow to pass printk() to kdb but avoid a recursion. */
+	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
 		r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
 		return r;
 	}
-- 
cgit v1.2.3


From 249e52e35580fcfe5dad53a7dcd7c1252788749c Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger@oracle.com>
Date: Wed, 14 Dec 2016 15:06:21 -0800
Subject: kernel/watchdog.c: move shared definitions to nmi.h

Patch series "Clean up watchdog handlers", v2.

This is an attempt to cleanup watchdog handlers.  Right now,
kernel/watchdog.c implements both softlockup and hardlockup detectors.
Softlockup code is generic.  Hardlockup code is arch specific.  Some
architectures don't use hardlockup detectors.  They use their own
watchdog detectors.  To make both these combination work, we have
numerous #ifdefs in kernel/watchdog.c.

We are trying here to make these handlers independent of each other.
Also provide an interface for architectures to implement their own
handlers.  watchdog_nmi_enable and watchdog_nmi_disable will be defined
as weak such that architectures can override its definitions.

Thanks to Don Zickus for his suggestions.
Here are our previous discussions
http://www.spinics.net/lists/sparclinux/msg16543.html
http://www.spinics.net/lists/sparclinux/msg16441.html

This patch (of 3):

Move shared macros and definitions to nmi.h so that watchdog.c, new file
watchdog_hld.c or any other architecture specific handler can use those
definitions.

Link: http://lkml.kernel.org/r/1478034826-43888-2-git-send-email-babu.moger@oracle.com
Signed-off-by: Babu Moger <babu.moger@oracle.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Josh Hunt <johunt@akamai.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 24 ++++++++++++++++++++++++
 kernel/watchdog.c   | 28 ++++------------------------
 2 files changed, 28 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a78c35cff1ae..aacca824a6ae 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -7,6 +7,23 @@
 #include <linux/sched.h>
 #include <asm/irq.h>
 
+/*
+ * The run state of the lockup detectors is controlled by the content of the
+ * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
+ * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
+ *
+ * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
+ * are variables that are only used as an 'interface' between the parameters
+ * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
+ * 'watchdog_thresh' variable is handled differently because its value is not
+ * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
+ * is equal zero.
+ */
+#define NMI_WATCHDOG_ENABLED_BIT   0
+#define SOFT_WATCHDOG_ENABLED_BIT  1
+#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
+#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -91,9 +108,16 @@ extern int nmi_watchdog_enabled;
 extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+#ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
+#endif
+extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
 			 void __user *, size_t *, loff_t *);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6d1020c03d41..d21049496e26 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,29 +27,12 @@
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
 
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT   0
-#define SOFT_WATCHDOG_ENABLED_BIT  1
-#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
 #else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
 int __read_mostly nmi_watchdog_enabled;
 int __read_mostly soft_watchdog_enabled;
@@ -59,9 +42,6 @@ int __read_mostly watchdog_thresh = 10;
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -289,7 +269,7 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static bool is_hardlockup(void)
+bool is_hardlockup(void)
 {
 	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-- 
cgit v1.2.3


From 5b56d49fc31dbb0487e14ead790fc81ca9fb2c99 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lstoakes@gmail.com>
Date: Wed, 14 Dec 2016 15:06:52 -0800
Subject: mm: add locked parameter to get_user_pages_remote()

Patch series "mm: unexport __get_user_pages_unlocked()".

This patch series continues the cleanup of get_user_pages*() functions
taking advantage of the fact we can now pass gup_flags as we please.

It firstly adds an additional 'locked' parameter to
get_user_pages_remote() to allow for its callers to utilise
VM_FAULT_RETRY functionality.  This is necessary as the invocation of
__get_user_pages_unlocked() in process_vm_rw_single_vec() makes use of
this and no other existing higher level function would allow it to do
so.

Secondly existing callers of __get_user_pages_unlocked() are replaced
with the appropriate higher-level replacement -
get_user_pages_unlocked() if the current task and memory descriptor are
referenced, or get_user_pages_remote() if other task/memory descriptors
are referenced (having acquiring mmap_sem.)

This patch (of 2):

Add a int *locked parameter to get_user_pages_remote() to allow
VM_FAULT_RETRY faulting behaviour similar to get_user_pages_[un]locked().

Taking into account the previous adjustments to get_user_pages*()
functions allowing for the passing of gup_flags, we are now in a
position where __get_user_pages_unlocked() need only be exported for his
ability to allow VM_FAULT_RETRY behaviour, this adjustment allows us to
subsequently unexport __get_user_pages_unlocked() as well as allowing
for future flexibility in the use of get_user_pages_remote().

[sfr@canb.auug.org.au: merge fix for get_user_pages_remote API change]
  Link: http://lkml.kernel.org/r/20161122210511.024ec341@canb.auug.org.au
Link: http://lkml.kernel.org/r/20161027095141.2569-2-lstoakes@gmail.com
Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 drivers/infiniband/core/umem_odp.c      |  2 +-
 drivers/vfio/vfio_iommu_type1.c         |  2 +-
 fs/exec.c                               |  2 +-
 include/linux/mm.h                      |  2 +-
 kernel/events/uprobes.c                 |  4 ++--
 mm/gup.c                                | 12 ++++++++----
 mm/memory.c                             |  2 +-
 security/tomoyo/domain.c                |  2 +-
 10 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 7d066a91d778..6f5dfabb3096 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -759,7 +759,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 	down_read(&mm->mmap_sem);
 	while (pinned < npages) {
 		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-					    flags, pvec + pinned, NULL);
+					    flags, pvec + pinned, NULL, NULL);
 		if (ret < 0)
 			break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 107ddf51065e..d068af2ec3a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -515,7 +515,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 					 obj->userptr.ptr + pinned * PAGE_SIZE,
 					 npages - pinned,
 					 flags,
-					 pvec + pinned, NULL);
+					 pvec + pinned, NULL, NULL);
 				if (ret < 0)
 					break;
 
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1f0fe3217f23..6b079a31dced 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -578,7 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 		 */
 		npages = get_user_pages_remote(owning_process, owning_mm,
 				user_virt, gup_num_pages,
-				flags, local_page_list, NULL);
+				flags, local_page_list, NULL, NULL);
 		up_read(&owning_mm->mmap_sem);
 
 		if (npages < 0)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9815e45c23c4..f3726ba12aa6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -362,7 +362,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
 		down_read(&mm->mmap_sem);
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-					    NULL);
+					    NULL, NULL);
 		up_read(&mm->mmap_sem);
 	}
 
diff --git a/fs/exec.c b/fs/exec.c
index 923c57d96899..eac60886b0ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -209,7 +209,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
 	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
-			&page, NULL);
+			&page, NULL, NULL);
 	if (ret <= 0)
 		return NULL;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d73aeaf..cc154454675a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1274,7 +1274,7 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
-			    struct vm_area_struct **vmas);
+			    struct vm_area_struct **vmas, int *locked);
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f9ec9add2164..215871bda3a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -301,7 +301,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
-			&vma);
+			&vma, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -1712,7 +1712,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	 * essentially a kernel access to the memory.
 	 */
 	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
-			NULL);
+			NULL, NULL);
 	if (result < 0)
 		return result;
 
diff --git a/mm/gup.c b/mm/gup.c
index e50178c58b97..b64c907aa4f0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -917,6 +917,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  *		only intends to ensure the pages are faulted in.
  * @vmas:	array of pointers to vmas corresponding to each page.
  *		Or NULL if the caller does not require them.
+ * @locked:	pointer to lock flag indicating whether lock is held and
+ *		subsequently whether VM_FAULT_RETRY functionality can be
+ *		utilised. Lock must initially be held.
  *
  * Returns number of pages pinned. This may be fewer than the number
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
@@ -960,10 +963,10 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas)
+		struct vm_area_struct **vmas, int *locked)
 {
 	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
-				       NULL, false,
+				       locked, true,
 				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
@@ -971,8 +974,9 @@ EXPORT_SYMBOL(get_user_pages_remote);
 /*
  * This is the same as get_user_pages_remote(), just with a
  * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's.  We also
- * obviously don't pass FOLL_REMOTE in here.
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter.  We also obviously don't pass
+ * FOLL_REMOTE in here.
  */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
diff --git a/mm/memory.c b/mm/memory.c
index c264f7cd3e47..3a6a1239c42b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3919,7 +3919,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		struct page *page = NULL;
 
 		ret = get_user_pages_remote(tsk, mm, addr, 1,
-				gup_flags, &page, &vma);
+				gup_flags, &page, &vma, NULL);
 		if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
 			break;
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 682b73af7766..838ffa78cfda 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -881,7 +881,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
 	 * the execve().
 	 */
 	if (get_user_pages_remote(current, bprm->mm, pos, 1,
-				FOLL_FORCE, &page, NULL) <= 0)
+				FOLL_FORCE, &page, NULL, NULL) <= 0)
 		return false;
 #else
 	page = bprm->page[pos / PAGE_SIZE];
-- 
cgit v1.2.3


From 8b7457ef9a9eb46cd1675d40d8e1fd3c47a38395 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lstoakes@gmail.com>
Date: Wed, 14 Dec 2016 15:06:55 -0800
Subject: mm: unexport __get_user_pages_unlocked()

Unexport the low-level __get_user_pages_unlocked() function and replaces
invocations with calls to more appropriate higher-level functions.

In hva_to_pfn_slow() we are able to replace __get_user_pages_unlocked()
with get_user_pages_unlocked() since we can now pass gup_flags.

In async_pf_execute() and process_vm_rw_single_vec() we need to pass
different tsk, mm arguments so get_user_pages_remote() is the sane
replacement in these cases (having added manual acquisition and release
of mmap_sem.)

Additionally get_user_pages_remote() reintroduces use of the FOLL_TOUCH
flag.  However, this flag was originally silently dropped by commit
1e9877902dc7 ("mm/gup: Introduce get_user_pages_remote()"), so this
appears to have been unintentional and reintroducing it is therefore not
an issue.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20161027095141.2569-3-lstoakes@gmail.com
Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h     |  3 ---
 mm/gup.c               |  8 ++++----
 mm/nommu.c             |  8 ++++----
 mm/process_vm_access.c | 12 ++++++++----
 virt/kvm/async_pf.c    | 10 +++++++---
 virt/kvm/kvm_main.c    |  5 ++---
 6 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc154454675a..7b2d14ed3815 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1280,9 +1280,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    struct vm_area_struct **vmas);
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
-long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
-			       unsigned long start, unsigned long nr_pages,
-			       struct page **pages, unsigned int gup_flags);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
diff --git a/mm/gup.c b/mm/gup.c
index b64c907aa4f0..55315555489d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -865,9 +865,10 @@ EXPORT_SYMBOL(get_user_pages_locked);
  * caller if required (just like with __get_user_pages). "FOLL_GET"
  * is set implicitly if "pages" is non-NULL.
  */
-__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
-					       unsigned long start, unsigned long nr_pages,
-					       struct page **pages, unsigned int gup_flags)
+static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
+		struct mm_struct *mm, unsigned long start,
+		unsigned long nr_pages, struct page **pages,
+		unsigned int gup_flags)
 {
 	long ret;
 	int locked = 1;
@@ -879,7 +880,6 @@ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct m
 		up_read(&mm->mmap_sem);
 	return ret;
 }
-EXPORT_SYMBOL(__get_user_pages_unlocked);
 
 /*
  * get_user_pages_unlocked() is suitable to replace the form:
diff --git a/mm/nommu.c b/mm/nommu.c
index 9720e0bab029..c299a7fbca70 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -176,9 +176,10 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
-long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
-			       unsigned long start, unsigned long nr_pages,
-			       struct page **pages, unsigned int gup_flags)
+static long __get_user_pages_unlocked(struct task_struct *tsk,
+			struct mm_struct *mm, unsigned long start,
+			unsigned long nr_pages, struct page **pages,
+			unsigned int gup_flags)
 {
 	long ret;
 	down_read(&mm->mmap_sem);
@@ -187,7 +188,6 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 	up_read(&mm->mmap_sem);
 	return ret;
 }
-EXPORT_SYMBOL(__get_user_pages_unlocked);
 
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 			     struct page **pages, unsigned int gup_flags)
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index be8dc8d1edb9..84d0c7eada2b 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -88,7 +88,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
 	ssize_t rc = 0;
 	unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
 		/ sizeof(struct pages *);
-	unsigned int flags = FOLL_REMOTE;
+	unsigned int flags = 0;
 
 	/* Work out address and page range required */
 	if (len == 0)
@@ -100,15 +100,19 @@ static int process_vm_rw_single_vec(unsigned long addr,
 
 	while (!rc && nr_pages && iov_iter_count(iter)) {
 		int pages = min(nr_pages, max_pages_per_loop);
+		int locked = 1;
 		size_t bytes;
 
 		/*
 		 * Get the pages we're interested in.  We must
-		 * add FOLL_REMOTE because task/mm might not
+		 * access remotely because task/mm might not
 		 * current/current->mm
 		 */
-		pages = __get_user_pages_unlocked(task, mm, pa, pages,
-						  process_pages, flags);
+		down_read(&mm->mmap_sem);
+		pages = get_user_pages_remote(task, mm, pa, pages, flags,
+					      process_pages, NULL, &locked);
+		if (locked)
+			up_read(&mm->mmap_sem);
 		if (pages <= 0)
 			return -EFAULT;
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index efeceb0a222d..3815e940fbea 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -76,16 +76,20 @@ static void async_pf_execute(struct work_struct *work)
 	struct kvm_vcpu *vcpu = apf->vcpu;
 	unsigned long addr = apf->addr;
 	gva_t gva = apf->gva;
+	int locked = 1;
 
 	might_sleep();
 
 	/*
 	 * This work is run asynchromously to the task which owns
 	 * mm and might be done in another context, so we must
-	 * use FOLL_REMOTE.
+	 * access remotely.
 	 */
-	__get_user_pages_unlocked(NULL, mm, addr, 1, NULL,
-			FOLL_WRITE | FOLL_REMOTE);
+	down_read(&mm->mmap_sem);
+	get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
+			&locked);
+	if (locked)
+		up_read(&mm->mmap_sem);
 
 	kvm_async_page_present_sync(vcpu, apf);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 823544c166be..de102cae7125 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1418,13 +1418,12 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 		npages = get_user_page_nowait(addr, write_fault, page);
 		up_read(&current->mm->mmap_sem);
 	} else {
-		unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON;
+		unsigned int flags = FOLL_HWPOISON;
 
 		if (write_fault)
 			flags |= FOLL_WRITE;
 
-		npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
-						   page, flags);
+		npages = get_user_pages_unlocked(addr, 1, page, flags);
 	}
 	if (npages != 1)
 		return npages;
-- 
cgit v1.2.3


From 82b0f8c39a3869b6fd2a10e180a862248736ec6f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:06:58 -0800
Subject: mm: join struct fault_env and vm_fault

Currently we have two different structures for passing fault information
around - struct vm_fault and struct fault_env.  DAX will need more
information in struct vm_fault to handle its faults so the content of
that structure would become event closer to fault_env.  Furthermore it
would need to generate struct fault_env to be able to call some of the
generic functions.  So at this point I don't think there's much use in
keeping these two structures separate.  Just embed into struct vm_fault
all that is needed to use it for both purposes.

Link: http://lkml.kernel.org/r/1479460644-25076-2-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |   2 +-
 fs/userfaultfd.c                  |  22 +-
 include/linux/huge_mm.h           |  10 +-
 include/linux/mm.h                |  28 +-
 include/linux/userfaultfd_k.h     |   4 +-
 mm/filemap.c                      |  14 +-
 mm/huge_memory.c                  | 173 ++++++------
 mm/internal.h                     |   2 +-
 mm/khugepaged.c                   |  20 +-
 mm/memory.c                       | 568 +++++++++++++++++++-------------------
 mm/nommu.c                        |   2 +-
 11 files changed, 423 insertions(+), 422 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 1b5f15653b1b..69e2387ca278 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -556,7 +556,7 @@ till "end_pgoff". ->map_pages() is called with page table locked and must
 not block.  If it's not possible to reach a page without blocking,
 filesystem should skip it. Filesystem should use do_set_pte() to setup
 page table entry. Pointer to entry associated with the page is passed in
-"pte" field in fault_env structure. Pointers to entries for other offsets
+"pte" field in vm_fault structure. Pointers to entries for other offsets
 should be calculated relative to "pte".
 
 	->page_mkwrite() is called when a previously read-only pte is
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 85959d8324df..d96e2f30084b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -257,9 +257,9 @@ out:
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct fault_env *fe, unsigned long reason)
+int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
-	struct mm_struct *mm = fe->vma->vm_mm;
+	struct mm_struct *mm = vmf->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
@@ -268,7 +268,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
 	ret = VM_FAULT_SIGBUS;
-	ctx = fe->vma->vm_userfaultfd_ctx.ctx;
+	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
 
@@ -301,17 +301,18 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	 * without first stopping userland access to the memory. For
 	 * VM_UFFD_MISSING userfaults this is enough for now.
 	 */
-	if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
+	if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
 		/*
 		 * Validate the invariant that nowait must allow retry
 		 * to be sure not to return SIGBUS erroneously on
 		 * nowait invocations.
 		 */
-		BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
+		BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
 #ifdef CONFIG_DEBUG_VM
 		if (printk_ratelimit()) {
 			printk(KERN_WARNING
-			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
+			       "FAULT_FLAG_ALLOW_RETRY missing %x\n",
+			       vmf->flags);
 			dump_stack();
 		}
 #endif
@@ -323,7 +324,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	 * and wait.
 	 */
 	ret = VM_FAULT_RETRY;
-	if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
+	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
 	/* take the reference before dropping the mmap_sem */
@@ -331,11 +332,11 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 
 	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
 	uwq.wq.private = current;
-	uwq.msg = userfault_msg(fe->address, fe->flags, reason);
+	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
 	uwq.ctx = ctx;
 
 	return_to_userland =
-		(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
@@ -353,7 +354,8 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
-	must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
+	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
+					  reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1f782aa1d8e6..97e478d6b690 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -1,12 +1,12 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
-extern int do_huge_pmd_anonymous_page(struct fault_env *fe);
+extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd);
-extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd);
+extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -142,7 +142,7 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
-extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd);
+extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -212,7 +212,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd)
+static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	return 0;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b2d14ed3815..de5bcead2511 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -292,10 +292,16 @@ extern pgprot_t protection_map[16];
  * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
+	struct vm_area_struct *vma;	/* Target VMA */
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	gfp_t gfp_mask;			/* gfp mask to be used for allocations */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
-	void __user *virtual_address;	/* Faulting virtual address */
+	unsigned long address;		/* Faulting virtual address */
+	void __user *virtual_address;	/* Faulting virtual address masked by
+					 * PAGE_MASK */
+	pmd_t *pmd;			/* Pointer to pmd entry matching
+					 * the 'address'
+					 */
 
 	struct page *cow_page;		/* Handler may choose to COW */
 	struct page *page;		/* ->fault handlers should return a
@@ -309,19 +315,7 @@ struct vm_fault {
 					 * VM_FAULT_DAX_LOCKED and fill in
 					 * entry here.
 					 */
-};
-
-/*
- * Page fault context: passes though page fault handler instead of endless list
- * of function arguments.
- */
-struct fault_env {
-	struct vm_area_struct *vma;	/* Target VMA */
-	unsigned long address;		/* Faulting virtual address */
-	unsigned int flags;		/* FAULT_FLAG_xxx flags */
-	pmd_t *pmd;			/* Pointer to pmd entry matching
-					 * the 'address'
-					 */
+	/* These three entries are valid only while holding ptl lock */
 	pte_t *pte;			/* Pointer to pte entry matching
 					 * the 'address'. NULL if the page
 					 * table hasn't been allocated.
@@ -351,7 +345,7 @@ struct vm_operations_struct {
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
 						pmd_t *, unsigned int flags);
-	void (*map_pages)(struct fault_env *fe,
+	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 	/* notification that a previously read-only page is about to become
@@ -625,7 +619,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
+int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
 #endif
 
@@ -2094,7 +2088,7 @@ extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
-extern void filemap_map_pages(struct fault_env *fe,
+extern void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index dd66a952e8cd..11b92b047a1e 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -27,7 +27,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct fault_env *fe, unsigned long reason);
+extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len);
@@ -55,7 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct fault_env *fe, unsigned long reason)
+static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index 69568388c699..235021e361eb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2164,12 +2164,12 @@ page_not_uptodate:
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct fault_env *fe,
+void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	struct file *file = fe->vma->vm_file;
+	struct file *file = vmf->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
@@ -2225,11 +2225,11 @@ repeat:
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 
-		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
-		if (fe->pte)
-			fe->pte += iter.index - last_pgoff;
+		vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		if (vmf->pte)
+			vmf->pte += iter.index - last_pgoff;
 		last_pgoff = iter.index;
-		if (alloc_set_pte(fe, NULL, page))
+		if (alloc_set_pte(vmf, NULL, page))
 			goto unlock;
 		unlock_page(page);
 		goto next;
@@ -2239,7 +2239,7 @@ skip:
 		put_page(page);
 next:
 		/* Huge page is mapped? No need to proceed. */
-		if (pmd_trans_huge(*fe->pmd))
+		if (pmd_trans_huge(*vmf->pmd))
 			break;
 		if (iter.index == end_pgoff)
 			break;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cee42cf05477..10eedbf14421 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -542,13 +542,13 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
-static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
+static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 		gfp_t gfp)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -573,9 +573,9 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 	 */
 	__SetPageUptodate(page);
 
-	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-	if (unlikely(!pmd_none(*fe->pmd))) {
-		spin_unlock(fe->ptl);
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_none(*vmf->pmd))) {
+		spin_unlock(vmf->ptl);
 		mem_cgroup_cancel_charge(page, memcg, true);
 		put_page(page);
 		pte_free(vma->vm_mm, pgtable);
@@ -586,11 +586,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 		if (userfaultfd_missing(vma)) {
 			int ret;
 
-			spin_unlock(fe->ptl);
+			spin_unlock(vmf->ptl);
 			mem_cgroup_cancel_charge(page, memcg, true);
 			put_page(page);
 			pte_free(vma->vm_mm, pgtable);
-			ret = handle_userfault(fe, VM_UFFD_MISSING);
+			ret = handle_userfault(vmf, VM_UFFD_MISSING);
 			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			return ret;
 		}
@@ -600,11 +600,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 		page_add_new_anon_rmap(page, vma, haddr, true);
 		mem_cgroup_commit_charge(page, memcg, false, true);
 		lru_cache_add_active_or_unevictable(page, vma);
-		pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable);
-		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		atomic_long_inc(&vma->vm_mm->nr_ptes);
-		spin_unlock(fe->ptl);
+		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
 	}
 
@@ -651,12 +651,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	return true;
 }
 
-int do_huge_pmd_anonymous_page(struct fault_env *fe)
+int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	gfp_t gfp;
 	struct page *page;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 
 	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
 		return VM_FAULT_FALLBACK;
@@ -664,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
 		return VM_FAULT_OOM;
 	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
-	if (!(fe->flags & FAULT_FLAG_WRITE) &&
+	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
 			!mm_forbids_zeropage(vma->vm_mm) &&
 			transparent_hugepage_use_zero_page()) {
 		pgtable_t pgtable;
@@ -680,22 +680,22 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
 			count_vm_event(THP_FAULT_FALLBACK);
 			return VM_FAULT_FALLBACK;
 		}
-		fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 		ret = 0;
 		set = false;
-		if (pmd_none(*fe->pmd)) {
+		if (pmd_none(*vmf->pmd)) {
 			if (userfaultfd_missing(vma)) {
-				spin_unlock(fe->ptl);
-				ret = handle_userfault(fe, VM_UFFD_MISSING);
+				spin_unlock(vmf->ptl);
+				ret = handle_userfault(vmf, VM_UFFD_MISSING);
 				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			} else {
 				set_huge_zero_page(pgtable, vma->vm_mm, vma,
-						   haddr, fe->pmd, zero_page);
-				spin_unlock(fe->ptl);
+						   haddr, vmf->pmd, zero_page);
+				spin_unlock(vmf->ptl);
 				set = true;
 			}
 		} else
-			spin_unlock(fe->ptl);
+			spin_unlock(vmf->ptl);
 		if (!set)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
@@ -707,7 +707,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
 		return VM_FAULT_FALLBACK;
 	}
 	prep_transhuge_page(page);
-	return __do_huge_pmd_anonymous_page(fe, page, gfp);
+	return __do_huge_pmd_anonymous_page(vmf, page, gfp);
 }
 
 static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -879,30 +879,30 @@ out:
 	return ret;
 }
 
-void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd)
+void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	pmd_t entry;
 	unsigned long haddr;
 
-	fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
-	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+	vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
 		goto unlock;
 
 	entry = pmd_mkyoung(orig_pmd);
-	haddr = fe->address & HPAGE_PMD_MASK;
-	if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
-				fe->flags & FAULT_FLAG_WRITE))
-		update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
+	haddr = vmf->address & HPAGE_PMD_MASK;
+	if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
+				vmf->flags & FAULT_FLAG_WRITE))
+		update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
 
 unlock:
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 }
 
-static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
+static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 		struct page *page)
 {
-	struct vm_area_struct *vma = fe->vma;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	pmd_t _pmd;
@@ -921,7 +921,7 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
 					       __GFP_OTHER_NODE, vma,
-					       fe->address, page_to_nid(page));
+					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
 			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
 				     GFP_KERNEL, &memcg, false))) {
@@ -952,15 +952,15 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
 		goto out_free_pages;
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
-	pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
+	pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 	/* leave pmd empty until pte is filled */
 
-	pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd);
+	pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd);
 	pmd_populate(vma->vm_mm, &_pmd, pgtable);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -969,20 +969,20 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		memcg = (void *)page_private(pages[i]);
 		set_page_private(pages[i], 0);
-		page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
+		page_add_new_anon_rmap(pages[i], vmf->vma, haddr, false);
 		mem_cgroup_commit_charge(pages[i], memcg, false, false);
 		lru_cache_add_active_or_unevictable(pages[i], vma);
-		fe->pte = pte_offset_map(&_pmd, haddr);
-		VM_BUG_ON(!pte_none(*fe->pte));
-		set_pte_at(vma->vm_mm, haddr, fe->pte, entry);
-		pte_unmap(fe->pte);
+		vmf->pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*vmf->pte));
+		set_pte_at(vma->vm_mm, haddr, vmf->pte, entry);
+		pte_unmap(vmf->pte);
 	}
 	kfree(pages);
 
 	smp_wmb(); /* make pte visible before pmd */
-	pmd_populate(vma->vm_mm, fe->pmd, pgtable);
+	pmd_populate(vma->vm_mm, vmf->pmd, pgtable);
 	page_remove_rmap(page, true);
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
@@ -993,7 +993,7 @@ out:
 	return ret;
 
 out_free_pages:
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		memcg = (void *)page_private(pages[i]);
@@ -1005,23 +1005,23 @@ out_free_pages:
 	goto out;
 }
 
-int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
+int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *new_page;
 	struct mem_cgroup *memcg;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	gfp_t huge_gfp;			/* for allocation and charge */
 	int ret = 0;
 
-	fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd);
+	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
 	if (is_huge_zero_pmd(orig_pmd))
 		goto alloc;
-	spin_lock(fe->ptl);
-	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+	spin_lock(vmf->ptl);
+	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
 		goto out_unlock;
 
 	page = pmd_page(orig_pmd);
@@ -1034,13 +1034,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry,  1))
-			update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
+			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		ret |= VM_FAULT_WRITE;
 		goto out_unlock;
 	}
 	get_page(page);
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
@@ -1053,12 +1053,12 @@ alloc:
 		prep_transhuge_page(new_page);
 	} else {
 		if (!page) {
-			split_huge_pmd(vma, fe->pmd, fe->address);
+			split_huge_pmd(vma, vmf->pmd, vmf->address);
 			ret |= VM_FAULT_FALLBACK;
 		} else {
-			ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page);
+			ret = do_huge_pmd_wp_page_fallback(vmf, orig_pmd, page);
 			if (ret & VM_FAULT_OOM) {
-				split_huge_pmd(vma, fe->pmd, fe->address);
+				split_huge_pmd(vma, vmf->pmd, vmf->address);
 				ret |= VM_FAULT_FALLBACK;
 			}
 			put_page(page);
@@ -1070,7 +1070,7 @@ alloc:
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
 					huge_gfp, &memcg, true))) {
 		put_page(new_page);
-		split_huge_pmd(vma, fe->pmd, fe->address);
+		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		if (page)
 			put_page(page);
 		ret |= VM_FAULT_FALLBACK;
@@ -1090,11 +1090,11 @@ alloc:
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-	spin_lock(fe->ptl);
+	spin_lock(vmf->ptl);
 	if (page)
 		put_page(page);
-	if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) {
-		spin_unlock(fe->ptl);
+	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
+		spin_unlock(vmf->ptl);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		put_page(new_page);
 		goto out_mn;
@@ -1102,12 +1102,12 @@ alloc:
 		pmd_t entry;
 		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
+		pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr, true);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_active_or_unevictable(new_page, vma);
-		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
-		update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		if (!page) {
 			add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		} else {
@@ -1117,13 +1117,13 @@ alloc:
 		}
 		ret |= VM_FAULT_WRITE;
 	}
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 out_mn:
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 out:
 	return ret;
 out_unlock:
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 	return ret;
 }
 
@@ -1196,12 +1196,12 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
+int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
@@ -1209,8 +1209,8 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 	bool was_writable;
 	int flags = 0;
 
-	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-	if (unlikely(!pmd_same(pmd, *fe->pmd)))
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(pmd, *vmf->pmd)))
 		goto out_unlock;
 
 	/*
@@ -1218,9 +1218,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 	 * without disrupting NUMA hinting information. Do not relock and
 	 * check_same as the page may no longer be mapped.
 	 */
-	if (unlikely(pmd_trans_migrating(*fe->pmd))) {
-		page = pmd_page(*fe->pmd);
-		spin_unlock(fe->ptl);
+	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
+		page = pmd_page(*vmf->pmd);
+		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
 		goto out;
 	}
@@ -1253,7 +1253,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
-		spin_unlock(fe->ptl);
+		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
@@ -1264,12 +1264,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 	 * to serialises splits
 	 */
 	get_page(page);
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
-	spin_lock(fe->ptl);
-	if (unlikely(!pmd_same(pmd, *fe->pmd))) {
+	spin_lock(vmf->ptl);
+	if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
 		unlock_page(page);
 		put_page(page);
 		page_nid = -1;
@@ -1287,9 +1287,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.
 	 */
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
-				fe->pmd, pmd, fe->address, page, target_nid);
+				vmf->pmd, pmd, vmf->address, page, target_nid);
 	if (migrated) {
 		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
@@ -1304,18 +1304,19 @@ clear_pmdnuma:
 	pmd = pmd_mkyoung(pmd);
 	if (was_writable)
 		pmd = pmd_mkwrite(pmd);
-	set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd);
-	update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
+	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 	unlock_page(page);
 out_unlock:
-	spin_unlock(fe->ptl);
+	spin_unlock(vmf->ptl);
 
 out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
+				vmf->flags);
 
 	return 0;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 537ac9951f5f..093b1eacc91b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,7 +36,7 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
-int do_swap_page(struct fault_env *fe, pte_t orig_pte);
+int do_swap_page(struct vm_fault *vmf, pte_t orig_pte);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 09460955e818..d950c2509161 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -877,7 +877,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 {
 	pte_t pteval;
 	int swapped_in = 0, ret = 0;
-	struct fault_env fe = {
+	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address,
 		.flags = FAULT_FLAG_ALLOW_RETRY,
@@ -889,19 +889,19 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 		return false;
 	}
-	fe.pte = pte_offset_map(pmd, address);
-	for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
-			fe.pte++, fe.address += PAGE_SIZE) {
-		pteval = *fe.pte;
+	vmf.pte = pte_offset_map(pmd, address);
+	for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
+			vmf.pte++, vmf.address += PAGE_SIZE) {
+		pteval = *vmf.pte;
 		if (!is_swap_pte(pteval))
 			continue;
 		swapped_in++;
-		ret = do_swap_page(&fe, pteval);
+		ret = do_swap_page(&vmf, pteval);
 
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
-			if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
+			if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
 				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
@@ -915,10 +915,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 			return false;
 		}
 		/* pte is unmapped now, we need to map it */
-		fe.pte = pte_offset_map(pmd, fe.address);
+		vmf.pte = pte_offset_map(pmd, vmf.address);
 	}
-	fe.pte--;
-	pte_unmap(fe.pte);
+	vmf.pte--;
+	pte_unmap(vmf.pte);
 	trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
 	return true;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 3a6a1239c42b..512e1c359193 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2070,11 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
  * case, all we need to do here is to mark the page as writable and update
  * any related book-keeping.
  */
-static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
+static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte,
 			struct page *page, int page_mkwrite, int dirty_shared)
-	__releases(fe->ptl)
+	__releases(vmf->ptl)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	pte_t entry;
 	/*
 	 * Clear the pages cpupid information as the existing
@@ -2084,12 +2084,12 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
 	if (page)
 		page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
 
-	flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
+	flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
 	entry = pte_mkyoung(orig_pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1))
-		update_mmu_cache(vma, fe->address, fe->pte);
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
+		update_mmu_cache(vma, vmf->address, vmf->pte);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 
 	if (dirty_shared) {
 		struct address_space *mapping;
@@ -2135,15 +2135,15 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
  *   held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
+static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
 		struct page *old_page)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *new_page = NULL;
 	pte_t entry;
 	int page_copied = 0;
-	const unsigned long mmun_start = fe->address & PAGE_MASK;
+	const unsigned long mmun_start = vmf->address & PAGE_MASK;
 	const unsigned long mmun_end = mmun_start + PAGE_SIZE;
 	struct mem_cgroup *memcg;
 
@@ -2151,15 +2151,16 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 		goto oom;
 
 	if (is_zero_pfn(pte_pfn(orig_pte))) {
-		new_page = alloc_zeroed_user_highpage_movable(vma, fe->address);
+		new_page = alloc_zeroed_user_highpage_movable(vma,
+							      vmf->address);
 		if (!new_page)
 			goto oom;
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
-				fe->address);
+				vmf->address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, old_page, fe->address, vma);
+		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
 	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
@@ -2172,8 +2173,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl);
-	if (likely(pte_same(*fe->pte, orig_pte))) {
+	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+	if (likely(pte_same(*vmf->pte, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
 				dec_mm_counter_fast(mm,
@@ -2183,7 +2184,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 		} else {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
-		flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
+		flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
@@ -2192,8 +2193,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 		 * seen in the presence of one thread doing SMC and another
 		 * thread doing COW.
 		 */
-		ptep_clear_flush_notify(vma, fe->address, fe->pte);
-		page_add_new_anon_rmap(new_page, vma, fe->address, false);
+		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
+		page_add_new_anon_rmap(new_page, vma, vmf->address, false);
 		mem_cgroup_commit_charge(new_page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(new_page, vma);
 		/*
@@ -2201,8 +2202,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 		 * mmu page tables (such as kvm shadow page tables), we want the
 		 * new page to be mapped directly into the secondary page table.
 		 */
-		set_pte_at_notify(mm, fe->address, fe->pte, entry);
-		update_mmu_cache(vma, fe->address, fe->pte);
+		set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
+		update_mmu_cache(vma, vmf->address, vmf->pte);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -2239,7 +2240,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
 	if (new_page)
 		put_page(new_page);
 
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
@@ -2267,43 +2268,43 @@ oom:
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct fault_env *fe,  pte_t orig_pte)
+static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 
 	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
-		struct vm_fault vmf = {
+		struct vm_fault vmf2 = {
 			.page = NULL,
-			.pgoff = linear_page_index(vma, fe->address),
+			.pgoff = linear_page_index(vma, vmf->address),
 			.virtual_address =
-				(void __user *)(fe->address & PAGE_MASK),
+				(void __user *)(vmf->address & PAGE_MASK),
 			.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
 		};
 		int ret;
 
-		pte_unmap_unlock(fe->pte, fe->ptl);
-		ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		ret = vma->vm_ops->pfn_mkwrite(vma, &vmf2);
 		if (ret & VM_FAULT_ERROR)
 			return ret;
-		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-				&fe->ptl);
+		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				vmf->address, &vmf->ptl);
 		/*
 		 * We might have raced with another page fault while we
 		 * released the pte_offset_map_lock.
 		 */
-		if (!pte_same(*fe->pte, orig_pte)) {
-			pte_unmap_unlock(fe->pte, fe->ptl);
+		if (!pte_same(*vmf->pte, orig_pte)) {
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			return 0;
 		}
 	}
-	return wp_page_reuse(fe, orig_pte, NULL, 0, 0);
+	return wp_page_reuse(vmf, orig_pte, NULL, 0, 0);
 }
 
-static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
+static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
 		struct page *old_page)
-	__releases(fe->ptl)
+	__releases(vmf->ptl)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	int page_mkwrite = 0;
 
 	get_page(old_page);
@@ -2311,8 +2312,8 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 		int tmp;
 
-		pte_unmap_unlock(fe->pte, fe->ptl);
-		tmp = do_page_mkwrite(vma, old_page, fe->address);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		tmp = do_page_mkwrite(vma, old_page, vmf->address);
 		if (unlikely(!tmp || (tmp &
 				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
 			put_page(old_page);
@@ -2324,18 +2325,18 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
 		 * they did, we just return, as we can count on the
 		 * MMU to tell us if they didn't also make it writable.
 		 */
-		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-						 &fe->ptl);
-		if (!pte_same(*fe->pte, orig_pte)) {
+		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+						vmf->address, &vmf->ptl);
+		if (!pte_same(*vmf->pte, orig_pte)) {
 			unlock_page(old_page);
-			pte_unmap_unlock(fe->pte, fe->ptl);
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			put_page(old_page);
 			return 0;
 		}
 		page_mkwrite = 1;
 	}
 
-	return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1);
+	return wp_page_reuse(vmf, orig_pte, old_page, page_mkwrite, 1);
 }
 
 /*
@@ -2356,13 +2357,13 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
  * but allow concurrent faults), with pte both mapped and locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
-	__releases(fe->ptl)
+static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
+	__releases(vmf->ptl)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *old_page;
 
-	old_page = vm_normal_page(vma, fe->address, orig_pte);
+	old_page = vm_normal_page(vma, vmf->address, orig_pte);
 	if (!old_page) {
 		/*
 		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
@@ -2373,10 +2374,10 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
 		 */
 		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 				     (VM_WRITE|VM_SHARED))
-			return wp_pfn_shared(fe, orig_pte);
+			return wp_pfn_shared(vmf, orig_pte);
 
-		pte_unmap_unlock(fe->pte, fe->ptl);
-		return wp_page_copy(fe, orig_pte, old_page);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		return wp_page_copy(vmf, orig_pte, old_page);
 	}
 
 	/*
@@ -2387,13 +2388,13 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
 		int total_mapcount;
 		if (!trylock_page(old_page)) {
 			get_page(old_page);
-			pte_unmap_unlock(fe->pte, fe->ptl);
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			lock_page(old_page);
-			fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
-					fe->address, &fe->ptl);
-			if (!pte_same(*fe->pte, orig_pte)) {
+			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+					vmf->address, &vmf->ptl);
+			if (!pte_same(*vmf->pte, orig_pte)) {
 				unlock_page(old_page);
-				pte_unmap_unlock(fe->pte, fe->ptl);
+				pte_unmap_unlock(vmf->pte, vmf->ptl);
 				put_page(old_page);
 				return 0;
 			}
@@ -2411,12 +2412,12 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
 				page_move_anon_rmap(old_page, vma);
 			}
 			unlock_page(old_page);
-			return wp_page_reuse(fe, orig_pte, old_page, 0, 0);
+			return wp_page_reuse(vmf, orig_pte, old_page, 0, 0);
 		}
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
-		return wp_page_shared(fe, orig_pte, old_page);
+		return wp_page_shared(vmf, orig_pte, old_page);
 	}
 
 	/*
@@ -2424,8 +2425,8 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
 	 */
 	get_page(old_page);
 
-	pte_unmap_unlock(fe->pte, fe->ptl);
-	return wp_page_copy(fe, orig_pte, old_page);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
+	return wp_page_copy(vmf, orig_pte, old_page);
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
@@ -2513,9 +2514,9 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
  * as does filemap_fault().
  */
-int do_swap_page(struct fault_env *fe, pte_t orig_pte)
+int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *page, *swapcache;
 	struct mem_cgroup *memcg;
 	swp_entry_t entry;
@@ -2524,17 +2525,18 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	int exclusive = 0;
 	int ret = 0;
 
-	if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte))
+	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, orig_pte))
 		goto out;
 
 	entry = pte_to_swp_entry(orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
-			migration_entry_wait(vma->vm_mm, fe->pmd, fe->address);
+			migration_entry_wait(vma->vm_mm, vmf->pmd,
+					     vmf->address);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {
-			print_bad_pte(vma, fe->address, orig_pte, NULL);
+			print_bad_pte(vma, vmf->address, orig_pte, NULL);
 			ret = VM_FAULT_SIGBUS;
 		}
 		goto out;
@@ -2542,16 +2544,16 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		page = swapin_readahead(entry,
-					GFP_HIGHUSER_MOVABLE, vma, fe->address);
+		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma,
+					vmf->address);
 		if (!page) {
 			/*
 			 * Back out if somebody else faulted in this pte
 			 * while we released the pte lock.
 			 */
-			fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
-					fe->address, &fe->ptl);
-			if (likely(pte_same(*fe->pte, orig_pte)))
+			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+					vmf->address, &vmf->ptl);
+			if (likely(pte_same(*vmf->pte, orig_pte)))
 				ret = VM_FAULT_OOM;
 			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 			goto unlock;
@@ -2573,7 +2575,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	}
 
 	swapcache = page;
-	locked = lock_page_or_retry(page, vma->vm_mm, fe->flags);
+	locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
 
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
@@ -2590,7 +2592,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
 		goto out_page;
 
-	page = ksm_might_need_to_copy(page, vma, fe->address);
+	page = ksm_might_need_to_copy(page, vma, vmf->address);
 	if (unlikely(!page)) {
 		ret = VM_FAULT_OOM;
 		page = swapcache;
@@ -2606,9 +2608,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	/*
 	 * Back out if somebody else already faulted in this pte.
 	 */
-	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-			&fe->ptl);
-	if (unlikely(!pte_same(*fe->pte, orig_pte)))
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
+			&vmf->ptl);
+	if (unlikely(!pte_same(*vmf->pte, orig_pte)))
 		goto out_nomap;
 
 	if (unlikely(!PageUptodate(page))) {
@@ -2629,22 +2631,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
-		fe->flags &= ~FAULT_FLAG_WRITE;
+		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;
 	}
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(orig_pte))
 		pte = pte_mksoft_dirty(pte);
-	set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
+	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	if (page == swapcache) {
-		do_page_add_anon_rmap(page, vma, fe->address, exclusive);
+		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
 		mem_cgroup_commit_charge(page, memcg, true, false);
 		activate_page(page);
 	} else { /* ksm created a completely new copy */
-		page_add_new_anon_rmap(page, vma, fe->address, false);
+		page_add_new_anon_rmap(page, vma, vmf->address, false);
 		mem_cgroup_commit_charge(page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(page, vma);
 	}
@@ -2667,22 +2669,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 		put_page(swapcache);
 	}
 
-	if (fe->flags & FAULT_FLAG_WRITE) {
-		ret |= do_wp_page(fe, pte);
+	if (vmf->flags & FAULT_FLAG_WRITE) {
+		ret |= do_wp_page(vmf, pte);
 		if (ret & VM_FAULT_ERROR)
 			ret &= VM_FAULT_ERROR;
 		goto out;
 	}
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, fe->address, fe->pte);
+	update_mmu_cache(vma, vmf->address, vmf->pte);
 unlock:
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
 	return ret;
 out_nomap:
 	mem_cgroup_cancel_charge(page, memcg, false);
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out_page:
 	unlock_page(page);
 out_release:
@@ -2733,9 +2735,9 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_anonymous_page(struct fault_env *fe)
+static int do_anonymous_page(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
 	pte_t entry;
@@ -2745,7 +2747,7 @@ static int do_anonymous_page(struct fault_env *fe)
 		return VM_FAULT_SIGBUS;
 
 	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, fe->address) < 0)
+	if (check_stack_guard_page(vma, vmf->address) < 0)
 		return VM_FAULT_SIGSEGV;
 
 	/*
@@ -2758,26 +2760,26 @@ static int do_anonymous_page(struct fault_env *fe)
 	 *
 	 * Here we only have down_read(mmap_sem).
 	 */
-	if (pte_alloc(vma->vm_mm, fe->pmd, fe->address))
+	if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
 		return VM_FAULT_OOM;
 
 	/* See the comment in pte_alloc_one_map() */
-	if (unlikely(pmd_trans_unstable(fe->pmd)))
+	if (unlikely(pmd_trans_unstable(vmf->pmd)))
 		return 0;
 
 	/* Use the zero-page for reads */
-	if (!(fe->flags & FAULT_FLAG_WRITE) &&
+	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
 			!mm_forbids_zeropage(vma->vm_mm)) {
-		entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address),
+		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
 						vma->vm_page_prot));
-		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-				&fe->ptl);
-		if (!pte_none(*fe->pte))
+		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				vmf->address, &vmf->ptl);
+		if (!pte_none(*vmf->pte))
 			goto unlock;
 		/* Deliver the page fault to userland, check inside PT lock */
 		if (userfaultfd_missing(vma)) {
-			pte_unmap_unlock(fe->pte, fe->ptl);
-			return handle_userfault(fe, VM_UFFD_MISSING);
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
+			return handle_userfault(vmf, VM_UFFD_MISSING);
 		}
 		goto setpte;
 	}
@@ -2785,7 +2787,7 @@ static int do_anonymous_page(struct fault_env *fe)
 	/* Allocate our own private page. */
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	page = alloc_zeroed_user_highpage_movable(vma, fe->address);
+	page = alloc_zeroed_user_highpage_movable(vma, vmf->address);
 	if (!page)
 		goto oom;
 
@@ -2803,30 +2805,30 @@ static int do_anonymous_page(struct fault_env *fe)
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
-	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-			&fe->ptl);
-	if (!pte_none(*fe->pte))
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
+			&vmf->ptl);
+	if (!pte_none(*vmf->pte))
 		goto release;
 
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
-		pte_unmap_unlock(fe->pte, fe->ptl);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		mem_cgroup_cancel_charge(page, memcg, false);
 		put_page(page);
-		return handle_userfault(fe, VM_UFFD_MISSING);
+		return handle_userfault(vmf, VM_UFFD_MISSING);
 	}
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-	page_add_new_anon_rmap(page, vma, fe->address, false);
+	page_add_new_anon_rmap(page, vma, vmf->address, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, vma);
 setpte:
-	set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
+	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, fe->address, fe->pte);
+	update_mmu_cache(vma, vmf->address, vmf->pte);
 unlock:
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
 release:
 	mem_cgroup_cancel_charge(page, memcg, false);
@@ -2843,62 +2845,62 @@ oom:
  * released depending on flags and vma->vm_ops->fault() return value.
  * See filemap_fault() and __lock_page_retry().
  */
-static int __do_fault(struct fault_env *fe, pgoff_t pgoff,
+static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff,
 		struct page *cow_page, struct page **page, void **entry)
 {
-	struct vm_area_struct *vma = fe->vma;
-	struct vm_fault vmf;
+	struct vm_area_struct *vma = vmf->vma;
+	struct vm_fault vmf2;
 	int ret;
 
-	vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK);
-	vmf.pgoff = pgoff;
-	vmf.flags = fe->flags;
-	vmf.page = NULL;
-	vmf.gfp_mask = __get_fault_gfp_mask(vma);
-	vmf.cow_page = cow_page;
+	vmf2.virtual_address = (void __user *)(vmf->address & PAGE_MASK);
+	vmf2.pgoff = pgoff;
+	vmf2.flags = vmf->flags;
+	vmf2.page = NULL;
+	vmf2.gfp_mask = __get_fault_gfp_mask(vma);
+	vmf2.cow_page = cow_page;
 
-	ret = vma->vm_ops->fault(vma, &vmf);
+	ret = vma->vm_ops->fault(vma, &vmf2);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 	if (ret & VM_FAULT_DAX_LOCKED) {
-		*entry = vmf.entry;
+		*entry = vmf2.entry;
 		return ret;
 	}
 
-	if (unlikely(PageHWPoison(vmf.page))) {
+	if (unlikely(PageHWPoison(vmf2.page))) {
 		if (ret & VM_FAULT_LOCKED)
-			unlock_page(vmf.page);
-		put_page(vmf.page);
+			unlock_page(vmf2.page);
+		put_page(vmf2.page);
 		return VM_FAULT_HWPOISON;
 	}
 
 	if (unlikely(!(ret & VM_FAULT_LOCKED)))
-		lock_page(vmf.page);
+		lock_page(vmf2.page);
 	else
-		VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
+		VM_BUG_ON_PAGE(!PageLocked(vmf2.page), vmf2.page);
 
-	*page = vmf.page;
+	*page = vmf2.page;
 	return ret;
 }
 
-static int pte_alloc_one_map(struct fault_env *fe)
+static int pte_alloc_one_map(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 
-	if (!pmd_none(*fe->pmd))
+	if (!pmd_none(*vmf->pmd))
 		goto map_pte;
-	if (fe->prealloc_pte) {
-		fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-		if (unlikely(!pmd_none(*fe->pmd))) {
-			spin_unlock(fe->ptl);
+	if (vmf->prealloc_pte) {
+		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+		if (unlikely(!pmd_none(*vmf->pmd))) {
+			spin_unlock(vmf->ptl);
 			goto map_pte;
 		}
 
 		atomic_long_inc(&vma->vm_mm->nr_ptes);
-		pmd_populate(vma->vm_mm, fe->pmd, fe->prealloc_pte);
-		spin_unlock(fe->ptl);
-		fe->prealloc_pte = 0;
-	} else if (unlikely(pte_alloc(vma->vm_mm, fe->pmd, fe->address))) {
+		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+		spin_unlock(vmf->ptl);
+		vmf->prealloc_pte = 0;
+	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
 		return VM_FAULT_OOM;
 	}
 map_pte:
@@ -2913,11 +2915,11 @@ map_pte:
 	 * through an atomic read in C, which is what pmd_trans_unstable()
 	 * provides.
 	 */
-	if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd))
+	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
 		return VM_FAULT_NOPAGE;
 
-	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
-			&fe->ptl);
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
+			&vmf->ptl);
 	return 0;
 }
 
@@ -2935,24 +2937,24 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
-static void deposit_prealloc_pte(struct fault_env *fe)
+static void deposit_prealloc_pte(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 
-	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 	/*
 	 * We are going to consume the prealloc table,
 	 * count that as nr_ptes.
 	 */
 	atomic_long_inc(&vma->vm_mm->nr_ptes);
-	fe->prealloc_pte = 0;
+	vmf->prealloc_pte = 0;
 }
 
-static int do_set_pmd(struct fault_env *fe, struct page *page)
+static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
-	struct vm_area_struct *vma = fe->vma;
-	bool write = fe->flags & FAULT_FLAG_WRITE;
-	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	pmd_t entry;
 	int i, ret;
 
@@ -2966,15 +2968,15 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	 * Archs like ppc64 need additonal space to store information
 	 * related to pte entry. Use the preallocated table for that.
 	 */
-	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
-		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
-		if (!fe->prealloc_pte)
+	if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
+		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
-	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-	if (unlikely(!pmd_none(*fe->pmd)))
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_none(*vmf->pmd)))
 		goto out;
 
 	for (i = 0; i < HPAGE_PMD_NR; i++)
@@ -2990,11 +2992,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	 * deposit and withdraw with pmd lock held
 	 */
 	if (arch_needs_pgtable_deposit())
-		deposit_prealloc_pte(fe);
+		deposit_prealloc_pte(vmf);
 
-	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 
-	update_mmu_cache_pmd(vma, haddr, fe->pmd);
+	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
 
 	/* fault is handled */
 	ret = 0;
@@ -3005,13 +3007,13 @@ out:
 	 * withdraw with pmd lock held.
 	 */
 	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
-		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
-							       fe->pmd);
-	spin_unlock(fe->ptl);
+		vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+								vmf->pmd);
+	spin_unlock(vmf->ptl);
 	return ret;
 }
 #else
-static int do_set_pmd(struct fault_env *fe, struct page *page)
+static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	BUILD_BUG();
 	return 0;
@@ -3022,41 +3024,42 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
  * alloc_set_pte - setup new PTE entry for given page and add reverse page
  * mapping. If needed, the fucntion allocates page table or use pre-allocated.
  *
- * @fe: fault environment
+ * @vmf: fault environment
  * @memcg: memcg to charge page (only for private mappings)
  * @page: page to map
  *
- * Caller must take care of unlocking fe->ptl, if fe->pte is non-NULL on return.
+ * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
+ * return.
  *
  * Target users are page handler itself and implementations of
  * vm_ops->map_pages.
  */
-int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
+int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
 {
-	struct vm_area_struct *vma = fe->vma;
-	bool write = fe->flags & FAULT_FLAG_WRITE;
+	struct vm_area_struct *vma = vmf->vma;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
 	int ret;
 
-	if (pmd_none(*fe->pmd) && PageTransCompound(page) &&
+	if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
 			IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
 		/* THP on COW? */
 		VM_BUG_ON_PAGE(memcg, page);
 
-		ret = do_set_pmd(fe, page);
+		ret = do_set_pmd(vmf, page);
 		if (ret != VM_FAULT_FALLBACK)
 			goto fault_handled;
 	}
 
-	if (!fe->pte) {
-		ret = pte_alloc_one_map(fe);
+	if (!vmf->pte) {
+		ret = pte_alloc_one_map(vmf);
 		if (ret)
 			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte))) {
+	if (unlikely(!pte_none(*vmf->pte))) {
 		ret = VM_FAULT_NOPAGE;
 		goto fault_handled;
 	}
@@ -3068,24 +3071,24 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-		page_add_new_anon_rmap(page, vma, fe->address, false);
+		page_add_new_anon_rmap(page, vma, vmf->address, false);
 		mem_cgroup_commit_charge(page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(page, vma);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, false);
 	}
-	set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
+	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
 
 	/* no need to invalidate: a not-present page won't be cached */
-	update_mmu_cache(vma, fe->address, fe->pte);
+	update_mmu_cache(vma, vmf->address, vmf->pte);
 	ret = 0;
 
 fault_handled:
 	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
+	if (vmf->prealloc_pte) {
+		pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
+		vmf->prealloc_pte = 0;
 	}
 	return ret;
 }
@@ -3154,17 +3157,17 @@ late_initcall(fault_around_debugfs);
  * fault_around_pages() value (and therefore to page order).  This way it's
  * easier to guarantee that we don't cross page table boundaries.
  */
-static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
+static int do_fault_around(struct vm_fault *vmf, pgoff_t start_pgoff)
 {
-	unsigned long address = fe->address, nr_pages, mask;
+	unsigned long address = vmf->address, nr_pages, mask;
 	pgoff_t end_pgoff;
 	int off, ret = 0;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
 
-	fe->address = max(address & mask, fe->vma->vm_start);
-	off = ((address - fe->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+	vmf->address = max(address & mask, vmf->vma->vm_start);
+	off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 	start_pgoff -= off;
 
 	/*
@@ -3172,44 +3175,45 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 	 *  or fault_around_pages() from start_pgoff, depending what is nearest.
 	 */
 	end_pgoff = start_pgoff -
-		((fe->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+		((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
 		PTRS_PER_PTE - 1;
-	end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1,
+	end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
 			start_pgoff + nr_pages - 1);
 
-	if (pmd_none(*fe->pmd)) {
-		fe->prealloc_pte = pte_alloc_one(fe->vma->vm_mm, fe->address);
-		if (!fe->prealloc_pte)
+	if (pmd_none(*vmf->pmd)) {
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
+						  vmf->address);
+		if (!vmf->prealloc_pte)
 			goto out;
 		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
-	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
+	vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
 
 	/* Huge page is mapped? Page fault is solved */
-	if (pmd_trans_huge(*fe->pmd)) {
+	if (pmd_trans_huge(*vmf->pmd)) {
 		ret = VM_FAULT_NOPAGE;
 		goto out;
 	}
 
 	/* ->map_pages() haven't done anything useful. Cold page cache? */
-	if (!fe->pte)
+	if (!vmf->pte)
 		goto out;
 
 	/* check if the page fault is solved */
-	fe->pte -= (fe->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
-	if (!pte_none(*fe->pte))
+	vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
+	if (!pte_none(*vmf->pte))
 		ret = VM_FAULT_NOPAGE;
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
-	fe->address = address;
-	fe->pte = NULL;
+	vmf->address = address;
+	vmf->pte = NULL;
 	return ret;
 }
 
-static int do_read_fault(struct fault_env *fe, pgoff_t pgoff)
+static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *fault_page;
 	int ret = 0;
 
@@ -3219,27 +3223,27 @@ static int do_read_fault(struct fault_env *fe, pgoff_t pgoff)
 	 * something).
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
-		ret = do_fault_around(fe, pgoff);
+		ret = do_fault_around(vmf, pgoff);
 		if (ret)
 			return ret;
 	}
 
-	ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
+	ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
-	ret |= alloc_set_pte(fe, NULL, fault_page);
-	if (fe->pte)
-		pte_unmap_unlock(fe->pte, fe->ptl);
+	ret |= alloc_set_pte(vmf, NULL, fault_page);
+	if (vmf->pte)
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	unlock_page(fault_page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		put_page(fault_page);
 	return ret;
 }
 
-static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
+static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *fault_page, *new_page;
 	void *fault_entry;
 	struct mem_cgroup *memcg;
@@ -3248,7 +3252,7 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
 
-	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address);
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
 	if (!new_page)
 		return VM_FAULT_OOM;
 
@@ -3258,17 +3262,17 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
 		return VM_FAULT_OOM;
 	}
 
-	ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry);
+	ret = __do_fault(vmf, pgoff, new_page, &fault_page, &fault_entry);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 
 	if (!(ret & VM_FAULT_DAX_LOCKED))
-		copy_user_highpage(new_page, fault_page, fe->address, vma);
+		copy_user_highpage(new_page, fault_page, vmf->address, vma);
 	__SetPageUptodate(new_page);
 
-	ret |= alloc_set_pte(fe, memcg, new_page);
-	if (fe->pte)
-		pte_unmap_unlock(fe->pte, fe->ptl);
+	ret |= alloc_set_pte(vmf, memcg, new_page);
+	if (vmf->pte)
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	if (!(ret & VM_FAULT_DAX_LOCKED)) {
 		unlock_page(fault_page);
 		put_page(fault_page);
@@ -3284,15 +3288,15 @@ uncharge_out:
 	return ret;
 }
 
-static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
+static int do_shared_fault(struct vm_fault *vmf, pgoff_t pgoff)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *fault_page;
 	struct address_space *mapping;
 	int dirtied = 0;
 	int ret, tmp;
 
-	ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
+	ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
@@ -3302,7 +3306,7 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
 	 */
 	if (vma->vm_ops->page_mkwrite) {
 		unlock_page(fault_page);
-		tmp = do_page_mkwrite(vma, fault_page, fe->address);
+		tmp = do_page_mkwrite(vma, fault_page, vmf->address);
 		if (unlikely(!tmp ||
 				(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
 			put_page(fault_page);
@@ -3310,9 +3314,9 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
 		}
 	}
 
-	ret |= alloc_set_pte(fe, NULL, fault_page);
-	if (fe->pte)
-		pte_unmap_unlock(fe->pte, fe->ptl);
+	ret |= alloc_set_pte(vmf, NULL, fault_page);
+	if (vmf->pte)
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
 					VM_FAULT_RETRY))) {
 		unlock_page(fault_page);
@@ -3350,19 +3354,19 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int do_fault(struct fault_env *fe)
+static int do_fault(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
-	pgoff_t pgoff = linear_page_index(vma, fe->address);
+	struct vm_area_struct *vma = vmf->vma;
+	pgoff_t pgoff = linear_page_index(vma, vmf->address);
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
 		return VM_FAULT_SIGBUS;
-	if (!(fe->flags & FAULT_FLAG_WRITE))
-		return do_read_fault(fe, pgoff);
+	if (!(vmf->flags & FAULT_FLAG_WRITE))
+		return do_read_fault(vmf, pgoff);
 	if (!(vma->vm_flags & VM_SHARED))
-		return do_cow_fault(fe, pgoff);
-	return do_shared_fault(fe, pgoff);
+		return do_cow_fault(vmf, pgoff);
+	return do_shared_fault(vmf, pgoff);
 }
 
 static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
@@ -3380,9 +3384,9 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct fault_env *fe, pte_t pte)
+static int do_numa_page(struct vm_fault *vmf, pte_t pte)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
 	int page_nid = -1;
 	int last_cpupid;
@@ -3400,10 +3404,10 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
 	* page table entry is not accessible, so there would be no
 	* concurrent hardware modifications to the PTE.
 	*/
-	fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd);
-	spin_lock(fe->ptl);
-	if (unlikely(!pte_same(*fe->pte, pte))) {
-		pte_unmap_unlock(fe->pte, fe->ptl);
+	vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
+	spin_lock(vmf->ptl);
+	if (unlikely(!pte_same(*vmf->pte, pte))) {
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		goto out;
 	}
 
@@ -3412,18 +3416,18 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
-	update_mmu_cache(vma, fe->address, fe->pte);
+	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+	update_mmu_cache(vma, vmf->address, vmf->pte);
 
-	page = vm_normal_page(vma, fe->address, pte);
+	page = vm_normal_page(vma, vmf->address, pte);
 	if (!page) {
-		pte_unmap_unlock(fe->pte, fe->ptl);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return 0;
 	}
 
 	/* TODO: handle PTE-mapped THP */
 	if (PageCompound(page)) {
-		pte_unmap_unlock(fe->pte, fe->ptl);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return 0;
 	}
 
@@ -3447,9 +3451,9 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
 
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, fe->address, page_nid,
+	target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
 			&flags);
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	if (target_nid == -1) {
 		put_page(page);
 		goto out;
@@ -3469,28 +3473,28 @@ out:
 	return 0;
 }
 
-static int create_huge_pmd(struct fault_env *fe)
+static int create_huge_pmd(struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = fe->vma;
+	struct vm_area_struct *vma = vmf->vma;
 	if (vma_is_anonymous(vma))
-		return do_huge_pmd_anonymous_page(fe);
+		return do_huge_pmd_anonymous_page(vmf);
 	if (vma->vm_ops->pmd_fault)
-		return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd,
-				fe->flags);
+		return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd,
+				vmf->flags);
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
+static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
-	if (vma_is_anonymous(fe->vma))
-		return do_huge_pmd_wp_page(fe, orig_pmd);
-	if (fe->vma->vm_ops->pmd_fault)
-		return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd,
-				fe->flags);
+	if (vma_is_anonymous(vmf->vma))
+		return do_huge_pmd_wp_page(vmf, orig_pmd);
+	if (vmf->vma->vm_ops->pmd_fault)
+		return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address,
+						   vmf->pmd, vmf->flags);
 
 	/* COW handled on pte level: split pmd */
-	VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
-	__split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL);
+	VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
+	__split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
 
 	return VM_FAULT_FALLBACK;
 }
@@ -3515,21 +3519,21 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
  * The mmap_sem may have been released depending on flags and our return value.
  * See filemap_fault() and __lock_page_or_retry().
  */
-static int handle_pte_fault(struct fault_env *fe)
+static int handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
-	if (unlikely(pmd_none(*fe->pmd))) {
+	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
 		 * Leave __pte_alloc() until later: because vm_ops->fault may
 		 * want to allocate huge page, and if we expose page table
 		 * for an instant, it will be difficult to retract from
 		 * concurrent faults and from rmap lookups.
 		 */
-		fe->pte = NULL;
+		vmf->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd))
+		if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
 			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge
@@ -3537,9 +3541,9 @@ static int handle_pte_fault(struct fault_env *fe)
 		 * mmap_sem read mode and khugepaged takes it in write mode.
 		 * So now it's safe to run pte_offset_map().
 		 */
-		fe->pte = pte_offset_map(fe->pmd, fe->address);
+		vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
 
-		entry = *fe->pte;
+		entry = *vmf->pte;
 
 		/*
 		 * some architectures can have larger ptes than wordsize,
@@ -3551,37 +3555,37 @@ static int handle_pte_fault(struct fault_env *fe)
 		 */
 		barrier();
 		if (pte_none(entry)) {
-			pte_unmap(fe->pte);
-			fe->pte = NULL;
+			pte_unmap(vmf->pte);
+			vmf->pte = NULL;
 		}
 	}
 
-	if (!fe->pte) {
-		if (vma_is_anonymous(fe->vma))
-			return do_anonymous_page(fe);
+	if (!vmf->pte) {
+		if (vma_is_anonymous(vmf->vma))
+			return do_anonymous_page(vmf);
 		else
-			return do_fault(fe);
+			return do_fault(vmf);
 	}
 
 	if (!pte_present(entry))
-		return do_swap_page(fe, entry);
+		return do_swap_page(vmf, entry);
 
-	if (pte_protnone(entry) && vma_is_accessible(fe->vma))
-		return do_numa_page(fe, entry);
+	if (pte_protnone(entry) && vma_is_accessible(vmf->vma))
+		return do_numa_page(vmf, entry);
 
-	fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
-	spin_lock(fe->ptl);
-	if (unlikely(!pte_same(*fe->pte, entry)))
+	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
+	spin_lock(vmf->ptl);
+	if (unlikely(!pte_same(*vmf->pte, entry)))
 		goto unlock;
-	if (fe->flags & FAULT_FLAG_WRITE) {
+	if (vmf->flags & FAULT_FLAG_WRITE) {
 		if (!pte_write(entry))
-			return do_wp_page(fe, entry);
+			return do_wp_page(vmf, entry);
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
-	if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry,
-				fe->flags & FAULT_FLAG_WRITE)) {
-		update_mmu_cache(fe->vma, fe->address, fe->pte);
+	if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
+				vmf->flags & FAULT_FLAG_WRITE)) {
+		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
@@ -3589,11 +3593,11 @@ static int handle_pte_fault(struct fault_env *fe)
 		 * This still avoids useless tlb flushes for .text page faults
 		 * with threads.
 		 */
-		if (fe->flags & FAULT_FLAG_WRITE)
-			flush_tlb_fix_spurious_fault(fe->vma, fe->address);
+		if (vmf->flags & FAULT_FLAG_WRITE)
+			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address);
 	}
 unlock:
-	pte_unmap_unlock(fe->pte, fe->ptl);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
 }
 
@@ -3606,7 +3610,7 @@ unlock:
 static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags)
 {
-	struct fault_env fe = {
+	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address,
 		.flags = flags,
@@ -3619,35 +3623,35 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	pud = pud_alloc(mm, pgd, address);
 	if (!pud)
 		return VM_FAULT_OOM;
-	fe.pmd = pmd_alloc(mm, pud, address);
-	if (!fe.pmd)
+	vmf.pmd = pmd_alloc(mm, pud, address);
+	if (!vmf.pmd)
 		return VM_FAULT_OOM;
-	if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) {
-		int ret = create_huge_pmd(&fe);
+	if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
+		int ret = create_huge_pmd(&vmf);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
 	} else {
-		pmd_t orig_pmd = *fe.pmd;
+		pmd_t orig_pmd = *vmf.pmd;
 		int ret;
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
 			if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
-				return do_huge_pmd_numa_page(&fe, orig_pmd);
+				return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-			if ((fe.flags & FAULT_FLAG_WRITE) &&
+			if ((vmf.flags & FAULT_FLAG_WRITE) &&
 					!pmd_write(orig_pmd)) {
-				ret = wp_huge_pmd(&fe, orig_pmd);
+				ret = wp_huge_pmd(&vmf, orig_pmd);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
 			} else {
-				huge_pmd_set_accessed(&fe, orig_pmd);
+				huge_pmd_set_accessed(&vmf, orig_pmd);
 				return 0;
 			}
 		}
 	}
 
-	return handle_pte_fault(&fe);
+	return handle_pte_fault(&vmf);
 }
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index c299a7fbca70..9902d811ff4f 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1801,7 +1801,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct fault_env *fe,
+void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	BUG();
-- 
cgit v1.2.3


From 1a29d85eb0f19b7d8271923d8917d7b4f5540b3e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:01 -0800
Subject: mm: use vmf->address instead of of vmf->virtual_address

Every single user of vmf->virtual_address typed that entry to unsigned
long before doing anything with it so the type of virtual_address does
not really provide us any additional safety.  Just use masked
vmf->address which already has the appropriate type.

Link: http://lkml.kernel.org/r/1479460644-25076-3-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/file.c     |  8 +++-----
 arch/x86/entry/vdso/vma.c                    |  4 ++--
 drivers/char/agp/alpha-agp.c                 |  3 +--
 drivers/char/mspec.c                         |  2 +-
 drivers/dax/dax.c                            |  3 +--
 drivers/gpu/drm/armada/armada_gem.c          |  5 ++---
 drivers/gpu/drm/drm_vm.c                     | 10 +++++-----
 drivers/gpu/drm/etnaviv/etnaviv_gem.c        |  7 +++----
 drivers/gpu/drm/exynos/exynos_drm_gem.c      |  6 ++----
 drivers/gpu/drm/gma500/framebuffer.c         |  2 +-
 drivers/gpu/drm/gma500/gem.c                 |  5 ++---
 drivers/gpu/drm/i915/i915_gem.c              |  3 +--
 drivers/gpu/drm/msm/msm_gem.c                |  8 +++-----
 drivers/gpu/drm/omapdrm/omap_gem.c           | 20 ++++++++------------
 drivers/gpu/drm/tegra/gem.c                  |  4 ++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c              |  2 +-
 drivers/gpu/drm/udl/udl_gem.c                |  5 ++---
 drivers/gpu/drm/vgem/vgem_drv.c              |  2 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c    |  5 ++---
 drivers/misc/cxl/context.c                   |  5 ++---
 drivers/misc/sgi-gru/grumain.c               |  2 +-
 drivers/staging/android/ion/ion.c            |  2 +-
 drivers/staging/lustre/lustre/llite/vvp_io.c |  6 +++---
 drivers/xen/privcmd.c                        |  2 +-
 fs/dax.c                                     |  4 ++--
 include/linux/mm.h                           |  2 --
 mm/memory.c                                  |  9 ++++-----
 27 files changed, 57 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 06254467e4dd..3a147122bc98 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -236,7 +236,6 @@ static int
 spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct spu_context *ctx	= vma->vm_file->private_data;
-	unsigned long address = (unsigned long)vmf->virtual_address;
 	unsigned long pfn, offset;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
@@ -244,7 +243,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
-			address, offset);
+			vmf->address, offset);
 
 	if (spu_acquire(ctx))
 		return VM_FAULT_NOPAGE;
@@ -256,7 +255,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
 		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
 	}
-	vm_insert_pfn(vma, address, pfn);
+	vm_insert_pfn(vma, vmf->address, pfn);
 
 	spu_release(ctx);
 
@@ -355,8 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma,
 		down_read(&current->mm->mmap_sem);
 	} else {
 		area = ctx->spu->problem_phys + ps_offs;
-		vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
-					(area + offset) >> PAGE_SHIFT);
+		vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
 		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
 	}
 
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index e739002427ed..40121d14d34d 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -109,7 +109,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		return VM_FAULT_SIGBUS;
 
 	if (sym_offset == image->sym_vvar_page) {
-		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+		ret = vm_insert_pfn(vma, vmf->address,
 				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
@@ -117,7 +117,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
 			ret = vm_insert_pfn(
 				vma,
-				(unsigned long)vmf->virtual_address,
+				vmf->address,
 				__pa(pvti) >> PAGE_SHIFT);
 		}
 	}
diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c
index 199b8e99f7d7..737187865269 100644
--- a/drivers/char/agp/alpha-agp.c
+++ b/drivers/char/agp/alpha-agp.c
@@ -19,8 +19,7 @@ static int alpha_core_agp_vm_fault(struct vm_area_struct *vma,
 	unsigned long pa;
 	struct page *page;
 
-	dma_addr = (unsigned long)vmf->virtual_address - vma->vm_start
-						+ agp->aperture.bus_base;
+	dma_addr = vmf->address - vma->vm_start + agp->aperture.bus_base;
 	pa = agp->ops->translate(agp, dma_addr);
 
 	if (pa == (unsigned long)-EINVAL)
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index f3f92d5fcda0..a697ca0cab1e 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -227,7 +227,7 @@ mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * be because another thread has installed the pte first, so it
 	 * is no problem.
 	 */
-	vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	vm_insert_pfn(vma, vmf->address, pfn);
 
 	return VM_FAULT_NOPAGE;
 }
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 286447a83dab..26ec39ddf21f 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -328,7 +328,6 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
 static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
-	unsigned long vaddr = (unsigned long) vmf->virtual_address;
 	struct device *dev = &dax_dev->dev;
 	struct dax_region *dax_region;
 	int rc = VM_FAULT_SIGBUS;
@@ -353,7 +352,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
 
 	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	rc = vm_insert_mixed(vma, vaddr, pfn);
+	rc = vm_insert_mixed(vma, vmf->address, pfn);
 
 	if (rc == -ENOMEM)
 		return VM_FAULT_OOM;
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 768087ddb046..a293c8be232c 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -17,12 +17,11 @@
 static int armada_gem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct armada_gem_object *obj = drm_to_armada_gem(vma->vm_private_data);
-	unsigned long addr = (unsigned long)vmf->virtual_address;
 	unsigned long pfn = obj->phys_addr >> PAGE_SHIFT;
 	int ret;
 
-	pfn += (addr - vma->vm_start) >> PAGE_SHIFT;
-	ret = vm_insert_pfn(vma, addr, pfn);
+	pfn += (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 
 	switch (ret) {
 	case 0:
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index caa4e4ca616d..bd311c77c254 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -124,8 +124,7 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * Using vm_pgoff as a selector forces us to use this unusual
 		 * addressing scheme.
 		 */
-		resource_size_t offset = (unsigned long)vmf->virtual_address -
-			vma->vm_start;
+		resource_size_t offset = vmf->address - vma->vm_start;
 		resource_size_t baddr = map->offset + offset;
 		struct drm_agp_mem *agpmem;
 		struct page *page;
@@ -195,7 +194,7 @@ static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!map)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset = vmf->address - vma->vm_start;
 	i = (unsigned long)map->handle + offset;
 	page = vmalloc_to_page((void *)i);
 	if (!page)
@@ -301,7 +300,8 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!dma->pagelist)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;	/* vm_[pg]off[set] should be 0 */
+	offset = vmf->address - vma->vm_start;
+					/* vm_[pg]off[set] should be 0 */
 	page_nr = offset >> PAGE_SHIFT; /* page_nr could just be vmf->pgoff */
 	page = virt_to_page((void *)dma->pagelist[page_nr]);
 
@@ -337,7 +337,7 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!entry->pagelist)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset = vmf->address - vma->vm_start;
 	map_offset = map->offset - (unsigned long)dev->sg->virtual;
 	page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT);
 	page = entry->pagelist[page_offset];
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 6f5dfabb3096..114dddbd297b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -202,15 +202,14 @@ int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	page = pages[pgoff];
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 	     page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT);
 
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	ret = vm_insert_page(vma, vmf->address, page);
 
 out:
 	switch (ret) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index ea7a18230888..57b81460fec8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -455,8 +455,7 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	pgoff_t page_offset;
 	int ret;
 
-	page_offset = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
 		DRM_ERROR("invalid page offset\n");
@@ -465,8 +464,7 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	pfn = page_to_pfn(exynos_gem->pages[page_offset]);
-	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 out:
 	switch (ret) {
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 4071b2d1e8cf..8b44fa542562 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -125,7 +125,7 @@ static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 				  psbfb->gtt->offset;
 
 	page_num = vma_pages(vma);
-	address = (unsigned long)vmf->virtual_address - (vmf->pgoff << PAGE_SHIFT);
+	address = vmf->address - (vmf->pgoff << PAGE_SHIFT);
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index 6d1cb6b370b1..527c62917660 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -197,15 +197,14 @@ int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	/* Page relative to the VMA start - we must calculate this ourselves
 	   because vmf->pgoff is the fake GEM offset */
-	page_offset = ((unsigned long) vmf->virtual_address - vma->vm_start)
-				>> PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	/* CPU view of the page, don't go via the GART for CPU writes */
 	if (r->stolen)
 		pfn = (dev_priv->stolen_base + r->offset) >> PAGE_SHIFT;
 	else
 		pfn = page_to_pfn(r->pages[page_offset]);
-	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 
 fail:
 	mutex_unlock(&dev_priv->mmap_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0dcaf35b429..412f3513f269 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1796,8 +1796,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
-		PAGE_SHIFT;
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index cd06cfd94687..d8bc59c7e261 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -225,16 +225,14 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	pfn = page_to_pfn(pages[pgoff]);
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
-	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index d4e1e11466f8..4a90c690f09e 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -398,8 +398,7 @@ static int fault_1d(struct drm_gem_object *obj,
 	pgoff_t pgoff;
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (omap_obj->pages) {
 		omap_gem_cpu_sync(obj, pgoff);
@@ -409,11 +408,10 @@ static int fault_1d(struct drm_gem_object *obj,
 		pfn = (omap_obj->paddr >> PAGE_SHIFT) + pgoff;
 	}
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
-	return vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	return vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 }
 
 /* Special handling for the case of faulting in 2d tiled buffers */
@@ -427,7 +425,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	struct page *pages[64];  /* XXX is this too much to have on stack? */
 	unsigned long pfn;
 	pgoff_t pgoff, base_pgoff;
-	void __user *vaddr;
+	unsigned long vaddr;
 	int i, ret, slots;
 
 	/*
@@ -447,8 +445,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	/*
 	 * Actual address we start mapping at is rounded down to previous slot
@@ -459,7 +456,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	/* figure out buffer width in slots */
 	slots = omap_obj->width >> priv->usergart[fmt].slot_shift;
 
-	vaddr = vmf->virtual_address - ((pgoff - base_pgoff) << PAGE_SHIFT);
+	vaddr = vmf->address - ((pgoff - base_pgoff) << PAGE_SHIFT);
 
 	entry = &priv->usergart[fmt].entry[priv->usergart[fmt].last];
 
@@ -503,12 +500,11 @@ static int fault_2d(struct drm_gem_object *obj,
 
 	pfn = entry->paddr >> PAGE_SHIFT;
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
 	for (i = n; i > 0; i--) {
-		vm_insert_mixed(vma, (unsigned long)vaddr,
-				__pfn_to_pfn_t(pfn, PFN_DEV));
+		vm_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV));
 		pfn += priv->usergart[fmt].stride_pfn;
 		vaddr += PAGE_SIZE * m;
 	}
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index c08e5279eeac..7d853e6b5ff0 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -452,10 +452,10 @@ static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!bo->pages)
 		return VM_FAULT_SIGBUS;
 
-	offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
+	offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 	page = bo->pages[offset];
 
-	err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	err = vm_insert_page(vma, vmf->address, page);
 	switch (err) {
 	case -EAGAIN:
 	case 0:
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 4748aedc933a..68ef993ab431 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -101,7 +101,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page;
 	int ret;
 	int i;
-	unsigned long address = (unsigned long)vmf->virtual_address;
+	unsigned long address = vmf->address;
 	int retval = VM_FAULT_NOPAGE;
 	struct ttm_mem_type_manager *man =
 		&bdev->man[bo->mem.mem_type];
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 818e70712b18..3c0c4bd3f750 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -107,14 +107,13 @@ int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned int page_offset;
 	int ret = 0;
 
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (!obj->pages)
 		return VM_FAULT_SIGBUS;
 
 	page = obj->pages[page_offset];
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	ret = vm_insert_page(vma, vmf->address, page);
 	switch (ret) {
 	case -EAGAIN:
 	case 0:
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index f36c14729b55..477e07f0ecb6 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -54,7 +54,7 @@ static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_vgem_gem_object *obj = vma->vm_private_data;
 	/* We don't use vmf->pgoff since that has the fake offset */
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	struct page *page;
 
 	page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 1db0af6c7f94..ba63ca57ed7e 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -439,13 +439,12 @@ static int videobuf_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page;
 
 	dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n",
-		(unsigned long)vmf->virtual_address,
-		vma->vm_start, vma->vm_end);
+		vmf->address, vma->vm_start, vma->vm_end);
 
 	page = alloc_page(GFP_USER | __GFP_DMA32);
 	if (!page)
 		return VM_FAULT_OOM;
-	clear_user_highpage(page, (unsigned long)vmf->virtual_address);
+	clear_user_highpage(page, vmf->address);
 	vmf->page = page;
 
 	return 0;
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 5e506c19108a..5d36dcc7f47e 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -117,13 +117,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
 static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct cxl_context *ctx = vma->vm_file->private_data;
-	unsigned long address = (unsigned long)vmf->virtual_address;
 	u64 area, offset;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
 
 	pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
-			__func__, ctx->pe, address, offset);
+			__func__, ctx->pe, vmf->address, offset);
 
 	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
 		area = ctx->afu->psn_phys;
@@ -155,7 +154,7 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
-	vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+	vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
 
 	mutex_unlock(&ctx->status_mutex);
 
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 33741ad4a74a..af2e077da4b8 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -932,7 +932,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned long paddr, vaddr;
 	unsigned long expires;
 
-	vaddr = (unsigned long)vmf->virtual_address;
+	vaddr = vmf->address;
 	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
 		vma, vaddr, GSEG_BASE(vaddr));
 	STAT(nopfn);
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index d5cc3070e83f..b653451843c8 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -882,7 +882,7 @@ static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
 
 	pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff]));
-	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 	mutex_unlock(&buffer->lock);
 	if (ret)
 		return VM_FAULT_ERROR;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 0b6d388d8aa4..697cbfbe9374 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -1014,7 +1014,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 		       "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
 		       vmf->page, vmf->page->mapping, vmf->page->index,
 		       (long)vmf->page->flags, page_count(vmf->page),
-		       page_private(vmf->page), vmf->virtual_address);
+		       page_private(vmf->page), (void *)vmf->address);
 		if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
 			lock_page(vmf->page);
 			cfio->ft_flags |= VM_FAULT_LOCKED;
@@ -1025,12 +1025,12 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 	}
 
 	if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
-		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
+		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address);
 		return -EFAULT;
 	}
 
 	if (cfio->ft_flags & VM_FAULT_OOM) {
-		CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
+		CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 702040fe2001..6e3306f4a525 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -602,7 +602,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
 	       vma, vma->vm_start, vma->vm_end,
-	       vmf->pgoff, vmf->virtual_address);
+	       vmf->pgoff, (void *)vmf->address);
 
 	return VM_FAULT_SIGBUS;
 }
diff --git a/fs/dax.c b/fs/dax.c
index 5ae8e11ad786..f1dfae64b11a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -738,7 +738,7 @@ static int dax_insert_mapping(struct address_space *mapping,
 		struct block_device *bdev, sector_t sector, size_t size,
 		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	struct blk_dax_ctl dax = {
 		.sector = sector,
 		.size = size,
@@ -948,7 +948,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
 	sector_t sector;
 	struct iomap iomap = { 0 };
diff --git a/include/linux/mm.h b/include/linux/mm.h
index de5bcead2511..75fda0de64bf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -297,8 +297,6 @@ struct vm_fault {
 	gfp_t gfp_mask;			/* gfp mask to be used for allocations */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	unsigned long address;		/* Faulting virtual address */
-	void __user *virtual_address;	/* Faulting virtual address masked by
-					 * PAGE_MASK */
 	pmd_t *pmd;			/* Pointer to pmd entry matching
 					 * the 'address'
 					 */
diff --git a/mm/memory.c b/mm/memory.c
index 512e1c359193..379836261d4a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2040,7 +2040,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	struct vm_fault vmf;
 	int ret;
 
-	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
+	vmf.address = address & PAGE_MASK;
 	vmf.pgoff = page->index;
 	vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 	vmf.gfp_mask = __get_fault_gfp_mask(vma);
@@ -2276,8 +2276,7 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte)
 		struct vm_fault vmf2 = {
 			.page = NULL,
 			.pgoff = linear_page_index(vma, vmf->address),
-			.virtual_address =
-				(void __user *)(vmf->address & PAGE_MASK),
+			.address = vmf->address,
 			.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
 		};
 		int ret;
@@ -2852,7 +2851,7 @@ static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff,
 	struct vm_fault vmf2;
 	int ret;
 
-	vmf2.virtual_address = (void __user *)(vmf->address & PAGE_MASK);
+	vmf2.address = vmf->address;
 	vmf2.pgoff = pgoff;
 	vmf2.flags = vmf->flags;
 	vmf2.page = NULL;
@@ -3612,7 +3611,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 {
 	struct vm_fault vmf = {
 		.vma = vma,
-		.address = address,
+		.address = address & PAGE_MASK,
 		.flags = flags,
 	};
 	struct mm_struct *mm = vma->vm_mm;
-- 
cgit v1.2.3


From 2994302bc8a17180788fac66a47102d338d5d0ec Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:16 -0800
Subject: mm: add orig_pte field into vm_fault

Add orig_pte field to vm_fault structure to allow ->page_mkwrite
handlers to fully handle the fault.

This also allows us to save some passing of extra arguments around.

Link: http://lkml.kernel.org/r/1479460644-25076-8-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  4 +--
 mm/internal.h      |  2 +-
 mm/khugepaged.c    |  7 ++---
 mm/memory.c        | 82 +++++++++++++++++++++++++++---------------------------
 4 files changed, 47 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75fda0de64bf..39c17a2efcea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -298,8 +298,8 @@ struct vm_fault {
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	unsigned long address;		/* Faulting virtual address */
 	pmd_t *pmd;			/* Pointer to pmd entry matching
-					 * the 'address'
-					 */
+					 * the 'address' */
+	pte_t orig_pte;			/* Value of PTE at the time of fault */
 
 	struct page *cow_page;		/* Handler may choose to COW */
 	struct page *page;		/* ->fault handlers should return a
diff --git a/mm/internal.h b/mm/internal.h
index 093b1eacc91b..44d68895a9b9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,7 +36,7 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
-int do_swap_page(struct vm_fault *vmf, pte_t orig_pte);
+int do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f062d54c9683..7434a63cac94 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -875,7 +875,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					unsigned long address, pmd_t *pmd,
 					int referenced)
 {
-	pte_t pteval;
 	int swapped_in = 0, ret = 0;
 	struct vm_fault vmf = {
 		.vma = vma,
@@ -893,11 +892,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	vmf.pte = pte_offset_map(pmd, address);
 	for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
 			vmf.pte++, vmf.address += PAGE_SIZE) {
-		pteval = *vmf.pte;
-		if (!is_swap_pte(pteval))
+		vmf.orig_pte = *vmf.pte;
+		if (!is_swap_pte(vmf.orig_pte))
 			continue;
 		swapped_in++;
-		ret = do_swap_page(&vmf, pteval);
+		ret = do_swap_page(&vmf);
 
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
diff --git a/mm/memory.c b/mm/memory.c
index 7ba9cc58dddd..cf74f7ca911b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2070,8 +2070,8 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
  * case, all we need to do here is to mark the page as writable and update
  * any related book-keeping.
  */
-static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte,
-			struct page *page, int page_mkwrite, int dirty_shared)
+static inline int wp_page_reuse(struct vm_fault *vmf, struct page *page,
+				int page_mkwrite, int dirty_shared)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2084,8 +2084,8 @@ static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte,
 	if (page)
 		page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
 
-	flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
-	entry = pte_mkyoung(orig_pte);
+	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
+	entry = pte_mkyoung(vmf->orig_pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
@@ -2135,8 +2135,7 @@ static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte,
  *   held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
-		struct page *old_page)
+static int wp_page_copy(struct vm_fault *vmf, struct page *old_page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2150,7 +2149,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
-	if (is_zero_pfn(pte_pfn(orig_pte))) {
+	if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma,
 							      vmf->address);
 		if (!new_page)
@@ -2174,7 +2173,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
 	 * Re-check the pte - we dropped the lock
 	 */
 	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
-	if (likely(pte_same(*vmf->pte, orig_pte))) {
+	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
 				dec_mm_counter_fast(mm,
@@ -2184,7 +2183,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
 		} else {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
-		flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
+		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
@@ -2268,7 +2267,7 @@ oom:
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte)
+static int wp_pfn_shared(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
@@ -2286,16 +2285,15 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte)
 		 * We might have raced with another page fault while we
 		 * released the pte_offset_map_lock.
 		 */
-		if (!pte_same(*vmf->pte, orig_pte)) {
+		if (!pte_same(*vmf->pte, vmf->orig_pte)) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			return 0;
 		}
 	}
-	return wp_page_reuse(vmf, orig_pte, NULL, 0, 0);
+	return wp_page_reuse(vmf, NULL, 0, 0);
 }
 
-static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
-		struct page *old_page)
+static int wp_page_shared(struct vm_fault *vmf, struct page *old_page)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2321,7 +2319,7 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
 		 */
 		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 						vmf->address, &vmf->ptl);
-		if (!pte_same(*vmf->pte, orig_pte)) {
+		if (!pte_same(*vmf->pte, vmf->orig_pte)) {
 			unlock_page(old_page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			put_page(old_page);
@@ -2330,7 +2328,7 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
 		page_mkwrite = 1;
 	}
 
-	return wp_page_reuse(vmf, orig_pte, old_page, page_mkwrite, 1);
+	return wp_page_reuse(vmf, old_page, page_mkwrite, 1);
 }
 
 /*
@@ -2351,13 +2349,13 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
  * but allow concurrent faults), with pte both mapped and locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
+static int do_wp_page(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *old_page;
 
-	old_page = vm_normal_page(vma, vmf->address, orig_pte);
+	old_page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
 	if (!old_page) {
 		/*
 		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
@@ -2368,10 +2366,10 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
 		 */
 		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 				     (VM_WRITE|VM_SHARED))
-			return wp_pfn_shared(vmf, orig_pte);
+			return wp_pfn_shared(vmf);
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		return wp_page_copy(vmf, orig_pte, old_page);
+		return wp_page_copy(vmf, old_page);
 	}
 
 	/*
@@ -2386,7 +2384,7 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
 			lock_page(old_page);
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
-			if (!pte_same(*vmf->pte, orig_pte)) {
+			if (!pte_same(*vmf->pte, vmf->orig_pte)) {
 				unlock_page(old_page);
 				pte_unmap_unlock(vmf->pte, vmf->ptl);
 				put_page(old_page);
@@ -2406,12 +2404,12 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
 				page_move_anon_rmap(old_page, vma);
 			}
 			unlock_page(old_page);
-			return wp_page_reuse(vmf, orig_pte, old_page, 0, 0);
+			return wp_page_reuse(vmf, old_page, 0, 0);
 		}
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
-		return wp_page_shared(vmf, orig_pte, old_page);
+		return wp_page_shared(vmf, old_page);
 	}
 
 	/*
@@ -2420,7 +2418,7 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
 	get_page(old_page);
 
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return wp_page_copy(vmf, orig_pte, old_page);
+	return wp_page_copy(vmf, old_page);
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
@@ -2508,7 +2506,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
  * as does filemap_fault().
  */
-int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
+int do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page, *swapcache;
@@ -2519,10 +2517,10 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 	int exclusive = 0;
 	int ret = 0;
 
-	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, orig_pte))
+	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
 
-	entry = pte_to_swp_entry(orig_pte);
+	entry = pte_to_swp_entry(vmf->orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
 			migration_entry_wait(vma->vm_mm, vmf->pmd,
@@ -2530,7 +2528,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {
-			print_bad_pte(vma, vmf->address, orig_pte, NULL);
+			print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL);
 			ret = VM_FAULT_SIGBUS;
 		}
 		goto out;
@@ -2547,7 +2545,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 			 */
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
-			if (likely(pte_same(*vmf->pte, orig_pte)))
+			if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
 				ret = VM_FAULT_OOM;
 			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 			goto unlock;
@@ -2604,7 +2602,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
-	if (unlikely(!pte_same(*vmf->pte, orig_pte)))
+	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
 		goto out_nomap;
 
 	if (unlikely(!PageUptodate(page))) {
@@ -2632,9 +2630,10 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 		exclusive = RMAP_EXCLUSIVE;
 	}
 	flush_icache_page(vma, page);
-	if (pte_swp_soft_dirty(orig_pte))
+	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+	vmf->orig_pte = pte;
 	if (page == swapcache) {
 		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
 		mem_cgroup_commit_charge(page, memcg, true, false);
@@ -2664,7 +2663,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
 	}
 
 	if (vmf->flags & FAULT_FLAG_WRITE) {
-		ret |= do_wp_page(vmf, pte);
+		ret |= do_wp_page(vmf);
 		if (ret & VM_FAULT_ERROR)
 			ret &= VM_FAULT_ERROR;
 		goto out;
@@ -3363,7 +3362,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct vm_fault *vmf, pte_t pte)
+static int do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
@@ -3371,6 +3370,7 @@ static int do_numa_page(struct vm_fault *vmf, pte_t pte)
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
+	pte_t pte = vmf->orig_pte;
 	bool was_writable = pte_write(pte);
 	int flags = 0;
 
@@ -3521,8 +3521,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		 * So now it's safe to run pte_offset_map().
 		 */
 		vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
-
-		entry = *vmf->pte;
+		vmf->orig_pte = *vmf->pte;
 
 		/*
 		 * some architectures can have larger ptes than wordsize,
@@ -3533,7 +3532,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		 * ptl lock held. So here a barrier will do.
 		 */
 		barrier();
-		if (pte_none(entry)) {
+		if (pte_none(vmf->orig_pte)) {
 			pte_unmap(vmf->pte);
 			vmf->pte = NULL;
 		}
@@ -3546,19 +3545,20 @@ static int handle_pte_fault(struct vm_fault *vmf)
 			return do_fault(vmf);
 	}
 
-	if (!pte_present(entry))
-		return do_swap_page(vmf, entry);
+	if (!pte_present(vmf->orig_pte))
+		return do_swap_page(vmf);
 
-	if (pte_protnone(entry) && vma_is_accessible(vmf->vma))
-		return do_numa_page(vmf, entry);
+	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
+		return do_numa_page(vmf);
 
 	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
 	spin_lock(vmf->ptl);
+	entry = vmf->orig_pte;
 	if (unlikely(!pte_same(*vmf->pte, entry)))
 		goto unlock;
 	if (vmf->flags & FAULT_FLAG_WRITE) {
 		if (!pte_write(entry))
-			return do_wp_page(vmf, entry);
+			return do_wp_page(vmf);
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
-- 
cgit v1.2.3


From 3917048d4572b9cabf6f8f5ad395eb693717367c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:18 -0800
Subject: mm: allow full handling of COW faults in ->fault handlers

Patch series "dax: Clear dirty bits after flushing caches", v5.

Patchset to clear dirty bits from radix tree of DAX inodes when caches
for corresponding pfns have been flushed.  In principle, these patches
enable handlers to easily update PTEs and do other work necessary to
finish the fault without duplicating the functionality present in the
generic code.  I'd like to thank Kirill and Ross for reviews of the
series!

This patch (of 20):

To allow full handling of COW faults add memcg field to struct vm_fault
and a return value of ->fault() handler meaning that COW fault is fully
handled and memcg charge must not be canceled.  This will allow us to
remove knowledge about special DAX locking from the generic fault code.

Link: http://lkml.kernel.org/r/1479460644-25076-9-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  4 +++-
 mm/memory.c        | 14 +++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 39c17a2efcea..6e25f4916d6f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -301,7 +301,8 @@ struct vm_fault {
 					 * the 'address' */
 	pte_t orig_pte;			/* Value of PTE at the time of fault */
 
-	struct page *cow_page;		/* Handler may choose to COW */
+	struct page *cow_page;		/* Page handler may use for COW fault */
+	struct mem_cgroup *memcg;	/* Cgroup cow_page belongs to */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
@@ -1103,6 +1104,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
 #define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */
+#define VM_FAULT_DONE_COW   0x2000	/* ->fault has fully handled COW */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
diff --git a/mm/memory.c b/mm/memory.c
index cf74f7ca911b..02504cd4ca0e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2844,9 +2844,8 @@ static int __do_fault(struct vm_fault *vmf)
 	int ret;
 
 	ret = vma->vm_ops->fault(vma, vmf);
-	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
-		return ret;
-	if (ret & VM_FAULT_DAX_LOCKED)
+	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
+			    VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW)))
 		return ret;
 
 	if (unlikely(PageHWPoison(vmf->page))) {
@@ -3226,7 +3225,6 @@ static int do_read_fault(struct vm_fault *vmf)
 static int do_cow_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct mem_cgroup *memcg;
 	int ret;
 
 	if (unlikely(anon_vma_prepare(vma)))
@@ -3237,7 +3235,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 		return VM_FAULT_OOM;
 
 	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
 	}
@@ -3245,12 +3243,14 @@ static int do_cow_fault(struct vm_fault *vmf)
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
+	if (ret & VM_FAULT_DONE_COW)
+		return ret;
 
 	if (!(ret & VM_FAULT_DAX_LOCKED))
 		copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
 	__SetPageUptodate(vmf->cow_page);
 
-	ret |= alloc_set_pte(vmf, memcg, vmf->cow_page);
+	ret |= alloc_set_pte(vmf, vmf->memcg, vmf->cow_page);
 	if (vmf->pte)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	if (!(ret & VM_FAULT_DAX_LOCKED)) {
@@ -3263,7 +3263,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 		goto uncharge_out;
 	return ret;
 uncharge_out:
-	mem_cgroup_cancel_charge(vmf->cow_page, memcg, false);
+	mem_cgroup_cancel_charge(vmf->cow_page, vmf->memcg, false);
 	put_page(vmf->cow_page);
 	return ret;
 }
-- 
cgit v1.2.3


From 9118c0cbd44262d0015568266f314e645ed6b9ce Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:21 -0800
Subject: mm: factor out functionality to finish page faults

Introduce finish_fault() as a helper function for finishing page faults.
It is rather thin wrapper around alloc_set_pte() but since we'd want to
call this from DAX code or filesystems, it is still useful to avoid some
boilerplate code.

Link: http://lkml.kernel.org/r/1479460644-25076-10-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 44 +++++++++++++++++++++++++++++++++++---------
 2 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e25f4916d6f..60a230e6ece7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -620,6 +620,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 
 int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
+int finish_fault(struct vm_fault *vmf);
 #endif
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 02504cd4ca0e..22f7f6e38515 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3074,6 +3074,38 @@ fault_handled:
 	return ret;
 }
 
+
+/**
+ * finish_fault - finish page fault once we have prepared the page to fault
+ *
+ * @vmf: structure describing the fault
+ *
+ * This function handles all that is needed to finish a page fault once the
+ * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
+ * given page, adds reverse page mapping, handles memcg charges and LRU
+ * addition. The function returns 0 on success, VM_FAULT_ code in case of
+ * error.
+ *
+ * The function expects the page to be locked and on success it consumes a
+ * reference of a page being mapped (for the PTE which maps it).
+ */
+int finish_fault(struct vm_fault *vmf)
+{
+	struct page *page;
+	int ret;
+
+	/* Did we COW the page? */
+	if ((vmf->flags & FAULT_FLAG_WRITE) &&
+	    !(vmf->vma->vm_flags & VM_SHARED))
+		page = vmf->cow_page;
+	else
+		page = vmf->page;
+	ret = alloc_set_pte(vmf, vmf->memcg, page);
+	if (vmf->pte)
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+	return ret;
+}
+
 static unsigned long fault_around_bytes __read_mostly =
 	rounddown_pow_of_two(65536);
 
@@ -3213,9 +3245,7 @@ static int do_read_fault(struct vm_fault *vmf)
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
-	ret |= alloc_set_pte(vmf, NULL, vmf->page);
-	if (vmf->pte)
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
+	ret |= finish_fault(vmf);
 	unlock_page(vmf->page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		put_page(vmf->page);
@@ -3250,9 +3280,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 		copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
 	__SetPageUptodate(vmf->cow_page);
 
-	ret |= alloc_set_pte(vmf, vmf->memcg, vmf->cow_page);
-	if (vmf->pte)
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
+	ret |= finish_fault(vmf);
 	if (!(ret & VM_FAULT_DAX_LOCKED)) {
 		unlock_page(vmf->page);
 		put_page(vmf->page);
@@ -3293,9 +3321,7 @@ static int do_shared_fault(struct vm_fault *vmf)
 		}
 	}
 
-	ret |= alloc_set_pte(vmf, NULL, vmf->page);
-	if (vmf->pte)
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
+	ret |= finish_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
 					VM_FAULT_RETRY))) {
 		unlock_page(vmf->page);
-- 
cgit v1.2.3


From b1aa812b21084285e9f6098639be9cd5bf9e05d7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:24 -0800
Subject: mm: move handling of COW faults into DAX code

Move final handling of COW faults from generic code into DAX fault
handler.  That way generic code doesn't have to be aware of
peculiarities of DAX locking so remove that knowledge and make locking
functions private to fs/dax.c.

Link: http://lkml.kernel.org/r/1479460644-25076-11-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c            | 61 +++++++++++++++++++++++++----------------------------
 include/linux/dax.h |  7 ------
 include/linux/mm.h  |  9 +-------
 mm/memory.c         | 13 ++++--------
 4 files changed, 34 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index f1dfae64b11a..e83aa4077df4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -240,6 +240,23 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	}
 }
 
+static void dax_unlock_mapping_entry(struct address_space *mapping,
+				     pgoff_t index)
+{
+	void *entry, **slot;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+			 !slot_locked(mapping, slot))) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return;
+	}
+	unlock_slot(mapping, slot);
+	spin_unlock_irq(&mapping->tree_lock);
+	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
+}
+
 static void put_locked_mapping_entry(struct address_space *mapping,
 				     pgoff_t index, void *entry)
 {
@@ -433,22 +450,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
-void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-	void *entry, **slot;
-
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
-			 !slot_locked(mapping, slot))) {
-		spin_unlock_irq(&mapping->tree_lock);
-		return;
-	}
-	unlock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
-}
-
 /*
  * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
  * entry to get unlocked before deleting it.
@@ -500,10 +501,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	/* This will replace locked radix tree entry with a hole page */
 	page = find_or_create_page(mapping, vmf->pgoff,
 				   vmf->gfp_mask | __GFP_ZERO);
-	if (!page) {
-		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+	if (!page)
 		return VM_FAULT_OOM;
-	}
 	vmf->page = page;
 	return VM_FAULT_LOCKED;
 }
@@ -954,7 +953,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	struct iomap iomap = { 0 };
 	unsigned flags = IOMAP_FAULT;
 	int error, major = 0;
-	int locked_status = 0;
+	int vmf_ret = 0;
 	void *entry;
 
 	/*
@@ -1007,13 +1006,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 		if (error)
 			goto finish_iomap;
-		if (!radix_tree_exceptional_entry(entry)) {
-			vmf->page = entry;
-			locked_status = VM_FAULT_LOCKED;
-		} else {
-			vmf->entry = entry;
-			locked_status = VM_FAULT_DAX_LOCKED;
-		}
+
+		__SetPageUptodate(vmf->cow_page);
+		vmf_ret = finish_fault(vmf);
+		if (!vmf_ret)
+			vmf_ret = VM_FAULT_DONE_COW;
 		goto finish_iomap;
 	}
 
@@ -1030,7 +1027,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-			locked_status = dax_load_hole(mapping, entry, vmf);
+			vmf_ret = dax_load_hole(mapping, entry, vmf);
 			break;
 		}
 		/*FALLTHRU*/
@@ -1042,7 +1039,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
  finish_iomap:
 	if (ops->iomap_end) {
-		if (error) {
+		if (error || (vmf_ret & VM_FAULT_ERROR)) {
 			/* keep previous error */
 			ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
 					&iomap);
@@ -1052,7 +1049,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 	}
  unlock_entry:
-	if (!locked_status || error)
+	if (vmf_ret != VM_FAULT_LOCKED || error)
 		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
 	if (error == -ENOMEM)
@@ -1060,9 +1057,9 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	/* -EBUSY is fine, somebody else faulted on the same PTE */
 	if (error < 0 && error != -EBUSY)
 		return VM_FAULT_SIGBUS | major;
-	if (locked_status) {
+	if (vmf_ret) {
 		WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-		return locked_status;
+		return vmf_ret;
 	}
 	return VM_FAULT_NOPAGE | major;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0afade8bd3d7..f97bcfe79472 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -46,7 +46,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
-void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
 int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
@@ -55,12 +54,6 @@ static inline struct page *read_dax_sector(struct block_device *bdev,
 {
 	return ERR_PTR(-ENXIO);
 }
-/* Shouldn't ever be called when dax is disabled. */
-static inline void dax_unlock_mapping_entry(struct address_space *mapping,
-					    pgoff_t index)
-{
-	BUG();
-}
 static inline int __dax_zero_page_range(struct block_device *bdev,
 		sector_t sector, unsigned int offset, unsigned int length)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60a230e6ece7..59a4da1742e5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -308,12 +308,6 @@ struct vm_fault {
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
-	void *entry;			/* ->fault handler can alternatively
-					 * return locked DAX entry. In that
-					 * case handler should return
-					 * VM_FAULT_DAX_LOCKED and fill in
-					 * entry here.
-					 */
 	/* These three entries are valid only while holding ptl lock */
 	pte_t *pte;			/* Pointer to pte entry matching
 					 * the 'address'. NULL if the page
@@ -1104,8 +1098,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
-#define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */
-#define VM_FAULT_DONE_COW   0x2000	/* ->fault has fully handled COW */
+#define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
diff --git a/mm/memory.c b/mm/memory.c
index 22f7f6e38515..ca3b95fa5fd1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2845,7 +2845,7 @@ static int __do_fault(struct vm_fault *vmf)
 
 	ret = vma->vm_ops->fault(vma, vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
-			    VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW)))
+			    VM_FAULT_DONE_COW)))
 		return ret;
 
 	if (unlikely(PageHWPoison(vmf->page))) {
@@ -3276,17 +3276,12 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (ret & VM_FAULT_DONE_COW)
 		return ret;
 
-	if (!(ret & VM_FAULT_DAX_LOCKED))
-		copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
+	copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
 	__SetPageUptodate(vmf->cow_page);
 
 	ret |= finish_fault(vmf);
-	if (!(ret & VM_FAULT_DAX_LOCKED)) {
-		unlock_page(vmf->page);
-		put_page(vmf->page);
-	} else {
-		dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff);
-	}
+	unlock_page(vmf->page);
+	put_page(vmf->page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 	return ret;
-- 
cgit v1.2.3


From 66a6197c118540d454913eef24d68d7491ab5d5f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:39 -0800
Subject: mm: provide helper for finishing mkwrite faults

Provide a helper function for finishing write faults due to PTE being
read-only.  The helper will be used by DAX to avoid the need of
complicating generic MM code with DAX locking specifics.

Link: http://lkml.kernel.org/r/1479460644-25076-16-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 67 ++++++++++++++++++++++++++++++++----------------------
 2 files changed, 41 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 59a4da1742e5..cec967e93f95 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -615,6 +615,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
 int finish_fault(struct vm_fault *vmf);
+int finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 82e7689e3059..bbc25da48a18 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2269,6 +2269,38 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/**
+ * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
+ *			  writeable once the page is prepared
+ *
+ * @vmf: structure describing the fault
+ *
+ * This function handles all that is needed to finish a write page fault in a
+ * shared mapping due to PTE being read-only once the mapped page is prepared.
+ * It handles locking of PTE and modifying it. The function returns
+ * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE
+ * lock.
+ *
+ * The function expects the page to be locked or other protection against
+ * concurrent faults / writeback (such as DAX radix tree locks).
+ */
+int finish_mkwrite_fault(struct vm_fault *vmf)
+{
+	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
+				       &vmf->ptl);
+	/*
+	 * We might have raced with another page fault while we released the
+	 * pte_offset_map_lock.
+	 */
+	if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		return 0;
+	}
+	wp_page_reuse(vmf);
+	return VM_FAULT_WRITE;
+}
+
 /*
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
@@ -2285,16 +2317,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 		ret = vma->vm_ops->pfn_mkwrite(vma, vmf);
 		if (ret & VM_FAULT_ERROR)
 			return ret;
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
-		/*
-		 * We might have raced with another page fault while we
-		 * released the pte_offset_map_lock.
-		 */
-		if (!pte_same(*vmf->pte, vmf->orig_pte)) {
-			pte_unmap_unlock(vmf->pte, vmf->ptl);
-			return 0;
-		}
+		return finish_mkwrite_fault(vmf);
 	}
 	wp_page_reuse(vmf);
 	return VM_FAULT_WRITE;
@@ -2304,7 +2327,6 @@ static int wp_page_shared(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int page_mkwrite = 0;
 
 	get_page(vmf->page);
 
@@ -2318,26 +2340,17 @@ static int wp_page_shared(struct vm_fault *vmf)
 			put_page(vmf->page);
 			return tmp;
 		}
-		/*
-		 * Since we dropped the lock we need to revalidate
-		 * the PTE as someone else may have changed it.  If
-		 * they did, we just return, as we can count on the
-		 * MMU to tell us if they didn't also make it writable.
-		 */
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-						vmf->address, &vmf->ptl);
-		if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+		tmp = finish_mkwrite_fault(vmf);
+		if (unlikely(!tmp || (tmp &
+				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
 			unlock_page(vmf->page);
-			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			put_page(vmf->page);
-			return 0;
+			return tmp;
 		}
-		page_mkwrite = 1;
-	}
-
-	wp_page_reuse(vmf);
-	if (!page_mkwrite)
+	} else {
+		wp_page_reuse(vmf);
 		lock_page(vmf->page);
+	}
 	fault_dirty_shared_page(vma, vmf->page);
 	put_page(vmf->page);
 
-- 
cgit v1.2.3


From cae1240257d9ba4b40eb240124c530de8ee349bc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Dec 2016 15:07:45 -0800
Subject: mm: export follow_pte()

DAX will need to implement its own version of page_check_address().  To
avoid duplicating page table walking code, export follow_pte() which
does what we need.

Link: http://lkml.kernel.org/r/1479460644-25076-18-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 mm/memory.c        | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cec967e93f95..63926492c06a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1210,6 +1210,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
+	       spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 8b7f0656a921..edd899d0decb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3817,8 +3817,8 @@ out:
 	return -EINVAL;
 }
 
-static inline int follow_pte(struct mm_struct *mm, unsigned long address,
-			     pte_t **ptepp, spinlock_t **ptlp)
+int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
+	       spinlock_t **ptlp)
 {
 	int res;
 
-- 
cgit v1.2.3


From 91d9c05ac6c788531136888d31ef18c6a0ec160f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 14 Dec 2016 15:08:34 -0800
Subject: radix-tree: move rcu_head into a union with private_list

I want to be able to reference node->parent after freeing node.

Currently node->parent is in a union with rcu_head, so it is overwritten
when the node is put on the RCU list.  We know that private_list is not
referenced after the node is freed, so it is safe for these two members
to share space.

Link: http://lkml.kernel.org/r/1480369871-5271-50-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 14 ++++----------
 lib/radix-tree.c           |  1 +
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 744486057e9e..d04073ac3a9f 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -85,18 +85,12 @@ struct radix_tree_node {
 	unsigned char	offset;		/* Slot offset in parent */
 	unsigned char	count;		/* Total entry count */
 	unsigned char	exceptional;	/* Exceptional entry count */
+	struct radix_tree_node *parent;		/* Used when ascending tree */
+	void *private_data;			/* For tree user */
 	union {
-		struct {
-			/* Used when ascending tree */
-			struct radix_tree_node *parent;
-			/* For tree user */
-			void *private_data;
-		};
-		/* Used when freeing node */
-		struct rcu_head	rcu_head;
+		struct list_head private_list;	/* For tree user */
+		struct rcu_head	rcu_head;	/* Used when freeing node */
 	};
-	/* For tree user */
-	struct list_head private_list;
 	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 60c10361cbfa..8c5911eae5e2 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -325,6 +325,7 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 		tag_clear(node, i, 0);
 
 	node->slots[0] = NULL;
+	INIT_LIST_HEAD(&node->private_list);
 
 	kmem_cache_free(radix_tree_node_cachep, node);
 }
-- 
cgit v1.2.3


From 148deab223b23734069abcacb5c7118b0e7deadc Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 14 Dec 2016 15:08:49 -0800
Subject: radix-tree: improve multiorder iterators

This fixes several interlinked problems with the iterators in the
presence of multiorder entries.

1. radix_tree_iter_next() would only advance by one slot, which would
   result in the iterators returning the same entry more than once if
   there were sibling entries.

2. radix_tree_next_slot() could return an internal pointer instead of
   a user pointer if a tagged multiorder entry was immediately followed by
   an entry of lower order.

3. radix_tree_next_slot() expanded to a lot more code than it used to
   when multiorder support was compiled in.  And I wasn't comfortable with
   entry_to_node() being in a header file.

Fixing radix_tree_iter_next() for the presence of sibling entries
necessarily involves examining the contents of the radix tree, so we now
need to pass 'slot' to radix_tree_iter_next(), and we need to change the
calling convention so it is called *before* dropping the lock which
protects the tree.  Also rename it to radix_tree_iter_resume(), as some
people thought it was necessary to call radix_tree_iter_next() each time
around the loop.

radix_tree_next_slot() becomes closer to how it looked before multiorder
support was introduced.  It only checks to see if the next entry in the
chunk is a sibling entry or a pointer to a node; this should be rare
enough that handling this case out of line is not a performance impact
(and such impact is amortised by the fact that the entry we just
processed was a multiorder entry).  Also, radix_tree_next_slot() used to
force a new chunk lookup for untagged entries, which is more expensive
than the out of line sibling entry skipping.

Link: http://lkml.kernel.org/r/1480369871-5271-55-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/tests/btrfs-tests.c               |   2 +-
 include/linux/radix-tree.h                 |  71 +++++++--------
 lib/radix-tree.c                           | 138 +++++++++++++++++++++++++----
 mm/khugepaged.c                            |   7 +-
 mm/shmem.c                                 |   6 +-
 tools/testing/radix-tree/iteration_check.c |  12 +--
 tools/testing/radix-tree/multiorder.c      |  28 ++++--
 tools/testing/radix-tree/regression3.c     |   8 +-
 tools/testing/radix-tree/test.h            |   1 +
 9 files changed, 188 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 73076a0ea6a9..00ee006a8aa2 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -162,7 +162,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 				slot = radix_tree_iter_retry(&iter);
 			continue;
 		}
-		slot = radix_tree_iter_next(&iter);
+		slot = radix_tree_iter_resume(slot, &iter);
 		spin_unlock(&fs_info->buffer_lock);
 		free_extent_buffer_stale(eb);
 		spin_lock(&fs_info->buffer_lock);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index d04073ac3a9f..289d007d487b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -403,20 +403,17 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots)
 }
 
 /**
- * radix_tree_iter_next - resume iterating when the chunk may be invalid
- * @iter:	iterator state
+ * radix_tree_iter_resume - resume iterating when the chunk may be invalid
+ * @slot: pointer to current slot
+ * @iter: iterator state
+ * Returns: New slot pointer
  *
  * If the iterator needs to release then reacquire a lock, the chunk may
  * have been invalidated by an insertion or deletion.  Call this function
- * to continue the iteration from the next index.
+ * before releasing the lock to continue the iteration from the next index.
  */
-static inline __must_check
-void **radix_tree_iter_next(struct radix_tree_iter *iter)
-{
-	iter->next_index = __radix_tree_iter_add(iter, 1);
-	iter->tags = 0;
-	return NULL;
-}
+void **__must_check radix_tree_iter_resume(void **slot,
+					struct radix_tree_iter *iter);
 
 /**
  * radix_tree_chunk_size - get current chunk size
@@ -430,10 +427,17 @@ radix_tree_chunk_size(struct radix_tree_iter *iter)
 	return (iter->next_index - iter->index) >> iter_shift(iter);
 }
 
-static inline struct radix_tree_node *entry_to_node(void *ptr)
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
+				unsigned flags);
+#else
+/* Can't happen without sibling entries, but the compiler can't tell that */
+static inline void ** __radix_tree_next_slot(void **slot,
+				struct radix_tree_iter *iter, unsigned flags)
 {
-	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
+	return slot;
 }
+#endif
 
 /**
  * radix_tree_next_slot - find next slot in chunk
@@ -447,7 +451,7 @@ static inline struct radix_tree_node *entry_to_node(void *ptr)
  * For tagged lookup it also eats @iter->tags.
  *
  * There are several cases where 'slot' can be passed in as NULL to this
- * function.  These cases result from the use of radix_tree_iter_next() or
+ * function.  These cases result from the use of radix_tree_iter_resume() or
  * radix_tree_iter_retry().  In these cases we don't end up dereferencing
  * 'slot' because either:
  * a) we are doing tagged iteration and iter->tags has been set to 0, or
@@ -458,51 +462,31 @@ static __always_inline void **
 radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 {
 	if (flags & RADIX_TREE_ITER_TAGGED) {
-		void *canon = slot;
-
 		iter->tags >>= 1;
 		if (unlikely(!iter->tags))
 			return NULL;
-		while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) &&
-					radix_tree_is_internal_node(slot[1])) {
-			if (entry_to_node(slot[1]) == canon) {
-				iter->tags >>= 1;
-				iter->index = __radix_tree_iter_add(iter, 1);
-				slot++;
-				continue;
-			}
-			iter->next_index = __radix_tree_iter_add(iter, 1);
-			return NULL;
-		}
 		if (likely(iter->tags & 1ul)) {
 			iter->index = __radix_tree_iter_add(iter, 1);
-			return slot + 1;
+			slot++;
+			goto found;
 		}
 		if (!(flags & RADIX_TREE_ITER_CONTIG)) {
 			unsigned offset = __ffs(iter->tags);
 
-			iter->tags >>= offset;
-			iter->index = __radix_tree_iter_add(iter, offset + 1);
-			return slot + offset + 1;
+			iter->tags >>= offset++;
+			iter->index = __radix_tree_iter_add(iter, offset);
+			slot += offset;
+			goto found;
 		}
 	} else {
 		long count = radix_tree_chunk_size(iter);
-		void *canon = slot;
 
 		while (--count > 0) {
 			slot++;
 			iter->index = __radix_tree_iter_add(iter, 1);
 
-			if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) &&
-			    radix_tree_is_internal_node(*slot)) {
-				if (entry_to_node(*slot) == canon)
-					continue;
-				iter->next_index = iter->index;
-				break;
-			}
-
 			if (likely(*slot))
-				return slot;
+				goto found;
 			if (flags & RADIX_TREE_ITER_CONTIG) {
 				/* forbid switching to the next chunk */
 				iter->next_index = 0;
@@ -511,6 +495,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 		}
 	}
 	return NULL;
+
+ found:
+	if (unlikely(radix_tree_is_internal_node(*slot)))
+		return __radix_tree_next_slot(slot, iter, flags);
+	return slot;
 }
 
 /**
@@ -561,6 +550,6 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	     slot || (slot = radix_tree_next_chunk(root, iter,		\
 			      RADIX_TREE_ITER_TAGGED | tag)) ;		\
 	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_TAGGED))
+				RADIX_TREE_ITER_TAGGED | tag))
 
 #endif /* _LINUX_RADIX_TREE_H */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index dbf4e8c8e100..0a7ac0fb4a65 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -69,6 +69,11 @@ struct radix_tree_preload {
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
+static inline struct radix_tree_node *entry_to_node(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
+}
+
 static inline void *node_to_entry(void *ptr)
 {
 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
@@ -1104,6 +1109,120 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter,
 #endif
 }
 
+/* Construct iter->tags bit-mask from node->tags[tag] array */
+static void set_iter_tags(struct radix_tree_iter *iter,
+				struct radix_tree_node *node, unsigned offset,
+				unsigned tag)
+{
+	unsigned tag_long = offset / BITS_PER_LONG;
+	unsigned tag_bit  = offset % BITS_PER_LONG;
+
+	iter->tags = node->tags[tag][tag_long] >> tag_bit;
+
+	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
+	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
+		/* Pick tags from next element */
+		if (tag_bit)
+			iter->tags |= node->tags[tag][tag_long + 1] <<
+						(BITS_PER_LONG - tag_bit);
+		/* Clip chunk size, here only BITS_PER_LONG tags */
+		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
+	}
+}
+
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+static void **skip_siblings(struct radix_tree_node **nodep,
+			void **slot, struct radix_tree_iter *iter)
+{
+	void *sib = node_to_entry(slot - 1);
+
+	while (iter->index < iter->next_index) {
+		*nodep = rcu_dereference_raw(*slot);
+		if (*nodep && *nodep != sib)
+			return slot;
+		slot++;
+		iter->index = __radix_tree_iter_add(iter, 1);
+		iter->tags >>= 1;
+	}
+
+	*nodep = NULL;
+	return NULL;
+}
+
+void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
+					unsigned flags)
+{
+	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
+	struct radix_tree_node *node = rcu_dereference_raw(*slot);
+
+	slot = skip_siblings(&node, slot, iter);
+
+	while (radix_tree_is_internal_node(node)) {
+		unsigned offset;
+		unsigned long next_index;
+
+		if (node == RADIX_TREE_RETRY)
+			return slot;
+		node = entry_to_node(node);
+		iter->shift = node->shift;
+
+		if (flags & RADIX_TREE_ITER_TAGGED) {
+			offset = radix_tree_find_next_bit(node, tag, 0);
+			if (offset == RADIX_TREE_MAP_SIZE)
+				return NULL;
+			slot = &node->slots[offset];
+			iter->index = __radix_tree_iter_add(iter, offset);
+			set_iter_tags(iter, node, offset, tag);
+			node = rcu_dereference_raw(*slot);
+		} else {
+			offset = 0;
+			slot = &node->slots[0];
+			for (;;) {
+				node = rcu_dereference_raw(*slot);
+				if (node)
+					break;
+				slot++;
+				offset++;
+				if (offset == RADIX_TREE_MAP_SIZE)
+					return NULL;
+			}
+			iter->index = __radix_tree_iter_add(iter, offset);
+		}
+		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
+			goto none;
+		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
+		if (next_index < iter->next_index)
+			iter->next_index = next_index;
+	}
+
+	return slot;
+ none:
+	iter->next_index = 0;
+	return NULL;
+}
+EXPORT_SYMBOL(__radix_tree_next_slot);
+#else
+static void **skip_siblings(struct radix_tree_node **nodep,
+			void **slot, struct radix_tree_iter *iter)
+{
+	return slot;
+}
+#endif
+
+void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter)
+{
+	struct radix_tree_node *node;
+
+	slot++;
+	iter->index = __radix_tree_iter_add(iter, 1);
+	node = rcu_dereference_raw(*slot);
+	skip_siblings(&node, slot, iter);
+	iter->next_index = iter->index;
+	iter->tags = 0;
+	return NULL;
+}
+EXPORT_SYMBOL(radix_tree_iter_resume);
+
 /**
  * radix_tree_next_chunk - find next chunk of slots for iteration
  *
@@ -1191,23 +1310,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 	iter->next_index = (index | node_maxindex(node)) + 1;
 	__set_iter_shift(iter, node->shift);
 
-	/* Construct iter->tags bit-mask from node->tags[tag] array */
-	if (flags & RADIX_TREE_ITER_TAGGED) {
-		unsigned tag_long, tag_bit;
-
-		tag_long = offset / BITS_PER_LONG;
-		tag_bit  = offset % BITS_PER_LONG;
-		iter->tags = node->tags[tag][tag_long] >> tag_bit;
-		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
-		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
-			/* Pick tags from next element */
-			if (tag_bit)
-				iter->tags |= node->tags[tag][tag_long + 1] <<
-						(BITS_PER_LONG - tag_bit);
-			/* Clip chunk size, here only BITS_PER_LONG tags */
-			iter->next_index = index + BITS_PER_LONG;
-		}
-	}
+	if (flags & RADIX_TREE_ITER_TAGGED)
+		set_iter_tags(iter, node, offset, tag);
 
 	return node->slots + offset;
 }
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7434a63cac94..e32389a97030 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1446,7 +1446,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		radix_tree_replace_slot(&mapping->page_tree, slot,
 				new_page + (index % HPAGE_PMD_NR));
 
-		slot = radix_tree_iter_next(&iter);
+		slot = radix_tree_iter_resume(slot, &iter);
 		index++;
 		continue;
 out_lru:
@@ -1546,7 +1546,6 @@ tree_unlocked:
 				/* Put holes back where they were */
 				radix_tree_delete(&mapping->page_tree,
 						  iter.index);
-				slot = radix_tree_iter_next(&iter);
 				continue;
 			}
 
@@ -1557,11 +1556,11 @@ tree_unlocked:
 			page_ref_unfreeze(page, 2);
 			radix_tree_replace_slot(&mapping->page_tree,
 						slot, page);
+			slot = radix_tree_iter_resume(slot, &iter);
 			spin_unlock_irq(&mapping->tree_lock);
 			putback_lru_page(page);
 			unlock_page(page);
 			spin_lock_irq(&mapping->tree_lock);
-			slot = radix_tree_iter_next(&iter);
 		}
 		VM_BUG_ON(nr_none);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -1641,8 +1640,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
 		present++;
 
 		if (need_resched()) {
+			slot = radix_tree_iter_resume(slot, &iter);
 			cond_resched_rcu();
-			slot = radix_tree_iter_next(&iter);
 		}
 	}
 	rcu_read_unlock();
diff --git a/mm/shmem.c b/mm/shmem.c
index abd7403aba41..be11c6dd414a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -661,8 +661,8 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 			swapped++;
 
 		if (need_resched()) {
+			slot = radix_tree_iter_resume(slot, &iter);
 			cond_resched_rcu();
-			slot = radix_tree_iter_next(&iter);
 		}
 	}
 
@@ -2447,8 +2447,8 @@ static void shmem_tag_pins(struct address_space *mapping)
 		}
 
 		if (need_resched()) {
+			slot = radix_tree_iter_resume(slot, &iter);
 			cond_resched_rcu();
-			slot = radix_tree_iter_next(&iter);
 		}
 	}
 	rcu_read_unlock();
@@ -2517,8 +2517,8 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 			spin_unlock_irq(&mapping->tree_lock);
 continue_resched:
 			if (need_resched()) {
+				slot = radix_tree_iter_resume(slot, &iter);
 				cond_resched_rcu();
-				slot = radix_tree_iter_next(&iter);
 			}
 		}
 		rcu_read_unlock();
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index df71cb841385..f328a66899b4 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -48,8 +48,8 @@ static void *add_entries_fn(void *arg)
 /*
  * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
  * things that have been removed and randomly resetting our iteration to the
- * next chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
- * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * next chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
  * NULL 'slot' variable.
  */
 static void *tagged_iteration_fn(void *arg)
@@ -79,7 +79,7 @@ static void *tagged_iteration_fn(void *arg)
 			}
 
 			if (rand_r(&seeds[0]) % 50 == 0) {
-				slot = radix_tree_iter_next(&iter);
+				slot = radix_tree_iter_resume(slot, &iter);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -96,8 +96,8 @@ static void *tagged_iteration_fn(void *arg)
 /*
  * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
  * that have been removed and randomly resetting our iteration to the next
- * chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
- * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
  * NULL 'slot' variable.
  */
 static void *untagged_iteration_fn(void *arg)
@@ -127,7 +127,7 @@ static void *untagged_iteration_fn(void *arg)
 			}
 
 			if (rand_r(&seeds[1]) % 50 == 0) {
-				slot = radix_tree_iter_next(&iter);
+				slot = radix_tree_iter_resume(slot, &iter);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 8d5865c95664..b9be8856d652 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -231,11 +231,14 @@ void multiorder_iteration(void)
 		radix_tree_for_each_slot(slot, &tree, &iter, j) {
 			int height = order[i] / RADIX_TREE_MAP_SHIFT;
 			int shift = height * RADIX_TREE_MAP_SHIFT;
-			int mask = (1 << order[i]) - 1;
+			unsigned long mask = (1UL << order[i]) - 1;
+			struct item *item = *slot;
 
-			assert(iter.index >= (index[i] &~ mask));
-			assert(iter.index <= (index[i] | mask));
+			assert((iter.index | mask) == (index[i] | mask));
 			assert(iter.shift == shift);
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (index[i] | mask));
+			assert(item->order == order[i]);
 			i++;
 		}
 	}
@@ -269,7 +272,7 @@ void multiorder_tagged_iteration(void)
 		assert(radix_tree_tag_set(&tree, tag_index[i], 1));
 
 	for (j = 0; j < 256; j++) {
-		int mask, k;
+		int k;
 
 		for (i = 0; i < TAG_ENTRIES; i++) {
 			for (k = i; index[k] < tag_index[i]; k++)
@@ -279,12 +282,16 @@ void multiorder_tagged_iteration(void)
 		}
 
 		radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+			unsigned long mask;
+			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
-			mask = (1 << order[k]) - 1;
+			mask = (1UL << order[k]) - 1;
 
-			assert(iter.index >= (tag_index[i] &~ mask));
-			assert(iter.index <= (tag_index[i] | mask));
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
 			i++;
 		}
 	}
@@ -303,12 +310,15 @@ void multiorder_tagged_iteration(void)
 		}
 
 		radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
+			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1 << order[k]) - 1;
 
-			assert(iter.index >= (tag_index[i] &~ mask));
-			assert(iter.index <= (tag_index[i] | mask));
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
 			i++;
 		}
 	}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index 1f06ed73d0a8..b594841fae85 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -5,7 +5,7 @@
  * In following radix_tree_next_slot current chunk size becomes zero.
  * This isn't checked and it tries to dereference null pointer in slot.
  *
- * Helper radix_tree_iter_next reset slot to NULL and next_index to index + 1,
+ * Helper radix_tree_iter_resume reset slot to NULL and next_index to index + 1,
  * for tagger iteraction it also must reset cached tags in iterator to abort
  * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk.
  *
@@ -88,7 +88,7 @@ void regression3_test(void)
 		printf("slot %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
@@ -96,7 +96,7 @@ void regression3_test(void)
 		printf("contig %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
@@ -106,7 +106,7 @@ void regression3_test(void)
 		printf("tagged %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 423c528aaee9..617416ec3c5e 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -41,6 +41,7 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
 extern int nr_allocated;
 
 /* Normally private parts of lib/radix-tree.c */
+struct radix_tree_node *entry_to_node(void *ptr);
 void radix_tree_dump(struct radix_tree_root *root);
 int root_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned long node_maxindex(struct radix_tree_node *);
-- 
cgit v1.2.3


From 478922e2b0f41567e4a530771bfb3f693f857d45 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 14 Dec 2016 15:08:52 -0800
Subject: radix-tree: delete radix_tree_locate_item()

This rather complicated function can be better implemented as an
iterator.  It has only one caller, so move the functionality to the only
place that needs it.  Update the test suite to follow the same pattern.

Link: http://lkml.kernel.org/r/1480369871-5271-56-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h      |  1 -
 lib/radix-tree.c                | 99 -----------------------------------------
 mm/shmem.c                      | 26 ++++++++++-
 tools/testing/radix-tree/main.c |  8 ++--
 tools/testing/radix-tree/test.c | 22 +++++++++
 tools/testing/radix-tree/test.h |  2 +
 6 files changed, 53 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 289d007d487b..a13d3f7c6c65 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -296,7 +296,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 		unsigned long nr_to_tag,
 		unsigned int fromtag, unsigned int totag);
 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
 
 static inline void radix_tree_preload_end(void)
 {
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 0a7ac0fb4a65..5bdf1bdbca00 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1579,105 +1579,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
 
-#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
-#include <linux/sched.h> /* for cond_resched() */
-
-struct locate_info {
-	unsigned long found_index;
-	bool stop;
-};
-
-/*
- * This linear search is at present only useful to shmem_unuse_inode().
- */
-static unsigned long __locate(struct radix_tree_node *slot, void *item,
-			      unsigned long index, struct locate_info *info)
-{
-	unsigned long i;
-
-	do {
-		unsigned int shift = slot->shift;
-
-		for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
-		     i < RADIX_TREE_MAP_SIZE;
-		     i++, index += (1UL << shift)) {
-			struct radix_tree_node *node =
-					rcu_dereference_raw(slot->slots[i]);
-			if (node == RADIX_TREE_RETRY)
-				goto out;
-			if (!radix_tree_is_internal_node(node)) {
-				if (node == item) {
-					info->found_index = index;
-					info->stop = true;
-					goto out;
-				}
-				continue;
-			}
-			node = entry_to_node(node);
-			if (is_sibling_entry(slot, node))
-				continue;
-			slot = node;
-			break;
-		}
-	} while (i < RADIX_TREE_MAP_SIZE);
-
-out:
-	if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
-		info->stop = true;
-	return index;
-}
-
-/**
- *	radix_tree_locate_item - search through radix tree for item
- *	@root:		radix tree root
- *	@item:		item to be found
- *
- *	Returns index where item was found, or -1 if not found.
- *	Caller must hold no lock (since this time-consuming function needs
- *	to be preemptible), and must check afterwards if item is still there.
- */
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
-	struct radix_tree_node *node;
-	unsigned long max_index;
-	unsigned long cur_index = 0;
-	struct locate_info info = {
-		.found_index = -1,
-		.stop = false,
-	};
-
-	do {
-		rcu_read_lock();
-		node = rcu_dereference_raw(root->rnode);
-		if (!radix_tree_is_internal_node(node)) {
-			rcu_read_unlock();
-			if (node == item)
-				info.found_index = 0;
-			break;
-		}
-
-		node = entry_to_node(node);
-
-		max_index = node_maxindex(node);
-		if (cur_index > max_index) {
-			rcu_read_unlock();
-			break;
-		}
-
-		cur_index = __locate(node, item, cur_index, &info);
-		rcu_read_unlock();
-		cond_resched();
-	} while (!info.stop && cur_index <= max_index);
-
-	return info.found_index;
-}
-#else
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
-	return -1;
-}
-#endif /* CONFIG_SHMEM && CONFIG_SWAP */
-
 /**
  *	__radix_tree_delete_node    -    try to free node after clearing a slot
  *	@root:		radix tree root
diff --git a/mm/shmem.c b/mm/shmem.c
index be11c6dd414a..54287d443806 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1049,6 +1049,30 @@ static void shmem_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
+static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned long found = -1;
+	unsigned int checked = 0;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, root, &iter, 0) {
+		if (*slot == item) {
+			found = iter.index;
+			break;
+		}
+		checked++;
+		if ((checked % 4096) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
+		cond_resched_rcu();
+	}
+
+	rcu_read_unlock();
+	return found;
+}
+
 /*
  * If swap found in inode, free it and move page from swapcache to filecache.
  */
@@ -1062,7 +1086,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	int error = 0;
 
 	radswap = swp_to_radix_entry(swap);
-	index = radix_tree_locate_item(&mapping->page_tree, radswap);
+	index = find_swap_entry(&mapping->page_tree, radswap);
 	if (index == -1)
 		return -EAGAIN;	/* tell shmem_unuse we found nothing */
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 76d9c95aa487..a028dae8a043 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -239,7 +239,7 @@ static void __locate_check(struct radix_tree_root *tree, unsigned long index,
 
 	item_insert_order(tree, index, order);
 	item = item_lookup(tree, index);
-	index2 = radix_tree_locate_item(tree, item);
+	index2 = find_item(tree, item);
 	if (index != index2) {
 		printf("index %ld order %d inserted; found %ld\n",
 			index, order, index2);
@@ -273,17 +273,17 @@ static void locate_check(void)
 			     index += (1UL << order)) {
 				__locate_check(&tree, index + offset, order);
 			}
-			if (radix_tree_locate_item(&tree, &tree) != -1)
+			if (find_item(&tree, &tree) != -1)
 				abort();
 
 			item_kill_tree(&tree);
 		}
 	}
 
-	if (radix_tree_locate_item(&tree, &tree) != -1)
+	if (find_item(&tree, &tree) != -1)
 		abort();
 	__locate_check(&tree, -1, 0);
-	if (radix_tree_locate_item(&tree, &tree) != -1)
+	if (find_item(&tree, &tree) != -1)
 		abort();
 	item_kill_tree(&tree);
 }
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 0de548939a2e..88bf57f7175e 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -151,6 +151,28 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 	assert(nfound == 0);
 }
 
+/* Use the same pattern as find_swap_entry() in mm/shmem.c */
+unsigned long find_item(struct radix_tree_root *root, void *item)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned long found = -1;
+	unsigned long checked = 0;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0) {
+		if (*slot == item) {
+			found = iter.index;
+			break;
+		}
+		checked++;
+		if ((checked % 4) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
+	}
+
+	return found;
+}
+
 static int verify_node(struct radix_tree_node *slot, unsigned int tag,
 			int tagged)
 {
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 617416ec3c5e..3d9d1d30da22 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -25,6 +25,8 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 			unsigned long nr, int chunk);
 void item_kill_tree(struct radix_tree_root *root);
 
+unsigned long find_item(struct radix_tree_root *, void *item);
+
 void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(void);
-- 
cgit v1.2.3


From 268f42de718128cd0301293177e79c08c38e39a6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 14 Dec 2016 15:08:55 -0800
Subject: radix-tree: delete radix_tree_range_tag_if_tagged()

This is an exceptionally complicated function with just one caller
(tag_pages_for_writeback).  We devote a large portion of the runtime of
the test suite to testing this one function which has one caller.  By
introducing the new function radix_tree_iter_tag_set(), we can eliminate
all of the complexity while keeping the performance.  The caller can now
use a fairly standard radix_tree_for_each() loop, and it doesn't need to
worry about tricksy things like 'start' wrapping.

The test suite continues to spend a large amount of time investigating
this function, but now it's testing the underlying primitives such as
radix_tree_iter_resume() and the radix_tree_for_each_tagged() iterator
which are also used by other parts of the kernel.

Link: http://lkml.kernel.org/r/1480369871-5271-57-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h             |  74 ++++++++++-----------
 lib/radix-tree.c                       | 117 ++++++---------------------------
 mm/page-writeback.c                    |  28 +++++---
 tools/testing/radix-tree/main.c        |  12 ++--
 tools/testing/radix-tree/multiorder.c  |  13 ++--
 tools/testing/radix-tree/regression2.c |   3 +-
 tools/testing/radix-tree/tag_check.c   |   4 +-
 tools/testing/radix-tree/test.c        |  34 ++++++++++
 tools/testing/radix-tree/test.h        |   3 +
 9 files changed, 125 insertions(+), 163 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index a13d3f7c6c65..7a8d2516c73a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -120,6 +120,41 @@ static inline bool radix_tree_empty(struct radix_tree_root *root)
 	return root->rnode == NULL;
 }
 
+/**
+ * struct radix_tree_iter - radix tree iterator state
+ *
+ * @index:	index of current slot
+ * @next_index:	one beyond the last index for this chunk
+ * @tags:	bit-mask for tag-iterating
+ * @node:	node that contains current slot
+ * @shift:	shift for the node that holds our slots
+ *
+ * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
+ * subinterval of slots contained within one radix tree leaf node.  It is
+ * described by a pointer to its first slot and a struct radix_tree_iter
+ * which holds the chunk's position in the tree and its size.  For tagged
+ * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
+ * radix tree tag.
+ */
+struct radix_tree_iter {
+	unsigned long	index;
+	unsigned long	next_index;
+	unsigned long	tags;
+	struct radix_tree_node *node;
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	unsigned int	shift;
+#endif
+};
+
+static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
+{
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	return iter->shift;
+#else
+	return 0;
+#endif
+}
+
 /**
  * Radix-tree synchronization
  *
@@ -283,6 +318,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
+void radix_tree_iter_tag_set(struct radix_tree_root *root,
+		const struct radix_tree_iter *iter, unsigned int tag);
 unsigned int
 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		unsigned long first_index, unsigned int max_items,
@@ -291,10 +328,6 @@ unsigned int
 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag);
-unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
-		unsigned long *first_indexp, unsigned long last_index,
-		unsigned long nr_to_tag,
-		unsigned int fromtag, unsigned int totag);
 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
@@ -302,39 +335,6 @@ static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
-/**
- * struct radix_tree_iter - radix tree iterator state
- *
- * @index:	index of current slot
- * @next_index:	one beyond the last index for this chunk
- * @tags:	bit-mask for tag-iterating
- * @shift:	shift for the node that holds our slots
- *
- * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
- * subinterval of slots contained within one radix tree leaf node.  It is
- * described by a pointer to its first slot and a struct radix_tree_iter
- * which holds the chunk's position in the tree and its size.  For tagged
- * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
- * radix tree tag.
- */
-struct radix_tree_iter {
-	unsigned long	index;
-	unsigned long	next_index;
-	unsigned long	tags;
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	unsigned int	shift;
-#endif
-};
-
-static inline unsigned int iter_shift(struct radix_tree_iter *iter)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	return iter->shift;
-#else
-	return 0;
-#endif
-}
-
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
 #define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 5bdf1bdbca00..d1f4ca5b7989 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -219,6 +219,11 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
 	return RADIX_TREE_MAP_SIZE;
 }
 
+static unsigned int iter_offset(const struct radix_tree_iter *iter)
+{
+	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
+}
+
 /*
  * The maximum index which can be stored in a radix tree
  */
@@ -1014,6 +1019,18 @@ static void node_tag_set(struct radix_tree_root *root,
 		root_tag_set(root, tag);
 }
 
+/**
+ * radix_tree_iter_tag_set - set a tag on the current iterator entry
+ * @root:	radix tree root
+ * @iter:	iterator state
+ * @tag:	tag to set
+ */
+void radix_tree_iter_tag_set(struct radix_tree_root *root,
+			const struct radix_tree_iter *iter, unsigned int tag)
+{
+	node_tag_set(root, iter->node, tag, iter_offset(iter));
+}
+
 /**
  *	radix_tree_tag_clear - clear a tag on a radix tree node
  *	@root:		radix tree root
@@ -1164,6 +1181,7 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
 		if (node == RADIX_TREE_RETRY)
 			return slot;
 		node = entry_to_node(node);
+		iter->node = node;
 		iter->shift = node->shift;
 
 		if (flags & RADIX_TREE_ITER_TAGGED) {
@@ -1266,6 +1284,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 		iter->index = index;
 		iter->next_index = maxindex + 1;
 		iter->tags = 1;
+		iter->node = NULL;
 		__set_iter_shift(iter, 0);
 		return (void **)&root->rnode;
 	}
@@ -1308,6 +1327,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 	/* Update the iterator state */
 	iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
 	iter->next_index = (index | node_maxindex(node)) + 1;
+	iter->node = node;
 	__set_iter_shift(iter, node->shift);
 
 	if (flags & RADIX_TREE_ITER_TAGGED)
@@ -1317,103 +1337,6 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_next_chunk);
 
-/**
- * radix_tree_range_tag_if_tagged - for each item in given range set given
- *				   tag if item has another tag set
- * @root:		radix tree root
- * @first_indexp:	pointer to a starting index of a range to scan
- * @last_index:		last index of a range to scan
- * @nr_to_tag:		maximum number items to tag
- * @iftag:		tag index to test
- * @settag:		tag index to set if tested tag is set
- *
- * This function scans range of radix tree from first_index to last_index
- * (inclusive).  For each item in the range if iftag is set, the function sets
- * also settag. The function stops either after tagging nr_to_tag items or
- * after reaching last_index.
- *
- * The tags must be set from the leaf level only and propagated back up the
- * path to the root. We must do this so that we resolve the full path before
- * setting any tags on intermediate nodes. If we set tags as we descend, then
- * we can get to the leaf node and find that the index that has the iftag
- * set is outside the range we are scanning. This reults in dangling tags and
- * can lead to problems with later tag operations (e.g. livelocks on lookups).
- *
- * The function returns the number of leaves where the tag was set and sets
- * *first_indexp to the first unscanned index.
- * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
- * be prepared to handle that.
- */
-unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
-		unsigned long *first_indexp, unsigned long last_index,
-		unsigned long nr_to_tag,
-		unsigned int iftag, unsigned int settag)
-{
-	struct radix_tree_node *node, *child;
-	unsigned long maxindex;
-	unsigned long tagged = 0;
-	unsigned long index = *first_indexp;
-
-	radix_tree_load_root(root, &child, &maxindex);
-	last_index = min(last_index, maxindex);
-	if (index > last_index)
-		return 0;
-	if (!nr_to_tag)
-		return 0;
-	if (!root_tag_get(root, iftag)) {
-		*first_indexp = last_index + 1;
-		return 0;
-	}
-	if (!radix_tree_is_internal_node(child)) {
-		*first_indexp = last_index + 1;
-		root_tag_set(root, settag);
-		return 1;
-	}
-
-	node = entry_to_node(child);
-
-	for (;;) {
-		unsigned offset = radix_tree_descend(node, &child, index);
-		if (!child)
-			goto next;
-		if (!tag_get(node, iftag, offset))
-			goto next;
-		/* Sibling slots never have tags set on them */
-		if (radix_tree_is_internal_node(child)) {
-			node = entry_to_node(child);
-			continue;
-		}
-
-		tagged++;
-		node_tag_set(root, node, settag, offset);
- next:
-		/* Go to next entry in node */
-		index = ((index >> node->shift) + 1) << node->shift;
-		/* Overflow can happen when last_index is ~0UL... */
-		if (index > last_index || !index)
-			break;
-		offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
-		while (offset == 0) {
-			/*
-			 * We've fully scanned this node. Go up. Because
-			 * last_index is guaranteed to be in the tree, what
-			 * we do below cannot wander astray.
-			 */
-			node = node->parent;
-			offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
-		}
-		if (is_sibling_entry(node, node->slots[offset]))
-			goto next;
-		if (tagged >= nr_to_tag)
-			break;
-	}
-
-	*first_indexp = index;
-
-	return tagged;
-}
-EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
-
 /**
  *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
  *	@root:		radix tree root
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 52e2f8e3b472..290e8b7d3181 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2106,18 +2106,26 @@ void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end)
 {
 #define WRITEBACK_TAG_BATCH 4096
-	unsigned long tagged;
-
-	do {
-		spin_lock_irq(&mapping->tree_lock);
-		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
-				&start, end, WRITEBACK_TAG_BATCH,
-				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+	unsigned long tagged = 0;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	spin_lock_irq(&mapping->tree_lock);
+	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
+							PAGECACHE_TAG_DIRTY) {
+		if (iter.index > end)
+			break;
+		radix_tree_iter_tag_set(&mapping->page_tree, &iter,
+							PAGECACHE_TAG_TOWRITE);
+		tagged++;
+		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
 		spin_unlock_irq(&mapping->tree_lock);
-		WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
 		cond_resched();
-		/* We check 'start' to handle wrapping when end == ~0UL */
-	} while (tagged >= WRITEBACK_TAG_BATCH && start);
+		spin_lock_irq(&mapping->tree_lock);
+	}
+	spin_unlock_irq(&mapping->tree_lock);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index a028dae8a043..170175ca4105 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -205,8 +205,7 @@ void copy_tag_check(void)
 	}
 
 //	printf("\ncopying tags...\n");
-	cur = start;
-	tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1);
+	tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
 
 //	printf("checking copied tags\n");
 	assert(tagged == count);
@@ -214,16 +213,13 @@ void copy_tag_check(void)
 
 	/* Copy tags in several rounds */
 //	printf("\ncopying tags...\n");
-	cur = start;
-	do {
-		tmp = rand() % (count/10+2);
-		tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2);
-	} while (tmp == tagged);
+	tmp = rand() % (count / 10 + 2);
+	tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+	assert(tagged == count);
 
 //	printf("%lu %lu %lu\n", tagged, tmp, count);
 //	printf("checking copied tags\n");
 	check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
-	assert(tagged < tmp);
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
 	verify_tag_consistency(&tree, 2);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index b9be8856d652..86daf23b3509 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -26,7 +26,6 @@ static void __multiorder_tag_test(int index, int order)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 	int base, err, i;
-	unsigned long first = 0;
 
 	/* our canonical entry */
 	base = index & ~((1 << order) - 1);
@@ -60,7 +59,7 @@ static void __multiorder_tag_test(int index, int order)
 		assert(!radix_tree_tag_get(&tree, i, 1));
 	}
 
-	assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1);
+	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
 	assert(radix_tree_tag_clear(&tree, index, 0));
 
 	for_each_index(i, base, order) {
@@ -251,7 +250,6 @@ void multiorder_tagged_iteration(void)
 	RADIX_TREE(tree, GFP_KERNEL);
 	struct radix_tree_iter iter;
 	void **slot;
-	unsigned long first = 0;
 	int i, j;
 
 	printf("Multiorder tagged iteration test\n");
@@ -296,8 +294,8 @@ void multiorder_tagged_iteration(void)
 		}
 	}
 
-	radix_tree_range_tag_if_tagged(&tree, &first, ~0UL,
-					MT_NUM_ENTRIES, 1, 2);
+	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
+				TAG_ENTRIES);
 
 	for (j = 0; j < 256; j++) {
 		int mask, k;
@@ -323,9 +321,8 @@ void multiorder_tagged_iteration(void)
 		}
 	}
 
-	first = 1;
-	radix_tree_range_tag_if_tagged(&tree, &first, ~0UL,
-					MT_NUM_ENTRIES, 1, 0);
+	assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
+			== TAG_ENTRIES);
 	i = 0;
 	radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
 		assert(iter.index == tag_index[i]);
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index 63bf347aaf33..a41325d7a170 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -50,6 +50,7 @@
 #include <stdio.h>
 
 #include "regression.h"
+#include "test.h"
 
 #define PAGECACHE_TAG_DIRTY     0
 #define PAGECACHE_TAG_WRITEBACK 1
@@ -90,7 +91,7 @@ void regression2_test(void)
 	/* 1. */
 	start = 0;
 	end = max_slots - 2;
-	radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1,
+	tag_tagged_items(&mt_tree, NULL, start, end, 1,
 				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 
 	/* 2. */
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index 186f6e4914e4..ed5f87d1dce2 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -23,7 +23,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
 	item_tag_set(tree, index, tag);
 	ret = item_tag_get(tree, index, tag);
 	assert(ret != 0);
-	ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag);
+	ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
 	assert(ret == 1);
 	ret = item_tag_get(tree, index, !tag);
 	assert(ret != 0);
@@ -320,7 +320,7 @@ static void single_check(void)
 	assert(ret == 0);
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
-	ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1);
+	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
 	assert(ret == 1);
 	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
 	assert(ret == 1);
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 88bf57f7175e..e5726e373646 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -151,6 +151,40 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 	assert(nfound == 0);
 }
 
+/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
+int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
+			unsigned long start, unsigned long end, unsigned batch,
+			unsigned iftag, unsigned thentag)
+{
+	unsigned long tagged = 0;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	if (batch == 0)
+		batch = 1;
+
+	if (lock)
+		pthread_mutex_lock(lock);
+	radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
+		if (iter.index > end)
+			break;
+		radix_tree_iter_tag_set(root, &iter, thentag);
+		tagged++;
+		if ((tagged % batch) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
+		if (lock) {
+			pthread_mutex_unlock(lock);
+			rcu_barrier();
+			pthread_mutex_lock(lock);
+		}
+	}
+	if (lock)
+		pthread_mutex_unlock(lock);
+
+	return tagged;
+}
+
 /* Use the same pattern as find_swap_entry() in mm/shmem.c */
 unsigned long find_item(struct radix_tree_root *root, void *item)
 {
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 3d9d1d30da22..e11e4d260b4e 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -25,6 +25,9 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 			unsigned long nr, int chunk);
 void item_kill_tree(struct radix_tree_root *root);
 
+int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
+			unsigned long start, unsigned long end, unsigned batch,
+			unsigned iftag, unsigned thentag);
 unsigned long find_item(struct radix_tree_root *, void *item);
 
 void tag_check(void);
-- 
cgit v1.2.3


From 175542f575723e43f897ddb09d0011c13f7cf0ec Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:08:58 -0800
Subject: radix-tree: add radix_tree_join

This new function allows for the replacement of many smaller entries in
the radix tree with one larger multiorder entry.  From the point of view
of an RCU walker, they may see a mixture of the smaller entries and the
large entry during the same walk, but they will never see NULL for an
index which was populated before the join.

Link: http://lkml.kernel.org/r/1480369871-5271-58-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h            |   3 +
 lib/radix-tree.c                      | 183 ++++++++++++++++++++++++++++------
 tools/testing/radix-tree/multiorder.c |  58 +++++++++++
 3 files changed, 213 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7a8d2516c73a..935293a24f7d 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -335,6 +335,9 @@ static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
+int radix_tree_join(struct radix_tree_root *, unsigned long index,
+			unsigned new_order, void *);
+
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
 #define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d1f4ca5b7989..962cfb3a7671 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -339,17 +339,14 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 {
 	struct radix_tree_node *node =
 			container_of(head, struct radix_tree_node, rcu_head);
-	int i;
 
 	/*
-	 * must only free zeroed nodes into the slab. radix_tree_shrink
-	 * can leave us with a non-NULL entry in the first slot, so clear
-	 * that here to make sure.
+	 * Must only free zeroed nodes into the slab.  We can be left with
+	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
+	 * and tags here.
 	 */
-	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
-		tag_clear(node, i, 0);
-
-	node->slots[0] = NULL;
+	memset(node->slots, 0, sizeof(node->slots));
+	memset(node->tags, 0, sizeof(node->tags));
 	INIT_LIST_HEAD(&node->private_list);
 
 	kmem_cache_free(radix_tree_node_cachep, node);
@@ -678,14 +675,14 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 	shift = radix_tree_load_root(root, &child, &maxindex);
 
 	/* Make sure the tree is high enough.  */
+	if (order > 0 && max == ((1UL << order) - 1))
+		max++;
 	if (max > maxindex) {
 		int error = radix_tree_extend(root, max, shift);
 		if (error < 0)
 			return error;
 		shift = error;
 		child = root->rnode;
-		if (order == shift)
-			shift += RADIX_TREE_MAP_SHIFT;
 	}
 
 	while (shift > order) {
@@ -697,6 +694,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 				return -ENOMEM;
 			child->shift = shift;
 			child->offset = offset;
+			child->count = 0;
+			child->exceptional = 0;
 			child->parent = node;
 			rcu_assign_pointer(*slot, node_to_entry(child));
 			if (node)
@@ -710,31 +709,121 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 		slot = &node->slots[offset];
 	}
 
+	if (nodep)
+		*nodep = node;
+	if (slotp)
+		*slotp = slot;
+	return 0;
+}
+
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-	/* Insert pointers to the canonical entry */
-	if (order > shift) {
-		unsigned i, n = 1 << (order - shift);
+/*
+ * Free any nodes below this node.  The tree is presumed to not need
+ * shrinking, and any user data in the tree is presumed to not need a
+ * destructor called on it.  If we need to add a destructor, we can
+ * add that functionality later.  Note that we may not clear tags or
+ * slots from the tree as an RCU walker may still have a pointer into
+ * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
+ * but we'll still have to clear those in rcu_free.
+ */
+static void radix_tree_free_nodes(struct radix_tree_node *node)
+{
+	unsigned offset = 0;
+	struct radix_tree_node *child = entry_to_node(node);
+
+	for (;;) {
+		void *entry = child->slots[offset];
+		if (radix_tree_is_internal_node(entry) &&
+					!is_sibling_entry(child, entry)) {
+			child = entry_to_node(entry);
+			offset = 0;
+			continue;
+		}
+		offset++;
+		while (offset == RADIX_TREE_MAP_SIZE) {
+			struct radix_tree_node *old = child;
+			offset = child->offset + 1;
+			child = child->parent;
+			radix_tree_node_free(old);
+			if (old == entry_to_node(node))
+				return;
+		}
+	}
+}
+
+static inline int insert_entries(struct radix_tree_node *node, void **slot,
+				void *item, unsigned order, bool replace)
+{
+	struct radix_tree_node *child;
+	unsigned i, n, tag, offset, tags = 0;
+
+	if (node) {
+		n = 1 << (order - node->shift);
+		offset = get_slot_offset(node, slot);
+	} else {
+		n = 1;
+		offset = 0;
+	}
+
+	if (n > 1) {
 		offset = offset & ~(n - 1);
 		slot = &node->slots[offset];
-		child = node_to_entry(slot);
-		for (i = 0; i < n; i++) {
-			if (slot[i])
+	}
+	child = node_to_entry(slot);
+
+	for (i = 0; i < n; i++) {
+		if (slot[i]) {
+			if (replace) {
+				node->count--;
+				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+					if (tag_get(node, tag, offset + i))
+						tags |= 1 << tag;
+			} else
 				return -EEXIST;
 		}
+	}
 
-		for (i = 1; i < n; i++) {
+	for (i = 0; i < n; i++) {
+		struct radix_tree_node *old = slot[i];
+		if (i) {
 			rcu_assign_pointer(slot[i], child);
-			node->count++;
+			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+				if (tags & (1 << tag))
+					tag_clear(node, tag, offset + i);
+		} else {
+			rcu_assign_pointer(slot[i], item);
+			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+				if (tags & (1 << tag))
+					tag_set(node, tag, offset);
 		}
+		if (radix_tree_is_internal_node(old) &&
+					!is_sibling_entry(node, old))
+			radix_tree_free_nodes(old);
+		if (radix_tree_exceptional_entry(old))
+			node->exceptional--;
 	}
-#endif
-
-	if (nodep)
-		*nodep = node;
-	if (slotp)
-		*slotp = slot;
-	return 0;
+	if (node) {
+		node->count += n;
+		if (radix_tree_exceptional_entry(item))
+			node->exceptional += n;
+	}
+	return n;
 }
+#else
+static inline int insert_entries(struct radix_tree_node *node, void **slot,
+				void *item, unsigned order, bool replace)
+{
+	if (*slot)
+		return -EEXIST;
+	rcu_assign_pointer(*slot, item);
+	if (node) {
+		node->count++;
+		if (radix_tree_exceptional_entry(item))
+			node->exceptional++;
+	}
+	return 1;
+}
+#endif
 
 /**
  *	__radix_tree_insert    -    insert into a radix tree
@@ -757,15 +846,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
 	error = __radix_tree_create(root, index, order, &node, &slot);
 	if (error)
 		return error;
-	if (*slot != NULL)
-		return -EEXIST;
-	rcu_assign_pointer(*slot, item);
+
+	error = insert_entries(node, slot, item, order, false);
+	if (error < 0)
+		return error;
 
 	if (node) {
 		unsigned offset = get_slot_offset(node, slot);
-		node->count++;
-		if (radix_tree_exceptional_entry(item))
-			node->exceptional++;
 		BUG_ON(tag_get(node, 0, offset));
 		BUG_ON(tag_get(node, 1, offset));
 		BUG_ON(tag_get(node, 2, offset));
@@ -942,6 +1029,40 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
 	replace_slot(root, NULL, slot, item, true);
 }
 
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/**
+ * radix_tree_join - replace multiple entries with one multiorder entry
+ * @root: radix tree root
+ * @index: an index inside the new entry
+ * @order: order of the new entry
+ * @item: new entry
+ *
+ * Call this function to replace several entries with one larger entry.
+ * The existing entries are presumed to not need freeing as a result of
+ * this call.
+ *
+ * The replacement entry will have all the tags set on it that were set
+ * on any of the entries it is replacing.
+ */
+int radix_tree_join(struct radix_tree_root *root, unsigned long index,
+			unsigned order, void *item)
+{
+	struct radix_tree_node *node;
+	void **slot;
+	int error;
+
+	BUG_ON(radix_tree_is_internal_node(item));
+
+	error = __radix_tree_create(root, index, order, &node, &slot);
+	if (!error)
+		error = insert_entries(node, slot, item, order, true);
+	if (error > 0)
+		error = 0;
+
+	return error;
+}
+#endif
+
 /**
  *	radix_tree_tag_set - set a tag on a radix tree node
  *	@root:		radix tree root
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 86daf23b3509..c9f656cf5f52 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -332,6 +332,63 @@ void multiorder_tagged_iteration(void)
 	item_kill_tree(&tree);
 }
 
+static void __multiorder_join(unsigned long index,
+				unsigned order1, unsigned order2)
+{
+	unsigned long loc;
+	void *item, *item2 = item_create(index + 1, order1);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert_order(&tree, index, order2);
+	item = radix_tree_lookup(&tree, index);
+	radix_tree_join(&tree, index + 1, order1, item2);
+	loc = find_item(&tree, item);
+	if (loc == -1)
+		free(item);
+	item = radix_tree_lookup(&tree, index + 1);
+	assert(item == item2);
+	item_kill_tree(&tree);
+}
+
+static void __multiorder_join2(unsigned order1, unsigned order2)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_node *node;
+	void *item1 = item_create(0, order1);
+	void *item2;
+
+	item_insert_order(&tree, 0, order2);
+	radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
+	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+	assert(item2 == (void *)0x12UL);
+	assert(node->exceptional == 1);
+
+	radix_tree_join(&tree, 0, order1, item1);
+	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+	assert(item2 == item1);
+	assert(node->exceptional == 0);
+	item_kill_tree(&tree);
+}
+
+static void multiorder_join(void)
+{
+	int i, j, idx;
+
+	for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
+		for (i = 1; i < 15; i++) {
+			for (j = 0; j < i; j++) {
+				__multiorder_join(idx, i, j);
+			}
+		}
+	}
+
+	for (i = 1; i < 15; i++) {
+		for (j = 0; j < i; j++) {
+			__multiorder_join2(i, j);
+		}
+	}
+}
+
 void multiorder_checks(void)
 {
 	int i;
@@ -349,4 +406,5 @@ void multiorder_checks(void)
 	multiorder_tag_tests();
 	multiorder_iteration();
 	multiorder_tagged_iteration();
+	multiorder_join();
 }
-- 
cgit v1.2.3


From e157b555945fb16ddc6cce605a1eb6b4135ea5f1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:09:01 -0800
Subject: radix-tree: add radix_tree_split

This new function splits a larger multiorder entry into smaller entries
(potentially multi-order entries).  These entries are initialised to
RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't
confused.  The caller should then call radix_tree_for_each_slot() and
radix_tree_replace_slot() in order to turn these retry entries into the
intended new entries.  Tags are replicated from the original multiorder
entry into each new entry.

Link: http://lkml.kernel.org/r/1480369871-5271-59-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h            |  12 +++
 lib/radix-tree.c                      | 142 +++++++++++++++++++++++++++++++++-
 tools/testing/radix-tree/multiorder.c |  64 +++++++++++++++
 3 files changed, 214 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 935293a24f7d..1f4b56120de8 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -80,6 +80,14 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
 
+/*
+ * @count is the count of every non-NULL element in the ->slots array
+ * whether that is an exceptional entry, a retry entry, a user pointer,
+ * a sibling entry or a pointer to the next level of the tree.
+ * @exceptional is the count of every element in ->slots which is
+ * either radix_tree_exceptional_entry() or is a sibling entry for an
+ * exceptional entry.
+ */
 struct radix_tree_node {
 	unsigned char	shift;		/* Bits remaining in each slot */
 	unsigned char	offset;		/* Slot offset in parent */
@@ -293,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
 			  void **slot, void *item,
 			  radix_tree_update_node_t update_node, void *private);
+void radix_tree_iter_replace(struct radix_tree_root *,
+		const struct radix_tree_iter *, void **slot, void *item);
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item);
 void __radix_tree_delete_node(struct radix_tree_root *root,
@@ -335,6 +345,8 @@ static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
+int radix_tree_split(struct radix_tree_root *, unsigned long index,
+			unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
 			unsigned new_order, void *);
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 962cfb3a7671..ade2ed3f5190 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,6 +22,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -758,7 +759,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
 	unsigned i, n, tag, offset, tags = 0;
 
 	if (node) {
-		n = 1 << (order - node->shift);
+		if (order > node->shift)
+			n = 1 << (order - node->shift);
+		else
+			n = 1;
 		offset = get_slot_offset(node, slot);
 	} else {
 		n = 1;
@@ -797,7 +801,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
 					tag_set(node, tag, offset);
 		}
 		if (radix_tree_is_internal_node(old) &&
-					!is_sibling_entry(node, old))
+					!is_sibling_entry(node, old) &&
+					(old != RADIX_TREE_RETRY))
 			radix_tree_free_nodes(old);
 		if (radix_tree_exceptional_entry(old))
 			node->exceptional--;
@@ -1021,7 +1026,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
  * NOTE: This cannot be used to switch between non-entries (empty slots),
  * regular entries, and exceptional entries, as that requires accounting
  * inside the radix tree node. When switching from one type of entry or
- * deleting, use __radix_tree_lookup() and __radix_tree_replace().
+ * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
+ * radix_tree_iter_replace().
  */
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item)
@@ -1029,6 +1035,21 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
 	replace_slot(root, NULL, slot, item, true);
 }
 
+/**
+ * radix_tree_iter_replace - replace item in a slot
+ * @root:	radix tree root
+ * @slot:	pointer to slot
+ * @item:	new item to store in the slot.
+ *
+ * For use with radix_tree_split() and radix_tree_for_each_slot().
+ * Caller must hold tree write locked across split and replacement.
+ */
+void radix_tree_iter_replace(struct radix_tree_root *root,
+		const struct radix_tree_iter *iter, void **slot, void *item)
+{
+	__radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
+}
+
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 /**
  * radix_tree_join - replace multiple entries with one multiorder entry
@@ -1061,6 +1082,117 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index,
 
 	return error;
 }
+
+/**
+ * radix_tree_split - Split an entry into smaller entries
+ * @root: radix tree root
+ * @index: An index within the large entry
+ * @order: Order of new entries
+ *
+ * Call this function as the first step in replacing a multiorder entry
+ * with several entries of lower order.  After this function returns,
+ * loop over the relevant portion of the tree using radix_tree_for_each_slot()
+ * and call radix_tree_iter_replace() to set up each new entry.
+ *
+ * The tags from this entry are replicated to all the new entries.
+ *
+ * The radix tree should be locked against modification during the entire
+ * replacement operation.  Lock-free lookups will see RADIX_TREE_RETRY which
+ * should prompt RCU walkers to restart the lookup from the root.
+ */
+int radix_tree_split(struct radix_tree_root *root, unsigned long index,
+				unsigned order)
+{
+	struct radix_tree_node *parent, *node, *child;
+	void **slot;
+	unsigned int offset, end;
+	unsigned n, tag, tags = 0;
+
+	if (!__radix_tree_lookup(root, index, &parent, &slot))
+		return -ENOENT;
+	if (!parent)
+		return -ENOENT;
+
+	offset = get_slot_offset(parent, slot);
+
+	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+		if (tag_get(parent, tag, offset))
+			tags |= 1 << tag;
+
+	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
+		if (!is_sibling_entry(parent, parent->slots[end]))
+			break;
+		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+			if (tags & (1 << tag))
+				tag_set(parent, tag, end);
+		/* rcu_assign_pointer ensures tags are set before RETRY */
+		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
+	}
+	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
+	parent->exceptional -= (end - offset);
+
+	if (order == parent->shift)
+		return 0;
+	if (order > parent->shift) {
+		while (offset < end)
+			offset += insert_entries(parent, &parent->slots[offset],
+					RADIX_TREE_RETRY, order, true);
+		return 0;
+	}
+
+	node = parent;
+
+	for (;;) {
+		if (node->shift > order) {
+			child = radix_tree_node_alloc(root);
+			if (!child)
+				goto nomem;
+			child->shift = node->shift - RADIX_TREE_MAP_SHIFT;
+			child->offset = offset;
+			child->count = 0;
+			child->parent = node;
+			if (node != parent) {
+				node->count++;
+				node->slots[offset] = node_to_entry(child);
+				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+					if (tags & (1 << tag))
+						tag_set(node, tag, offset);
+			}
+
+			node = child;
+			offset = 0;
+			continue;
+		}
+
+		n = insert_entries(node, &node->slots[offset],
+					RADIX_TREE_RETRY, order, false);
+		BUG_ON(n > RADIX_TREE_MAP_SIZE);
+
+		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+			if (tags & (1 << tag))
+				tag_set(node, tag, offset);
+		offset += n;
+
+		while (offset == RADIX_TREE_MAP_SIZE) {
+			if (node == parent)
+				break;
+			offset = node->offset;
+			child = node;
+			node = node->parent;
+			rcu_assign_pointer(node->slots[offset],
+						node_to_entry(child));
+			offset++;
+		}
+		if ((node == parent) && (offset == end))
+			return 0;
+	}
+
+ nomem:
+	/* Shouldn't happen; did user forget to preload? */
+	/* TODO: free all the allocated nodes */
+	WARN_ON(1);
+	return -ENOMEM;
+}
 #endif
 
 /**
@@ -1441,8 +1573,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 			child = rcu_dereference_raw(node->slots[offset]);
 		}
 
-		if ((child == NULL) || (child == RADIX_TREE_RETRY))
+		if (!child)
 			goto restart;
+		if (child == RADIX_TREE_RETRY)
+			break;
 	} while (radix_tree_is_internal_node(child));
 
 	/* Update the iterator state */
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index c9f656cf5f52..fa6effe997a3 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -389,6 +389,69 @@ static void multiorder_join(void)
 	}
 }
 
+static void __multiorder_split(int old_order, int new_order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	void **slot;
+	struct radix_tree_iter iter;
+	struct radix_tree_node *node;
+	void *item;
+
+	item_insert_order(&tree, 0, old_order);
+	radix_tree_tag_set(&tree, 0, 2);
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot,
+					item_create(iter.index, new_order));
+	}
+
+	item_kill_tree(&tree);
+
+	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x12);
+	assert(node->exceptional > 0);
+
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot,
+					item_create(iter.index, new_order));
+	}
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item != (void *)0x12);
+	assert(node->exceptional == 0);
+
+	item_kill_tree(&tree);
+
+	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x12);
+	assert(node->exceptional > 0);
+
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
+	}
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x16);
+	assert(node->exceptional > 0);
+
+	item_kill_tree(&tree);
+}
+
+static void multiorder_split(void)
+{
+	int i, j;
+
+	for (i = 9; i < 19; i++)
+		for (j = 0; j < i; j++)
+			__multiorder_split(i, j);
+}
+
 void multiorder_checks(void)
 {
 	int i;
@@ -407,4 +470,5 @@ void multiorder_checks(void)
 	multiorder_iteration();
 	multiorder_tagged_iteration();
 	multiorder_join();
+	multiorder_split();
 }
-- 
cgit v1.2.3


From 2791653a6814d170fa893344618563a7b1da95c6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:09:04 -0800
Subject: radix-tree: add radix_tree_split_preload()

Calculate how many nodes we need to allocate to split an old_order entry
into multiple entries, each of size new_order.  The test suite checks
that we allocated exactly the right number of nodes; neither too many
(checked by rtp->nr == 0), nor too few (checked by comparing
nr_allocated before and after the call to radix_tree_split()).

Link: http://lkml.kernel.org/r/1480369871-5271-60-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h            |  1 +
 lib/radix-tree.c                      | 24 +++++++++++++++++++-
 tools/testing/radix-tree/multiorder.c | 42 +++++++++++++++++++++++++++++++++--
 tools/testing/radix-tree/test.h       |  5 +++++
 4 files changed, 69 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 1f4b56120de8..5dea8f6440e4 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -345,6 +345,7 @@ static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
+int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
 int radix_tree_split(struct radix_tree_root *, unsigned long index,
 			unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index ade2ed3f5190..be1183e62590 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -368,7 +368,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * To make use of this facility, the radix tree must be initialised without
  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
-static int __radix_tree_preload(gfp_t gfp_mask, int nr)
+static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 {
 	struct radix_tree_preload *rtp;
 	struct radix_tree_node *node;
@@ -434,6 +434,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/*
+ * Preload with enough objects to ensure that we can split a single entry
+ * of order @old_order into many entries of size @new_order
+ */
+int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
+							gfp_t gfp_mask)
+{
+	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
+	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
+				(new_order / RADIX_TREE_MAP_SHIFT);
+	unsigned nr = 0;
+
+	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
+	BUG_ON(new_order >= old_order);
+
+	while (layers--)
+		nr = nr * RADIX_TREE_MAP_SIZE + 1;
+	return __radix_tree_preload(gfp_mask, top * nr);
+}
+#endif
+
 /*
  * The same as function above, but preload number of nodes required to insert
  * (1 << order) continuous naturally-aligned elements.
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index fa6effe997a3..5421f015f46c 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -389,35 +389,67 @@ static void multiorder_join(void)
 	}
 }
 
+static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
+{
+	struct radix_tree_preload *rtp = &radix_tree_preloads;
+	if (rtp->nr != 0)
+		printf("split(%u %u) remaining %u\n", old_order, new_order,
+							rtp->nr);
+	/*
+	 * Can't check for equality here as some nodes may have been
+	 * RCU-freed while we ran.  But we should never finish with more
+	 * nodes allocated since they should have all been preloaded.
+	 */
+	if (nr_allocated > alloc)
+		printf("split(%u %u) allocated %u %u\n", old_order, new_order,
+							alloc, nr_allocated);
+}
+
 static void __multiorder_split(int old_order, int new_order)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
+	RADIX_TREE(tree, GFP_ATOMIC);
 	void **slot;
 	struct radix_tree_iter iter;
 	struct radix_tree_node *node;
 	void *item;
+	unsigned alloc;
+
+	radix_tree_preload(GFP_KERNEL);
+	assert(item_insert_order(&tree, 0, old_order) == 0);
+	radix_tree_preload_end();
+
+	/* Wipe out the preloaded cache or it'll confuse check_mem() */
+	radix_tree_cpu_dead(0);
 
-	item_insert_order(&tree, 0, old_order);
 	radix_tree_tag_set(&tree, 0, 2);
+
+	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
+	alloc = nr_allocated;
 	radix_tree_split(&tree, 0, new_order);
+	check_mem(old_order, new_order, alloc);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
 		radix_tree_iter_replace(&tree, &iter, slot,
 					item_create(iter.index, new_order));
 	}
+	radix_tree_preload_end();
 
 	item_kill_tree(&tree);
 
+	radix_tree_preload(GFP_KERNEL);
 	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+	radix_tree_preload_end();
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == (void *)0x12);
 	assert(node->exceptional > 0);
 
+	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
 		radix_tree_iter_replace(&tree, &iter, slot,
 					item_create(iter.index, new_order));
 	}
+	radix_tree_preload_end();
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item != (void *)0x12);
@@ -425,16 +457,20 @@ static void __multiorder_split(int old_order, int new_order)
 
 	item_kill_tree(&tree);
 
+	radix_tree_preload(GFP_KERNEL);
 	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+	radix_tree_preload_end();
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == (void *)0x12);
 	assert(node->exceptional > 0);
 
+	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
 		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
 	}
+	radix_tree_preload_end();
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == (void *)0x16);
@@ -471,4 +507,6 @@ void multiorder_checks(void)
 	multiorder_tagged_iteration();
 	multiorder_join();
 	multiorder_split();
+
+	radix_tree_cpu_dead(0);
 }
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index e11e4d260b4e..7c2611caa6d2 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -52,3 +52,8 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned long node_maxindex(struct radix_tree_node *);
 unsigned long shift_maxindex(unsigned int shift);
 int radix_tree_cpu_dead(unsigned int cpu);
+struct radix_tree_preload {
+	unsigned nr;
+	struct radix_tree_node *nodes;
+};
+extern struct radix_tree_preload radix_tree_preloads;
-- 
cgit v1.2.3


From 99c494077e2d4282a17120a772eecc00ec3004cc Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:09:13 -0800
Subject: idr: add ida_is_empty

Two of the USB Gadgets were poking around in the internals of struct ida
in order to determine if it is empty.  Add the appropriate abstraction.

Link: http://lkml.kernel.org/r/1480369871-5271-63-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/gadget/function/f_hid.c     | 6 +++---
 drivers/usb/gadget/function/f_printer.c | 6 +++---
 include/linux/idr.h                     | 5 +++++
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 7abd70b2a588..3151d2a0fe59 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -905,7 +905,7 @@ static void hidg_free_inst(struct usb_function_instance *f)
 	mutex_lock(&hidg_ida_lock);
 
 	hidg_put_minor(opts->minor);
-	if (idr_is_empty(&hidg_ida.idr))
+	if (ida_is_empty(&hidg_ida))
 		ghid_cleanup();
 
 	mutex_unlock(&hidg_ida_lock);
@@ -931,7 +931,7 @@ static struct usb_function_instance *hidg_alloc_inst(void)
 
 	mutex_lock(&hidg_ida_lock);
 
-	if (idr_is_empty(&hidg_ida.idr)) {
+	if (ida_is_empty(&hidg_ida)) {
 		status = ghid_setup(NULL, HIDG_MINORS);
 		if (status)  {
 			ret = ERR_PTR(status);
@@ -944,7 +944,7 @@ static struct usb_function_instance *hidg_alloc_inst(void)
 	if (opts->minor < 0) {
 		ret = ERR_PTR(opts->minor);
 		kfree(opts);
-		if (idr_is_empty(&hidg_ida.idr))
+		if (ida_is_empty(&hidg_ida))
 			ghid_cleanup();
 		goto unlock;
 	}
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 0de36cda6e41..8054da9276dd 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -1265,7 +1265,7 @@ static void gprinter_free_inst(struct usb_function_instance *f)
 	mutex_lock(&printer_ida_lock);
 
 	gprinter_put_minor(opts->minor);
-	if (idr_is_empty(&printer_ida.idr))
+	if (ida_is_empty(&printer_ida))
 		gprinter_cleanup();
 
 	mutex_unlock(&printer_ida_lock);
@@ -1289,7 +1289,7 @@ static struct usb_function_instance *gprinter_alloc_inst(void)
 
 	mutex_lock(&printer_ida_lock);
 
-	if (idr_is_empty(&printer_ida.idr)) {
+	if (ida_is_empty(&printer_ida)) {
 		status = gprinter_setup(PRINTER_MINORS);
 		if (status) {
 			ret = ERR_PTR(status);
@@ -1302,7 +1302,7 @@ static struct usb_function_instance *gprinter_alloc_inst(void)
 	if (opts->minor < 0) {
 		ret = ERR_PTR(opts->minor);
 		kfree(opts);
-		if (idr_is_empty(&printer_ida.idr))
+		if (ida_is_empty(&printer_ida))
 			gprinter_cleanup();
 		goto unlock;
 	}
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 083d61e92706..3639a28188c9 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -195,6 +195,11 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
 	return ida_get_new_above(ida, 0, p_id);
 }
 
+static inline bool ida_is_empty(struct ida *ida)
+{
+	return idr_is_empty(&ida->idr);
+}
+
 void __init idr_init_cache(void);
 
 #endif /* __IDR_H__ */
-- 
cgit v1.2.3


From 444306129a920015a2cc876d13fcbf52382f39bd Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 14 Dec 2016 15:09:19 -0800
Subject: rxrpc: abstract away knowledge of IDR internals

Add idr_get_cursor() / idr_set_cursor() APIs, and remove the reference
to IDR_SIZE.

Link: http://lkml.kernel.org/r/1480369871-5271-65-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h     | 26 ++++++++++++++++++++++++++
 net/rxrpc/af_rxrpc.c    | 11 ++++++-----
 net/rxrpc/conn_client.c |  4 ++--
 3 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3639a28188c9..1eb755f77f2f 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -55,6 +55,32 @@ struct idr {
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
+/**
+ * idr_get_cursor - Return the current position of the cyclic allocator
+ * @idr: idr handle
+ *
+ * The value returned is the value that will be next returned from
+ * idr_alloc_cyclic() if it is free (otherwise the search will start from
+ * this position).
+ */
+static inline unsigned int idr_get_cursor(struct idr *idr)
+{
+	return READ_ONCE(idr->cur);
+}
+
+/**
+ * idr_set_cursor - Set the current position of the cyclic allocator
+ * @idr: idr handle
+ * @val: new position
+ *
+ * The next call to idr_alloc_cyclic() will return @val if it is free
+ * (otherwise the search will start from this position).
+ */
+static inline void idr_set_cursor(struct idr *idr, unsigned int val)
+{
+	WRITE_ONCE(idr->cur, val);
+}
+
 /**
  * DOC: idr sync
  * idr synchronization (stolen from radix-tree.h)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2d59c9be40e1..5f63f6dcaabb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -762,16 +762,17 @@ static const struct net_proto_family rxrpc_family_ops = {
 static int __init af_rxrpc_init(void)
 {
 	int ret = -1;
+	unsigned int tmp;
 
 	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
 	rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
-	get_random_bytes(&rxrpc_client_conn_ids.cur,
-			 sizeof(rxrpc_client_conn_ids.cur));
-	rxrpc_client_conn_ids.cur &= 0x3fffffff;
-	if (rxrpc_client_conn_ids.cur == 0)
-		rxrpc_client_conn_ids.cur = 1;
+	get_random_bytes(&tmp, sizeof(tmp));
+	tmp &= 0x3fffffff;
+	if (tmp == 0)
+		tmp = 1;
+	idr_set_cursor(&rxrpc_client_conn_ids, tmp);
 
 	ret = -ENOMEM;
 	rxrpc_call_jar = kmem_cache_create(
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 60ef9605167e..6cbcdcc29853 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -263,12 +263,12 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
 	 * times the maximum number of client conns away from the current
 	 * allocation point to try and keep the IDs concentrated.
 	 */
-	id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+	id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
 	id = conn->proto.cid >> RXRPC_CIDSHIFT;
 	distance = id - id_cursor;
 	if (distance < 0)
 		distance = -distance;
-	limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+	limit = max(rxrpc_max_client_connections * 4, 1024U);
 	if (distance > limit)
 		goto mark_dont_reuse;
 
-- 
cgit v1.2.3


From 424251a4a929a1b6dff2056d49135e3805132e32 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:09:22 -0800
Subject: idr: reduce the number of bits per level from 8 to 6

In preparation for merging the IDR and radix tree, reduce the fanout at
each level from 256 to 64.  If this causes a performance problem then a
bisect will point to this commit, and we'll have a better idea about
what we might do to fix it.

Link: http://lkml.kernel.org/r/1480369871-5271-66-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 1eb755f77f2f..3c01b89aed67 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -18,12 +18,11 @@
 #include <linux/rcupdate.h>
 
 /*
- * We want shallower trees and thus more bits covered at each layer.  8
- * bits gives us large enough first layer for most use cases and maximum
- * tree depth of 4.  Each idr_layer is slightly larger than 2k on 64bit and
- * 1k on 32bit.
+ * Using 6 bits at each layer allows us to allocate 7 layers out of each page.
+ * 8 bits only gave us 3 layers out of every pair of pages, which is less
+ * efficient except for trees with a largest element between 192-255 inclusive.
  */
-#define IDR_BITS 8
+#define IDR_BITS 6
 #define IDR_SIZE (1 << IDR_BITS)
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
-- 
cgit v1.2.3


From 427d77a32365d5f942d335248305a5c237baf63a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 13 Dec 2016 19:32:28 +0100
Subject: x86/smpboot: Prevent false positive out of bounds cpumask access
 warning

prefill_possible_map() reinitializes the cpu_possible_map by setting the
possible cpu bits and clearing all other bits up to NR_CPUS.

This is technically always correct because cpu_possible_map is statically
allocated and sized NR_CPUS. With CPUMASK_OFFSTACK and DEBUG_PER_CPU_MAPS
enabled the bounds check of cpu masks happens on nr_cpu_ids. nr_cpu_ids is
initialized to NR_CPUS and only limited after the set/clear bit loops have
been executed.

But if the system was booted with "nr_cpus=N" on the command line, where N
is < NR_CPUS then nr_cpu_ids is limited in the parameter parsing function
before prefill_possible_map() is invoked. As a consequence the cpumask
bounds check triggers when clearing the bits past nr_cpu_ids.

Add a helper which allows to reset cpu_possible_map w/o the bounds check
and then set only the possible bits which are well inside bounds.

Reported-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: 0x7f454c46@gmail.com
Cc: Jan Beulich <JBeulich@novell.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1612131836050.3415@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot.c | 8 ++++----
 include/linux/cpumask.h   | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e09aa58a7603..46732dc3b73c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1463,15 +1463,15 @@ __init void prefill_possible_map(void)
 		possible = i;
 	}
 
+	nr_cpu_ids = possible;
+
 	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
 
+	reset_cpu_possible_mask();
+
 	for (i = 0; i < possible; i++)
 		set_cpu_possible(i, true);
-	for (; i < NR_CPUS; i++)
-		set_cpu_possible(i, false);
-
-	nr_cpu_ids = possible;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index da7fbf1cdd56..c717f5ea88cb 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -722,6 +722,11 @@ void init_cpu_present(const struct cpumask *src);
 void init_cpu_possible(const struct cpumask *src);
 void init_cpu_online(const struct cpumask *src);
 
+static inline void reset_cpu_possible_mask(void)
+{
+	bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS);
+}
+
 static inline void
 set_cpu_possible(unsigned int cpu, bool possible)
 {
-- 
cgit v1.2.3


From 0b892c717714334890ea179a2dc1941a223e446f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sat, 29 Oct 2016 09:56:00 -0200
Subject: edac: move EDAC PCI definitions to drivers/edac/edac_pci.h

The edac_core.h header contain data structures and function
definitions for the 3 parts of EDAC: MC, PCI and device.

Let's move the PCI ones to a separate header file, as part
of a header reorganization.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/edac/edac_core.h      | 145 +----------------------------------
 drivers/edac/edac_pci.c       |  19 ++---
 drivers/edac/edac_pci.h       | 173 ++++++++++++++++++++++++++++++++++++++++++
 drivers/edac/edac_pci_sysfs.c |   2 +-
 include/linux/edac.h          |   2 +
 5 files changed, 185 insertions(+), 156 deletions(-)
 create mode 100644 drivers/edac/edac_pci.h

(limited to 'include/linux')

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 1723f3643e75..8c7cca2d6e40 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -35,8 +35,7 @@
 #include <linux/workqueue.h>
 #include <linux/edac.h>
 
-#define EDAC_DEVICE_NAME_LEN	31
-#define EDAC_ATTRIB_VALUE_LEN	15
+#include "edac_pci.h"
 
 #if PAGE_SHIFT < 20
 #define PAGES_TO_MiB(pages)	((pages) >> (20 - PAGE_SHIFT))
@@ -321,128 +320,6 @@ extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
 
 extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
 
-#ifdef CONFIG_PCI
-
-struct edac_pci_counter {
-	atomic_t pe_count;
-	atomic_t npe_count;
-};
-
-/*
- * Abstract edac_pci control info structure
- *
- */
-struct edac_pci_ctl_info {
-	/* for global list of edac_pci_ctl_info structs */
-	struct list_head link;
-
-	int pci_idx;
-
-	struct bus_type *edac_subsys;	/* pointer to subsystem */
-
-	/* the internal state of this controller instance */
-	int op_state;
-	/* work struct for this instance */
-	struct delayed_work work;
-
-	/* pointer to edac polling checking routine:
-	 *      If NOT NULL: points to polling check routine
-	 *      If NULL: Then assumes INTERRUPT operation, where
-	 *              MC driver will receive events
-	 */
-	void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
-
-	struct device *dev;	/* pointer to device structure */
-
-	const char *mod_name;	/* module name */
-	const char *ctl_name;	/* edac controller  name */
-	const char *dev_name;	/* pci/platform/etc... name */
-
-	void *pvt_info;		/* pointer to 'private driver' info */
-
-	unsigned long start_time;	/* edac_pci load start time (jiffies) */
-
-	struct completion complete;
-
-	/* sysfs top name under 'edac' directory
-	 * and instance name:
-	 *      cpu/cpu0/...
-	 *      cpu/cpu1/...
-	 *      cpu/cpu2/...
-	 *      ...
-	 */
-	char name[EDAC_DEVICE_NAME_LEN + 1];
-
-	/* Event counters for the this whole EDAC Device */
-	struct edac_pci_counter counters;
-
-	/* edac sysfs device control for the 'name'
-	 * device this structure controls
-	 */
-	struct kobject kobj;
-};
-
-#define to_edac_pci_ctl_work(w) \
-		container_of(w, struct edac_pci_ctl_info,work)
-
-/* write all or some bits in a byte-register*/
-static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
-				   u8 mask)
-{
-	if (mask != 0xff) {
-		u8 buf;
-
-		pci_read_config_byte(pdev, offset, &buf);
-		value &= mask;
-		buf &= ~mask;
-		value |= buf;
-	}
-
-	pci_write_config_byte(pdev, offset, value);
-}
-
-/* write all or some bits in a word-register*/
-static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
-				    u16 value, u16 mask)
-{
-	if (mask != 0xffff) {
-		u16 buf;
-
-		pci_read_config_word(pdev, offset, &buf);
-		value &= mask;
-		buf &= ~mask;
-		value |= buf;
-	}
-
-	pci_write_config_word(pdev, offset, value);
-}
-
-/*
- * pci_write_bits32
- *
- * edac local routine to do pci_write_config_dword, but adds
- * a mask parameter. If mask is all ones, ignore the mask.
- * Otherwise utilize the mask to isolate specified bits
- *
- * write all or some bits in a dword-register
- */
-static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
-				    u32 value, u32 mask)
-{
-	if (mask != 0xffffffff) {
-		u32 buf;
-
-		pci_read_config_dword(pdev, offset, &buf);
-		value &= mask;
-		buf &= ~mask;
-		value |= buf;
-	}
-
-	pci_write_config_dword(pdev, offset, value);
-}
-
-#endif				/* CONFIG_PCI */
-
 struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 				   unsigned n_layers,
 				   struct edac_mc_layer *layers,
@@ -485,26 +362,6 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
 extern int edac_device_alloc_index(void);
 extern const char *edac_layer_name[];
 
-/*
- * edac_pci APIs
- */
-extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
-				const char *edac_pci_name);
-
-extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
-
-extern int edac_pci_alloc_index(void);
-extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
-extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
-
-extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
-				struct device *dev,
-				const char *mod_name);
-
-extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
-extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
-extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
-
 /*
  * edac misc APIs
  */
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 8f2f2899a7a2..02a0af4f53b3 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -9,22 +9,19 @@
  * or implied.
  *
  */
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
 #include <linux/module.h>
-#include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/init.h>
+#include <linux/spinlock.h>
 #include <linux/sysctl.h>
-#include <linux/highmem.h>
 #include <linux/timer.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/ctype.h>
-#include <linux/workqueue.h>
-#include <asm/uaccess.h>
-#include <asm/page.h>
 
-#include "edac_core.h"
+#include "edac_pci.h"
 #include "edac_module.h"
 
 static DEFINE_MUTEX(edac_pci_ctls_mutex);
diff --git a/drivers/edac/edac_pci.h b/drivers/edac/edac_pci.h
new file mode 100644
index 000000000000..9da0c6fb0634
--- /dev/null
+++ b/drivers/edac/edac_pci.h
@@ -0,0 +1,173 @@
+/*
+ * Defines, structures, APIs for edac_pci and edac_pci_sysfs
+ *
+ * (C) 2007 Linux Networx (http://lnxi.com)
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * Written by Thayne Harbaugh
+ * Based on work by Dan Hollis <goemon at anime dot net> and others.
+ *	http://www.anime.net/~goemon/linux-ecc/
+ *
+ * NMI handling support added by
+ *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
+ *
+ * Refactored for multi-source files:
+ *	Doug Thompson <norsk5@xmission.com>
+ *
+ * Please look at Documentation/driver-api/edac.rst for more info about
+ * EDAC core structs and functions.
+ */
+
+#ifndef _EDAC_PCI_H_
+#define _EDAC_PCI_H_
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/edac.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#ifdef CONFIG_PCI
+
+struct edac_pci_counter {
+	atomic_t pe_count;
+	atomic_t npe_count;
+};
+
+/*
+ * Abstract edac_pci control info structure
+ *
+ */
+struct edac_pci_ctl_info {
+	/* for global list of edac_pci_ctl_info structs */
+	struct list_head link;
+
+	int pci_idx;
+
+	struct bus_type *edac_subsys;	/* pointer to subsystem */
+
+	/* the internal state of this controller instance */
+	int op_state;
+	/* work struct for this instance */
+	struct delayed_work work;
+
+	/* pointer to edac polling checking routine:
+	 *      If NOT NULL: points to polling check routine
+	 *      If NULL: Then assumes INTERRUPT operation, where
+	 *              MC driver will receive events
+	 */
+	void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
+
+	struct device *dev;	/* pointer to device structure */
+
+	const char *mod_name;	/* module name */
+	const char *ctl_name;	/* edac controller  name */
+	const char *dev_name;	/* pci/platform/etc... name */
+
+	void *pvt_info;		/* pointer to 'private driver' info */
+
+	unsigned long start_time;	/* edac_pci load start time (jiffies) */
+
+	struct completion complete;
+
+	/* sysfs top name under 'edac' directory
+	 * and instance name:
+	 *      cpu/cpu0/...
+	 *      cpu/cpu1/...
+	 *      cpu/cpu2/...
+	 *      ...
+	 */
+	char name[EDAC_DEVICE_NAME_LEN + 1];
+
+	/* Event counters for the this whole EDAC Device */
+	struct edac_pci_counter counters;
+
+	/* edac sysfs device control for the 'name'
+	 * device this structure controls
+	 */
+	struct kobject kobj;
+};
+
+#define to_edac_pci_ctl_work(w) \
+		container_of(w, struct edac_pci_ctl_info,work)
+
+/* write all or some bits in a byte-register*/
+static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
+				   u8 mask)
+{
+	if (mask != 0xff) {
+		u8 buf;
+
+		pci_read_config_byte(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_byte(pdev, offset, value);
+}
+
+/* write all or some bits in a word-register*/
+static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
+				    u16 value, u16 mask)
+{
+	if (mask != 0xffff) {
+		u16 buf;
+
+		pci_read_config_word(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_word(pdev, offset, value);
+}
+
+/*
+ * pci_write_bits32
+ *
+ * edac local routine to do pci_write_config_dword, but adds
+ * a mask parameter. If mask is all ones, ignore the mask.
+ * Otherwise utilize the mask to isolate specified bits
+ *
+ * write all or some bits in a dword-register
+ */
+static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
+				    u32 value, u32 mask)
+{
+	if (mask != 0xffffffff) {
+		u32 buf;
+
+		pci_read_config_dword(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_dword(pdev, offset, value);
+}
+
+#endif				/* CONFIG_PCI */
+
+extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
+				const char *edac_pci_name);
+
+extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
+
+extern int edac_pci_alloc_index(void);
+extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
+extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
+
+extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
+				struct device *dev,
+				const char *mod_name);
+
+extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
+extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
+extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
+
+#endif
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 6e3428ba400f..dbeb048d8efb 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -11,7 +11,7 @@
 #include <linux/slab.h>
 #include <linux/ctype.h>
 
-#include "edac_core.h"
+#include "edac_pci.h"
 #include "edac_module.h"
 
 #define EDAC_PCI_SYMLINK	"device"
diff --git a/include/linux/edac.h b/include/linux/edac.h
index cb56dcba68c6..0fec0f26469b 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -18,6 +18,8 @@
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 
+#define EDAC_DEVICE_NAME_LEN	31
+
 struct device;
 
 #define EDAC_OPSTATE_INVAL	-1
-- 
cgit v1.2.3


From e002075819d987dec3bf9fa3ca98ad19fa86ae0f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 28 Oct 2016 15:04:52 -0200
Subject: edac: fix kenel-doc markups at edac.h

As this file was never added to the driver-api, the kernel-doc
markups there were never tested. Some of them have issues.
Fix them.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/edac.h | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 0fec0f26469b..3c20d2d2686a 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -132,6 +132,8 @@ enum dev_type {
  *				it for example, by re-trying the operation).
  * @HW_EVENT_ERR_FATAL:		Fatal Error - Uncorrected error that could not
  *				be recovered.
+ * @HW_EVENT_ERR_INFO:		Informational - The CPER spec defines a forth
+ *				type of error: informational logs.
  */
 enum hw_event_mc_err_type {
 	HW_EVENT_ERR_CORRECTED,
@@ -162,7 +164,7 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * enum mem_type - memory types. For a more detailed reference, please see
  *			http://en.wikipedia.org/wiki/DRAM
  *
- * @MEM_EMPTY		Empty csrow
+ * @MEM_EMPTY:		Empty csrow
  * @MEM_RESERVED:	Reserved csrow type
  * @MEM_UNKNOWN:	Unknown csrow type
  * @MEM_FPM:		FPM - Fast Page Mode, used on systems up to 1995.
@@ -286,7 +288,7 @@ enum edac_type {
 
 /**
  * enum scrub_type - scrubbing capabilities
- * @SCRUB_UNKNOWN		Unknown if scrubber is available
+ * @SCRUB_UNKNOWN:		Unknown if scrubber is available
  * @SCRUB_NONE:			No scrubber
  * @SCRUB_SW_PROG:		SW progressive (sequential) scrubbing
  * @SCRUB_SW_SRC:		Software scrub only errors
@@ -295,7 +297,7 @@ enum edac_type {
  * @SCRUB_HW_PROG:		HW progressive (sequential) scrubbing
  * @SCRUB_HW_SRC:		Hardware scrub only errors
  * @SCRUB_HW_PROG_SRC:		Progressive hardware scrub from an error
- * SCRUB_HW_TUNABLE:		Hardware scrub frequency is tunable
+ * @SCRUB_HW_TUNABLE:		Hardware scrub frequency is tunable
  */
 enum scrub_type {
 	SCRUB_UNKNOWN =	0,
@@ -460,7 +462,7 @@ enum edac_mc_layer_type {
 
 /**
  * struct edac_mc_layer - describes the memory controller hierarchy
- * @layer:		layer type
+ * @type:		layer type
  * @size:		number of components per layer. For example,
  *			if the channel layer has two channels, size = 2
  * @is_virt_csrow:	This layer is part of the "csrow" when old API
@@ -483,24 +485,28 @@ struct edac_mc_layer {
 #define EDAC_MAX_LAYERS		3
 
 /**
- * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer array
- *		   for the element given by [layer0,layer1,layer2] position
+ * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer
+ *		   array for the element given by [layer0,layer1,layer2]
+ *		   position
  *
  * @layers:	a struct edac_mc_layer array, describing how many elements
  *		were allocated for each layer
- * @n_layers:	Number of layers at the @layers array
+ * @nlayers:	Number of layers at the @layers array
  * @layer0:	layer0 position
  * @layer1:	layer1 position. Unused if n_layers < 2
  * @layer2:	layer2 position. Unused if n_layers < 3
  *
- * For 1 layer, this macro returns &var[layer0] - &var
+ * For 1 layer, this macro returns "var[layer0] - var";
+ *
  * For 2 layers, this macro is similar to allocate a bi-dimensional array
- *		and to return "&var[layer0][layer1] - &var"
+ * and to return "var[layer0][layer1] - var";
+ *
  * For 3 layers, this macro is similar to allocate a tri-dimensional array
- *		and to return "&var[layer0][layer1][layer2] - &var"
+ * and to return "var[layer0][layer1][layer2] - var".
  *
  * A loop could be used here to make it more generic, but, as we only have
  * 3 layers, this is a little faster.
+ *
  * By design, layers can never be 0 or more than 3. If that ever happens,
  * a NULL is returned, causing an OOPS during the memory allocation routine,
  * with would point to the developer that he's doing something wrong.
@@ -527,16 +533,18 @@ struct edac_mc_layer {
  *		were allocated for each layer
  * @var:	name of the var where we want to get the pointer
  *		(like mci->dimms)
- * @n_layers:	Number of layers at the @layers array
+ * @nlayers:	Number of layers at the @layers array
  * @layer0:	layer0 position
  * @layer1:	layer1 position. Unused if n_layers < 2
  * @layer2:	layer2 position. Unused if n_layers < 3
  *
- * For 1 layer, this macro returns &var[layer0]
+ * For 1 layer, this macro returns "var[layer0]";
+ *
  * For 2 layers, this macro is similar to allocate a bi-dimensional array
- *		and to return "&var[layer0][layer1]"
+ * and to return "var[layer0][layer1]";
+ *
  * For 3 layers, this macro is similar to allocate a tri-dimensional array
- *		and to return "&var[layer0][layer1][layer2]"
+ * and to return "var[layer0][layer1][layer2]";
  */
 #define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({	\
 	typeof(*var) __p;						\
@@ -622,7 +630,7 @@ struct errcount_attribute_data {
 };
 
 /**
- * edac_raw_error_desc - Raw error report structure
+ * struct edac_raw_error_desc - Raw error report structure
  * @grain:			minimum granularity for an error report, in bytes
  * @error_count:		number of errors of the same type
  * @top_layer:			top layer of the error (layer[0])
-- 
cgit v1.2.3


From 6b1fb6f7037221981fb2cf1822c31b5fba1b9c22 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sat, 29 Oct 2016 16:13:23 -0200
Subject: edac.rst: move concepts dictionary from edac.h

Instead of storing the concepts dictionary inside header file,
move it to the subsystem documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/driver-api/edac.rst | 106 +++++++++++++++++++++++++++++++++++++
 include/linux/edac.h              | 108 --------------------------------------
 2 files changed, 106 insertions(+), 108 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst
index 3771e768fda1..b8c742aa0a71 100644
--- a/Documentation/driver-api/edac.rst
+++ b/Documentation/driver-api/edac.rst
@@ -1,6 +1,112 @@
 Error Detection And Correction (EDAC) Devices
 =============================================
 
+Main Concepts used at the EDAC subsystem
+----------------------------------------
+
+There are several things to be aware of that aren't at all obvious, like
+*sockets, *socket sets*, *banks*, *rows*, *chip-select rows*, *channels*,
+etc...
+
+These are some of the many terms that are thrown about that don't always
+mean what people think they mean (Inconceivable!).  In the interest of
+creating a common ground for discussion, terms and their definitions
+will be established.
+
+* Memory devices
+
+The individual DRAM chips on a memory stick.  These devices commonly
+output 4 and 8 bits each (x4, x8). Grouping several of these in parallel
+provides the number of bits that the memory controller expects:
+typically 72 bits, in order to provide 64 bits + 8 bits of ECC data.
+
+* Memory Stick
+
+A printed circuit board that aggregates multiple memory devices in
+parallel.  In general, this is the Field Replaceable Unit (FRU) which
+gets replaced, in the case of excessive errors. Most often it is also
+called DIMM (Dual Inline Memory Module).
+
+* Memory Socket
+
+A physical connector on the motherboard that accepts a single memory
+stick. Also called as "slot" on several datasheets.
+
+* Channel
+
+A memory controller channel, responsible to communicate with a group of
+DIMMs. Each channel has its own independent control (command) and data
+bus, and can be used independently or grouped with other channels.
+
+* Branch
+
+It is typically the highest hierarchy on a Fully-Buffered DIMM memory
+controller. Typically, it contains two channels. Two channels at the
+same branch can be used in single mode or in lockstep mode. When
+lockstep is enabled, the cacheline is doubled, but it generally brings
+some performance penalty. Also, it is generally not possible to point to
+just one memory stick when an error occurs, as the error correction code
+is calculated using two DIMMs instead of one. Due to that, it is capable
+of correcting more errors than on single mode.
+
+* Single-channel
+
+The data accessed by the memory controller is contained into one dimm
+only. E. g. if the data is 64 bits-wide, the data flows to the CPU using
+one 64 bits parallel access. Typically used with SDR, DDR, DDR2 and DDR3
+memories. FB-DIMM and RAMBUS use a different concept for channel, so
+this concept doesn't apply there.
+
+* Double-channel
+
+The data size accessed by the memory controller is interlaced into two
+dimms, accessed at the same time. E. g. if the DIMM is 64 bits-wide (72
+bits with ECC), the data flows to the CPU using a 128 bits parallel
+access.
+
+* Chip-select row
+
+This is the name of the DRAM signal used to select the DRAM ranks to be
+accessed. Common chip-select rows for single channel are 64 bits, for
+dual channel 128 bits. It may not be visible by the memory controller,
+as some DIMM types have a memory buffer that can hide direct access to
+it from the Memory Controller.
+
+* Single-Ranked stick
+
+A Single-ranked stick has 1 chip-select row of memory. Motherboards
+commonly drive two chip-select pins to a memory stick. A single-ranked
+stick, will occupy only one of those rows. The other will be unused.
+
+.. _doubleranked:
+
+* Double-Ranked stick
+
+A double-ranked stick has two chip-select rows which access different
+sets of memory devices.  The two rows cannot be accessed concurrently.
+
+* Double-sided stick
+
+**DEPRECATED TERM**, see :ref:`Double-Ranked stick <doubleranked>`.
+
+A double-sided stick has two chip-select rows which access different sets
+of memory devices. The two rows cannot be accessed concurrently.
+"Double-sided" is irrespective of the memory devices being mounted on
+both sides of the memory stick.
+
+* Socket set
+
+All of the memory sticks that are required for a single memory access or
+all of the memory sticks spanned by a chip-select row.  A single socket
+set has two chip-select rows and if double-sided sticks are used these
+will occupy those chip-select rows.
+
+* Bank
+
+This term is avoided because it is unclear when needing to distinguish
+between chip-select rows and socket sets.
+
+
 Memory Controllers
 ------------------
 
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 3c20d2d2686a..c4433fd6c859 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -330,114 +330,6 @@ enum scrub_type {
 #define OP_RUNNING_POLL_INTR	0x203
 #define OP_OFFLINE		0x300
 
-/*
- * Concepts used at the EDAC subsystem
- *
- * There are several things to be aware of that aren't at all obvious:
- *
- * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
- *
- * These are some of the many terms that are thrown about that don't always
- * mean what people think they mean (Inconceivable!).  In the interest of
- * creating a common ground for discussion, terms and their definitions
- * will be established.
- *
- * Memory devices:	The individual DRAM chips on a memory stick.  These
- *			devices commonly output 4 and 8 bits each (x4, x8).
- *			Grouping several of these in parallel provides the
- *			number of bits that the memory controller expects:
- *			typically 72 bits, in order to provide 64 bits +
- *			8 bits of ECC data.
- *
- * Memory Stick:	A printed circuit board that aggregates multiple
- *			memory devices in parallel.  In general, this is the
- *			Field Replaceable Unit (FRU) which gets replaced, in
- *			the case of excessive errors. Most often it is also
- *			called DIMM (Dual Inline Memory Module).
- *
- * Memory Socket:	A physical connector on the motherboard that accepts
- *			a single memory stick. Also called as "slot" on several
- *			datasheets.
- *
- * Channel:		A memory controller channel, responsible to communicate
- *			with a group of DIMMs. Each channel has its own
- *			independent control (command) and data bus, and can
- *			be used independently or grouped with other channels.
- *
- * Branch:		It is typically the highest hierarchy on a
- *			Fully-Buffered DIMM memory controller.
- *			Typically, it contains two channels.
- *			Two channels at the same branch can be used in single
- *			mode or in lockstep mode.
- *			When lockstep is enabled, the cacheline is doubled,
- *			but it generally brings some performance penalty.
- *			Also, it is generally not possible to point to just one
- *			memory stick when an error occurs, as the error
- *			correction code is calculated using two DIMMs instead
- *			of one. Due to that, it is capable of correcting more
- *			errors than on single mode.
- *
- * Single-channel:	The data accessed by the memory controller is contained
- *			into one dimm only. E. g. if the data is 64 bits-wide,
- *			the data flows to the CPU using one 64 bits parallel
- *			access.
- *			Typically used with SDR, DDR, DDR2 and DDR3 memories.
- *			FB-DIMM and RAMBUS use a different concept for channel,
- *			so this concept doesn't apply there.
- *
- * Double-channel:	The data size accessed by the memory controller is
- *			interlaced into two dimms, accessed at the same time.
- *			E. g. if the DIMM is 64 bits-wide (72 bits with ECC),
- *			the data flows to the CPU using a 128 bits parallel
- *			access.
- *
- * Chip-select row:	This is the name of the DRAM signal used to select the
- *			DRAM ranks to be accessed. Common chip-select rows for
- *			single channel are 64 bits, for dual channel 128 bits.
- *			It may not be visible by the memory controller, as some
- *			DIMM types have a memory buffer that can hide direct
- *			access to it from the Memory Controller.
- *
- * Single-Ranked stick:	A Single-ranked stick has 1 chip-select row of memory.
- *			Motherboards commonly drive two chip-select pins to
- *			a memory stick. A single-ranked stick, will occupy
- *			only one of those rows. The other will be unused.
- *
- * Double-Ranked stick:	A double-ranked stick has two chip-select rows which
- *			access different sets of memory devices.  The two
- *			rows cannot be accessed concurrently.
- *
- * Double-sided stick:	DEPRECATED TERM, see Double-Ranked stick.
- *			A double-sided stick has two chip-select rows which
- *			access different sets of memory devices. The two
- *			rows cannot be accessed concurrently. "Double-sided"
- *			is irrespective of the memory devices being mounted
- *			on both sides of the memory stick.
- *
- * Socket set:		All of the memory sticks that are required for
- *			a single memory access or all of the memory sticks
- *			spanned by a chip-select row.  A single socket set
- *			has two chip-select rows and if double-sided sticks
- *			are used these will occupy those chip-select rows.
- *
- * Bank:		This term is avoided because it is unclear when
- *			needing to distinguish between chip-select rows and
- *			socket sets.
- *
- * Controller pages:
- *
- * Physical pages:
- *
- * Virtual pages:
- *
- *
- * STRUCTURE ORGANIZATION AND CHOICES
- *
- *
- *
- * PS - I enjoyed writing all that about as much as you enjoyed reading it.
- */
-
 /**
  * enum edac_mc_layer - memory controller hierarchy layer
  *
-- 
cgit v1.2.3


From 4838a0def07f5611347860b1fc0129c3fe77cc02 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <Yazen.Ghannam@amd.com>
Date: Thu, 1 Dec 2016 14:24:53 -0600
Subject: EDAC: Document HW_EVENT_ERR_DEFERRED type

Add a description of the HW_EVENT_ERR_DEFERRED type that wasn't included
with commit d12a969ebbfc ("EDAC, amd64: Add Deferred Error type").

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/edac.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index c4433fd6c859..07c52c0af62d 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -130,6 +130,12 @@ enum dev_type {
  *				fatal (maybe it is on an unused memory area,
  *				or the memory controller could recover from
  *				it for example, by re-trying the operation).
+ * @HW_EVENT_ERR_DEFERRED:	Deferred Error - Indicates an uncorrectable
+ *				error whose handling is not urgent. This could
+ *				be due to hardware data poisoning where the
+ *				system can continue operation until the poisoned
+ *				data is consumed. Preemptive measures may also
+ *				be taken, e.g. offlining pages, etc.
  * @HW_EVENT_ERR_FATAL:		Fatal Error - Uncorrected error that could not
  *				be recovered.
  * @HW_EVENT_ERR_INFO:		Informational - The CPER spec defines a forth
-- 
cgit v1.2.3


From 9bf11ecce5a2758e5a097c2f3a13d08552d0d6f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Dec 2016 12:01:05 +0100
Subject: clocksource/dummy_timer: Move hotplug callback after the real timers

When the dummy timer callback is invoked before the real timer callbacks,
then it tries to install that timer for the starting CPU. If the platform
does not have a broadcast timer installed the installation fails with a
kernel crash. The crash happens due to a unconditional deference of the non
available broadcast device. This needs to be fixed in the timer core code.

But even when this is fixed in the core code then installing the dummy
timer before the real timers is a pointless exercise.

Move it to the end of the callback list.

Fixes: 00c1d17aab51 ("clocksource/dummy_timer: Convert to hotplug state machine")
Reported-and-tested-by: Mason <slash.tmp@free.fr>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Anna-Maria Gleixner <anna-maria@linutronix.de>
Cc: Richard Cochran <rcochran@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
Cc: Sebastian Frias <sf84@laposte.net>
Cc: Thibaud Cornic <thibaud_cornic@sigmadesigns.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Link: http://lkml.kernel.org/r/1147ef90-7877-e4d2-bb2b-5c4fa8d3144b@free.fr
---
 include/linux/cpuhotplug.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 22acee76cf4c..2ab7bf53d529 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -101,7 +101,6 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_L2X0_STARTING,
 	CPUHP_AP_ARM_ARCH_TIMER_STARTING,
 	CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
-	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_JCORE_TIMER_STARTING,
 	CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
 	CPUHP_AP_ARM_TWD_STARTING,
@@ -115,6 +114,8 @@ enum cpuhp_state {
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
 	CPUHP_AP_KVM_ARM_TIMER_STARTING,
+	/* Must be the last timer callback */
+	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_STARTING,
 	CPUHP_AP_ARM_CORESIGHT4_STARTING,
-- 
cgit v1.2.3


From 9efeccacd3a486128d3add611dd4cefb5b60a58c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 11 Dec 2016 06:34:53 +0200
Subject: linux: drop __bitwise__ everywhere

__bitwise__ used to mean "yes, please enable sparse checks
unconditionally", but now that we dropped __CHECK_ENDIAN__
__bitwise is exactly the same.
There aren't many users, replace it by __bitwise everywhere.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Stefan Schmidt <stefan@osg.samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Akced-by: Lee Duncan <lduncan@suse.com>
---
 arch/arm/plat-samsung/include/plat/gpio-cfg.h    | 2 +-
 drivers/md/dm-cache-block-types.h                | 6 +++---
 drivers/net/ethernet/sun/sunhme.h                | 2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h | 4 ++--
 include/linux/mmzone.h                           | 2 +-
 include/linux/serial_core.h                      | 4 ++--
 include/linux/types.h                            | 4 ++--
 include/scsi/iscsi_proto.h                       | 2 +-
 include/target/target_core_base.h                | 2 +-
 include/uapi/linux/virtio_types.h                | 6 +++---
 net/ieee802154/6lowpan/6lowpan_i.h               | 2 +-
 net/mac80211/ieee80211_i.h                       | 4 ++--
 12 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-samsung/include/plat/gpio-cfg.h b/arch/arm/plat-samsung/include/plat/gpio-cfg.h
index 21391faab068..e55d1f597db8 100644
--- a/arch/arm/plat-samsung/include/plat/gpio-cfg.h
+++ b/arch/arm/plat-samsung/include/plat/gpio-cfg.h
@@ -26,7 +26,7 @@
 
 #include <linux/types.h>
 
-typedef unsigned int __bitwise__ samsung_gpio_pull_t;
+typedef unsigned int __bitwise samsung_gpio_pull_t;
 
 /* forward declaration if gpio-core.h hasn't been included */
 struct samsung_gpio_chip;
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h
index bed4ad4e1b7c..389c9e8ac785 100644
--- a/drivers/md/dm-cache-block-types.h
+++ b/drivers/md/dm-cache-block-types.h
@@ -17,9 +17,9 @@
  * discard bitset.
  */
 
-typedef dm_block_t __bitwise__ dm_oblock_t;
-typedef uint32_t __bitwise__ dm_cblock_t;
-typedef dm_block_t __bitwise__ dm_dblock_t;
+typedef dm_block_t __bitwise dm_oblock_t;
+typedef uint32_t __bitwise dm_cblock_t;
+typedef dm_block_t __bitwise dm_dblock_t;
 
 static inline dm_oblock_t to_oblock(dm_block_t b)
 {
diff --git a/drivers/net/ethernet/sun/sunhme.h b/drivers/net/ethernet/sun/sunhme.h
index f4307654e4ae..4a8d5b18dfd5 100644
--- a/drivers/net/ethernet/sun/sunhme.h
+++ b/drivers/net/ethernet/sun/sunhme.h
@@ -302,7 +302,7 @@
  * Always write the address first before setting the ownership
  * bits to avoid races with the hardware scanning the ring.
  */
-typedef u32 __bitwise__ hme32;
+typedef u32 __bitwise hme32;
 
 struct happy_meal_rxd {
 	hme32 rx_flags;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
index 1ad0ec180d5d..84813b550ef1 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
@@ -228,7 +228,7 @@ enum iwl_ucode_tlv_flag {
 	IWL_UCODE_TLV_FLAGS_BCAST_FILTERING	= BIT(29),
 };
 
-typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t;
+typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
 
 /**
  * enum iwl_ucode_tlv_api - ucode api
@@ -258,7 +258,7 @@ enum iwl_ucode_tlv_api {
 #endif
 };
 
-typedef unsigned int __bitwise__ iwl_ucode_tlv_capa_t;
+typedef unsigned int __bitwise iwl_ucode_tlv_capa_t;
 
 /**
  * enum iwl_ucode_tlv_capa - ucode capabilities
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0f088f3a2fed..36d9896fbc1e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -246,7 +246,7 @@ struct lruvec {
 #define ISOLATE_UNEVICTABLE	((__force isolate_mode_t)0x8)
 
 /* LRU Isolation modes. */
-typedef unsigned __bitwise__ isolate_mode_t;
+typedef unsigned __bitwise isolate_mode_t;
 
 enum zone_watermarks {
 	WMARK_MIN,
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 5d494888a612..5def8e830fb0 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -111,8 +111,8 @@ struct uart_icount {
 	__u32	buf_overrun;
 };
 
-typedef unsigned int __bitwise__ upf_t;
-typedef unsigned int __bitwise__ upstat_t;
+typedef unsigned int __bitwise upf_t;
+typedef unsigned int __bitwise upstat_t;
 
 struct uart_port {
 	spinlock_t		lock;			/* port lock */
diff --git a/include/linux/types.h b/include/linux/types.h
index baf718324f4a..d501ad3ba247 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -154,8 +154,8 @@ typedef u64 dma_addr_t;
 typedef u32 dma_addr_t;
 #endif
 
-typedef unsigned __bitwise__ gfp_t;
-typedef unsigned __bitwise__ fmode_t;
+typedef unsigned __bitwise gfp_t;
+typedef unsigned __bitwise fmode_t;
 
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 typedef u64 phys_addr_t;
diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
index c1260d80ef30..df156f1d50b2 100644
--- a/include/scsi/iscsi_proto.h
+++ b/include/scsi/iscsi_proto.h
@@ -74,7 +74,7 @@ static inline int iscsi_sna_gte(u32 n1, u32 n2)
 #define zero_data(p) {p[0]=0;p[1]=0;p[2]=0;}
 
 /* initiator tags; opaque for target */
-typedef uint32_t __bitwise__ itt_t;
+typedef uint32_t __bitwise itt_t;
 /* below makes sense only for initiator that created this tag */
 #define build_itt(itt, age) ((__force itt_t)\
 	((itt) | ((age) << ISCSI_AGE_SHIFT)))
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index c2119008990a..00558287936d 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -149,7 +149,7 @@ enum se_cmd_flags_table {
  * Used by transport_send_check_condition_and_sense()
  * to signal which ASC/ASCQ sense payload should be built.
  */
-typedef unsigned __bitwise__ sense_reason_t;
+typedef unsigned __bitwise sense_reason_t;
 
 enum tcm_sense_reason_table {
 #define R(x)	(__force sense_reason_t )(x)
diff --git a/include/uapi/linux/virtio_types.h b/include/uapi/linux/virtio_types.h
index e845e8c4cbee..55c3b738722c 100644
--- a/include/uapi/linux/virtio_types.h
+++ b/include/uapi/linux/virtio_types.h
@@ -39,8 +39,8 @@
  * - __le{16,32,64} for standard-compliant virtio devices
  */
 
-typedef __u16 __bitwise__ __virtio16;
-typedef __u32 __bitwise__ __virtio32;
-typedef __u64 __bitwise__ __virtio64;
+typedef __u16 __bitwise __virtio16;
+typedef __u32 __bitwise __virtio32;
+typedef __u64 __bitwise __virtio64;
 
 #endif /* _UAPI_LINUX_VIRTIO_TYPES_H */
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..ac7c96b73ad5 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -7,7 +7,7 @@
 #include <net/inet_frag.h>
 #include <net/6lowpan.h>
 
-typedef unsigned __bitwise__ lowpan_rx_result;
+typedef unsigned __bitwise lowpan_rx_result;
 #define RX_CONTINUE		((__force lowpan_rx_result) 0u)
 #define RX_DROP_UNUSABLE	((__force lowpan_rx_result) 1u)
 #define RX_DROP			((__force lowpan_rx_result) 2u)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d37a577f63a1..b2069fbd60f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -159,7 +159,7 @@ enum ieee80211_bss_valid_data_flags {
 	IEEE80211_BSS_VALID_ERP			= BIT(3)
 };
 
-typedef unsigned __bitwise__ ieee80211_tx_result;
+typedef unsigned __bitwise ieee80211_tx_result;
 #define TX_CONTINUE	((__force ieee80211_tx_result) 0u)
 #define TX_DROP		((__force ieee80211_tx_result) 1u)
 #define TX_QUEUED	((__force ieee80211_tx_result) 2u)
@@ -180,7 +180,7 @@ struct ieee80211_tx_data {
 };
 
 
-typedef unsigned __bitwise__ ieee80211_rx_result;
+typedef unsigned __bitwise ieee80211_rx_result;
 #define RX_CONTINUE		((__force ieee80211_rx_result) 0u)
 #define RX_DROP_UNUSABLE	((__force ieee80211_rx_result) 1u)
 #define RX_DROP_MONITOR		((__force ieee80211_rx_result) 2u)
-- 
cgit v1.2.3


From 47057abde515155a4fee53038e7772d6b387e0aa Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 12 Jan 2016 20:24:14 +0100
Subject: nfsd: add support for the umask attribute

Clients can set the umask attribute when creating files to cause the
server to apply it always except when inheriting permissions from the
parent directory.  That way, the new files will end up with the same
permissions as files created locally.

See https://tools.ietf.org/html/draft-ietf-nfsv4-umask-02 for more
details.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c   |  3 +++
 fs/nfsd/nfs4xdr.c    | 26 +++++++++++++++++++++-----
 fs/nfsd/nfsd.h       |  9 +++++++--
 fs/nfsd/nfssvc.c     |  4 ++--
 include/linux/nfs4.h |  1 +
 5 files changed, 34 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e901cf124e9f..74a6e573e061 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -102,6 +102,9 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_attrnotsupp;
 	if (writable && !bmval_is_subset(bmval, writable))
 		return nfserr_inval;
+	if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
+			(bmval[1] & FATTR4_WORD1_MODE))
+		return nfserr_inval;
 	return nfs_ok;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 281739e1d477..79edde4577b2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -33,6 +33,7 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/fs_struct.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/namei.h>
@@ -299,7 +300,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 static __be32
 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		   struct iattr *iattr, struct nfs4_acl **acl,
-		   struct xdr_netobj *label)
+		   struct xdr_netobj *label, int *umask)
 {
 	int expected_len, len = 0;
 	u32 dummy32;
@@ -457,6 +458,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 			return nfserr_jukebox;
 	}
 #endif
+	if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
+		if (!umask)
+			goto xdr_error;
+		READ_BUF(8);
+		len += 8;
+		dummy32 = be32_to_cpup(p++);
+		iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO);
+		dummy32 = be32_to_cpup(p++);
+		*umask = dummy32 & S_IRWXUGO;
+		iattr->ia_valid |= ATTR_MODE;
+	}
 	if (len != expected_len)
 		goto xdr_error;
 
@@ -651,7 +663,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 		return status;
 
 	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
-				    &create->cr_acl, &create->cr_label);
+				    &create->cr_acl, &create->cr_label,
+				    &current->fs->umask);
 	if (status)
 		goto out;
 
@@ -896,13 +909,15 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 	case NFS4_OPEN_NOCREATE:
 		break;
 	case NFS4_OPEN_CREATE:
+		current->fs->umask = 0;
 		READ_BUF(4);
 		open->op_createmode = be32_to_cpup(p++);
 		switch (open->op_createmode) {
 		case NFS4_CREATE_UNCHECKED:
 		case NFS4_CREATE_GUARDED:
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				&open->op_iattr, &open->op_acl, &open->op_label);
+				&open->op_iattr, &open->op_acl, &open->op_label,
+				&current->fs->umask);
 			if (status)
 				goto out;
 			break;
@@ -916,7 +931,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 			READ_BUF(NFS4_VERIFIER_SIZE);
 			COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				&open->op_iattr, &open->op_acl, &open->op_label);
+				&open->op_iattr, &open->op_acl, &open->op_label,
+				&current->fs->umask);
 			if (status)
 				goto out;
 			break;
@@ -1153,7 +1169,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
 	if (status)
 		return status;
 	return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
-				  &setattr->sa_acl, &setattr->sa_label);
+				  &setattr->sa_acl, &setattr->sa_label, NULL);
 }
 
 static __be32
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7155239b2908..d74c8c44dc35 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -359,6 +359,7 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS)
 
 extern u32 nfsd_suppattrs[3][3];
@@ -390,10 +391,14 @@ static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
 	(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
 	| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL
+#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+	FATTR4_WORD2_SECURITY_LABEL
 #else
-#define NFSD_WRITEABLE_ATTRS_WORD2 0
+#define MAYBE_FATTR4_WORD2_SECURITY_LABEL 0
 #endif
+#define NFSD_WRITEABLE_ATTRS_WORD2 \
+	(FATTR4_WORD2_MODE_UMASK \
+	| MAYBE_FATTR4_WORD2_SECURITY_LABEL)
 
 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
 	NFSD_WRITEABLE_ATTRS_WORD0
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a2b65fc56dd6..e6bfd96734c0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -661,8 +661,8 @@ nfsd(void *vrqstp)
 	mutex_lock(&nfsd_mutex);
 
 	/* At this point, the thread shares current->fs
-	 * with the init process. We need to create files with a
-	 * umask of 0 instead of init's umask. */
+	 * with the init process. We need to create files with the
+	 * umask as defined by the client instead of init's umask. */
 	if (unshare_fs_struct() < 0) {
 		printk("Unable to start nfsd thread: out of memory\n");
 		goto out;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 9094faf0699d..bca536341d1a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -440,6 +440,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
+#define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
-- 
cgit v1.2.3


From 031a072a0b8ac2646def77aa310a95016c884bb0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 23 Sep 2016 11:38:11 +0300
Subject: vfs: call vfs_clone_file_range() under freeze protection

Move sb_start_write()/sb_end_write() out of the vfs helper and up into the
ioctl handler.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/ioctl.c         |  2 +-
 fs/nfsd/vfs.c      |  3 +--
 fs/read_write.c    |  3 ---
 include/linux/fs.h | 13 +++++++++++++
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6715b7208835..cb9b02940805 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -226,7 +226,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	ret = -EXDEV;
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
-	ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+	ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen);
 fdput:
 	fdput(src_file);
 	return ret;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ca642fe9b21..357e844aee84 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -509,8 +509,7 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 		u64 dst_pos, u64 count)
 {
-	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
-			count));
+	return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count));
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/read_write.c b/fs/read_write.c
index 175d30e3b603..c4e206b875d0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1687,8 +1687,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (pos_in + len > i_size_read(inode_in))
 		return -EINVAL;
 
-	sb_start_write(inode_out->i_sb);
-
 	ret = file_in->f_op->clone_file_range(file_in, pos_in,
 			file_out, pos_out, len);
 	if (!ret) {
@@ -1696,7 +1694,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		fsnotify_modify(file_out);
 	}
 
-	sb_end_write(inode_out->i_sb);
 	return ret;
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..52663f1f3084 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1783,6 +1783,19 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 
+static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
+				      struct file *file_out, loff_t pos_out,
+				      u64 len)
+{
+	int ret;
+
+	sb_start_write(file_inode(file_out)->i_sb);
+	ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+	sb_end_write(file_inode(file_out)->i_sb);
+
+	return ret;
+}
+
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
-- 
cgit v1.2.3


From cb02de96ec724b84373488dd349e53897ab432f5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 16 Dec 2016 12:40:55 +0000
Subject: x86/mpx: Move bd_addr to mm_context_t

Currently bd_addr lives in mm_struct, which is otherwise architecture
independent. Architecture-specific data is supposed to live within
mm_context_t (itself contained in mm_struct).

Other x86-specific context like the pkey accounting data lives in
mm_context_t, and there's no readon the MPX data can't also live there.
So as to keep the arch-specific data togather, and to set a good example
for others, this patch moves bd_addr into x86's mm_context_t.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1481892055-24596-1-git-send-email-mark.rutland@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/mmu.h |  4 ++++
 arch/x86/include/asm/mpx.h |  4 ++--
 arch/x86/mm/mpx.c          | 10 +++++-----
 include/linux/mm_types.h   |  4 ----
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 72198c64e646..f9813b6d8b80 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -31,6 +31,10 @@ typedef struct {
 	u16 pkey_allocation_map;
 	s16 execute_only_pkey;
 #endif
+#ifdef CONFIG_X86_INTEL_MPX
+	/* address of the bounds directory */
+	void __user *bd_addr;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index 7a35495275a9..0b416d4cf73b 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -59,7 +59,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
 int mpx_handle_bd_fault(void);
 static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
 {
-	return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
+	return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
 }
 static inline void mpx_mm_init(struct mm_struct *mm)
 {
@@ -67,7 +67,7 @@ static inline void mpx_mm_init(struct mm_struct *mm)
 	 * NULL is theoretically a valid place to put the bounds
 	 * directory, so point this at an invalid address.
 	 */
-	mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
 }
 void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long start, unsigned long end);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e4f800999b32..324e5713d386 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -350,12 +350,12 @@ int mpx_enable_management(void)
 	 * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
 	 * expected to be relatively expensive. Storing the bounds
 	 * directory here means that we do not have to do xsave in the
-	 * unmap path; we can just use mm->bd_addr instead.
+	 * unmap path; we can just use mm->context.bd_addr instead.
 	 */
 	bd_base = mpx_get_bounds_dir();
 	down_write(&mm->mmap_sem);
-	mm->bd_addr = bd_base;
-	if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
+	mm->context.bd_addr = bd_base;
+	if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
 		ret = -ENXIO;
 
 	up_write(&mm->mmap_sem);
@@ -370,7 +370,7 @@ int mpx_disable_management(void)
 		return -ENXIO;
 
 	down_write(&mm->mmap_sem);
-	mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -947,7 +947,7 @@ static int try_unmap_single_bt(struct mm_struct *mm,
 		end = bta_end_vaddr;
 	}
 
-	bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
+	bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
 	ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
 	/*
 	 * No bounds table there, so nothing to unmap.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4a8acedf4b7d..ce70cebef603 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -508,10 +508,6 @@ struct mm_struct {
 	bool tlb_flush_pending;
 #endif
 	struct uprobes_state uprobes_state;
-#ifdef CONFIG_X86_INTEL_MPX
-	/* address of the bounds directory */
-	void __user *bd_addr;
-#endif
 #ifdef CONFIG_HUGETLB_PAGE
 	atomic_long_t hugetlb_usage;
 #endif
-- 
cgit v1.2.3


From 83a77e9ec4150ee4acc635638f7dedd9da523a26 Mon Sep 17 00:00:00 2001
From: Bartosz Folta <bfolta@cadence.com>
Date: Wed, 14 Dec 2016 06:39:15 +0000
Subject: net: macb: Added PCI wrapper for Platform Driver.

There are hardware PCI implementations of Cadence GEM network
controller. This patch will allow to use such hardware with reuse of
existing Platform Driver.

Signed-off-by: Bartosz Folta <bfolta@cadence.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/Kconfig    |   9 ++
 drivers/net/ethernet/cadence/Makefile   |   1 +
 drivers/net/ethernet/cadence/macb.c     |  31 +++++--
 drivers/net/ethernet/cadence/macb_pci.c | 153 ++++++++++++++++++++++++++++++++
 include/linux/platform_data/macb.h      |   6 ++
 5 files changed, 195 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/cadence/macb_pci.c

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index f0bcb15d3fec..608bea171956 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -31,4 +31,13 @@ config MACB
 	  To compile this driver as a module, choose M here: the module
 	  will be called macb.
 
+config MACB_PCI
+	tristate "Cadence PCI MACB/GEM support"
+	depends on MACB && PCI && COMMON_CLK
+	---help---
+	  This is PCI wrapper for MACB driver.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called macb_pci.
+
 endif # NET_CADENCE
diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile
index 91f79b1f0505..4ba75594d5c5 100644
--- a/drivers/net/ethernet/cadence/Makefile
+++ b/drivers/net/ethernet/cadence/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_MACB) += macb.o
+obj-$(CONFIG_MACB_PCI) += macb_pci.o
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 538544a7c642..c0fb80acc2da 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -404,6 +404,8 @@ static int macb_mii_probe(struct net_device *dev)
 			phy_irq = gpio_to_irq(pdata->phy_irq_pin);
 			phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq;
 		}
+	} else {
+		phydev->irq = PHY_POLL;
 	}
 
 	/* attach the mac to the phy */
@@ -482,6 +484,9 @@ static int macb_mii_init(struct macb *bp)
 				goto err_out_unregister_bus;
 		}
 	} else {
+		for (i = 0; i < PHY_MAX_ADDR; i++)
+			bp->mii_bus->irq[i] = PHY_POLL;
+
 		if (pdata)
 			bp->mii_bus->phy_mask = pdata->phy_mask;
 
@@ -2523,16 +2528,24 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 			 struct clk **hclk, struct clk **tx_clk,
 			 struct clk **rx_clk)
 {
+	struct macb_platform_data *pdata;
 	int err;
 
-	*pclk = devm_clk_get(&pdev->dev, "pclk");
+	pdata = dev_get_platdata(&pdev->dev);
+	if (pdata) {
+		*pclk = pdata->pclk;
+		*hclk = pdata->hclk;
+	} else {
+		*pclk = devm_clk_get(&pdev->dev, "pclk");
+		*hclk = devm_clk_get(&pdev->dev, "hclk");
+	}
+
 	if (IS_ERR(*pclk)) {
 		err = PTR_ERR(*pclk);
 		dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
 		return err;
 	}
 
-	*hclk = devm_clk_get(&pdev->dev, "hclk");
 	if (IS_ERR(*hclk)) {
 		err = PTR_ERR(*hclk);
 		dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
@@ -3107,15 +3120,23 @@ static const struct of_device_id macb_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, macb_dt_ids);
 #endif /* CONFIG_OF */
 
+static const struct macb_config default_gem_config = {
+	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+	.dma_burst_length = 16,
+	.clk_init = macb_clk_init,
+	.init = macb_init,
+	.jumbo_max_len = 10240,
+};
+
 static int macb_probe(struct platform_device *pdev)
 {
+	const struct macb_config *macb_config = &default_gem_config;
 	int (*clk_init)(struct platform_device *, struct clk **,
 			struct clk **, struct clk **,  struct clk **)
-					      = macb_clk_init;
-	int (*init)(struct platform_device *) = macb_init;
+					      = macb_config->clk_init;
+	int (*init)(struct platform_device *) = macb_config->init;
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *phy_node;
-	const struct macb_config *macb_config = NULL;
 	struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
 	unsigned int queue_mask, num_queues;
 	struct macb_platform_data *pdata;
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
new file mode 100644
index 000000000000..92be2cd8f817
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -0,0 +1,153 @@
+/**
+ * macb_pci.c - Cadence GEM PCI wrapper.
+ *
+ * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com
+ *
+ * Authors: Rafal Ozieblo <rafalo@cadence.com>
+ *	    Bartosz Folta <bfolta@cadence.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_data/macb.h>
+#include <linux/platform_device.h>
+#include "macb.h"
+
+#define PCI_DRIVER_NAME "macb_pci"
+#define PLAT_DRIVER_NAME "macb"
+
+#define CDNS_VENDOR_ID 0x17cd
+#define CDNS_DEVICE_ID 0xe007
+
+#define GEM_PCLK_RATE 50000000
+#define GEM_HCLK_RATE 50000000
+
+static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int err;
+	struct platform_device *plat_dev;
+	struct platform_device_info plat_info;
+	struct macb_platform_data plat_data;
+	struct resource res[2];
+
+	/* sanity check */
+	if (!id)
+		return -EINVAL;
+
+	/* enable pci device */
+	err = pci_enable_device(pdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X",
+			err);
+		return -EACCES;
+	}
+
+	pci_set_master(pdev);
+
+	/* set up resources */
+	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
+	res[0].start = pdev->resource[0].start;
+	res[0].end = pdev->resource[0].end;
+	res[0].name = PCI_DRIVER_NAME;
+	res[0].flags = IORESOURCE_MEM;
+	res[1].start = pdev->irq;
+	res[1].name = PCI_DRIVER_NAME;
+	res[1].flags = IORESOURCE_IRQ;
+
+	dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n",
+		 (void *)(uintptr_t)pci_resource_start(pdev, 0));
+
+	/* set up macb platform data */
+	memset(&plat_data, 0, sizeof(plat_data));
+
+	/* initialize clocks */
+	plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
+						 GEM_PCLK_RATE);
+	if (IS_ERR(plat_data.pclk)) {
+		err = PTR_ERR(plat_data.pclk);
+		goto err_pclk_register;
+	}
+
+	plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
+						 GEM_HCLK_RATE);
+	if (IS_ERR(plat_data.hclk)) {
+		err = PTR_ERR(plat_data.hclk);
+		goto err_hclk_register;
+	}
+
+	/* set up platform device info */
+	memset(&plat_info, 0, sizeof(plat_info));
+	plat_info.parent = &pdev->dev;
+	plat_info.fwnode = pdev->dev.fwnode;
+	plat_info.name = PLAT_DRIVER_NAME;
+	plat_info.id = pdev->devfn;
+	plat_info.res = res;
+	plat_info.num_res = ARRAY_SIZE(res);
+	plat_info.data = &plat_data;
+	plat_info.size_data = sizeof(plat_data);
+	plat_info.dma_mask = DMA_BIT_MASK(32);
+
+	/* register platform device */
+	plat_dev = platform_device_register_full(&plat_info);
+	if (IS_ERR(plat_dev)) {
+		err = PTR_ERR(plat_dev);
+		goto err_plat_dev_register;
+	}
+
+	pci_set_drvdata(pdev, plat_dev);
+
+	return 0;
+
+err_plat_dev_register:
+	clk_unregister(plat_data.hclk);
+
+err_hclk_register:
+	clk_unregister(plat_data.pclk);
+
+err_pclk_register:
+	pci_disable_device(pdev);
+	return err;
+}
+
+static void macb_remove(struct pci_dev *pdev)
+{
+	struct platform_device *plat_dev = pci_get_drvdata(pdev);
+	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
+
+	platform_device_unregister(plat_dev);
+	pci_disable_device(pdev);
+	clk_unregister(plat_data->pclk);
+	clk_unregister(plat_data->hclk);
+}
+
+static struct pci_device_id dev_id_table[] = {
+	{ PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), },
+	{ 0, }
+};
+
+static struct pci_driver macb_pci_driver = {
+	.name     = PCI_DRIVER_NAME,
+	.id_table = dev_id_table,
+	.probe    = macb_probe,
+	.remove	  = macb_remove,
+};
+
+module_pci_driver(macb_pci_driver);
+MODULE_DEVICE_TABLE(pci, dev_id_table);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Cadence NIC PCI wrapper");
diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
index 21b15f6fee25..7815d50c26ff 100644
--- a/include/linux/platform_data/macb.h
+++ b/include/linux/platform_data/macb.h
@@ -8,6 +8,8 @@
 #ifndef __MACB_PDATA_H__
 #define __MACB_PDATA_H__
 
+#include <linux/clk.h>
+
 /**
  * struct macb_platform_data - platform data for MACB Ethernet
  * @phy_mask:		phy mask passed when register the MDIO bus
@@ -15,12 +17,16 @@
  * @phy_irq_pin:	PHY IRQ
  * @is_rmii:		using RMII interface?
  * @rev_eth_addr:	reverse Ethernet address byte order
+ * @pclk:		platform clock
+ * @hclk:		AHB clock
  */
 struct macb_platform_data {
 	u32		phy_mask;
 	int		phy_irq_pin;
 	u8		is_rmii;
 	u8		rev_eth_addr;
+	struct clk	*pclk;
+	struct clk	*hclk;
 };
 
 #endif /* __MACB_PDATA_H__ */
-- 
cgit v1.2.3


From dcdc43d6642c828fa10d25130a92b712003d2ca4 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Thu, 15 Dec 2016 10:53:21 +0100
Subject: bpf: cgroup: annotate pointers in struct cgroup_bpf with __rcu

The member 'effective' in 'struct cgroup_bpf' is protected by RCU.
Annotate it accordingly to squelch a sparse warning.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7b6e5d168c95..92bc89ae7e20 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,7 +20,7 @@ struct cgroup_bpf {
 	 * when this cgroup is accessed.
 	 */
 	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
-	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
-- 
cgit v1.2.3


From 24c946cc5d35e32c5bb0c07ebdad32756e2bd20d Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Thu, 15 Dec 2016 11:42:48 +0100
Subject: irnet: ppp: move IRNET_MINOR to include/linux/miscdevice.h

This patch move the define for IRNET_MINOR to include/linux/miscdevice.h
It is better that all minor number definitions are in the same place.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/miscdevice.h | 1 +
 net/irda/irnet/irnet_ppp.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 722698a43d79..86d5704c9e23 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -32,6 +32,7 @@
 #define STORE_QUEUE_MINOR	155	/* unused */
 #define I2O_MINOR		166
 #define MICROCODE_MINOR		184
+#define IRNET_MINOR		187
 #define VFIO_MINOR		196
 #define TUN_MINOR		200
 #define CUSE_MINOR		203
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index 693ebc08fa2e..18fceadbb4bf 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -21,7 +21,6 @@
 
 /* /dev/irnet file constants */
 #define IRNET_MAJOR	10	/* Misc range */
-#define IRNET_MINOR	187	/* Official allocation */
 
 /* IrNET control channel stuff */
 #define IRNET_MAX_COMMAND	256	/* Max length of a command line */
-- 
cgit v1.2.3


From f23bc46c30ca5ef58b8549434899fcbac41b2cfc Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 15 Dec 2016 12:12:54 -0800
Subject: net: xdp: add invalid buffer warning

This adds a warning for drivers to use when encountering an invalid
buffer for XDP. For normal cases this should not happen but to catch
this in virtual/qemu setups that I may not have expected from the
emulation layer having a standard warning is useful.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 1 +
 net/core/filter.c      | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6a1658308612..af8a1804cac6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -602,6 +602,7 @@ bool bpf_helper_changes_pkt_data(void *func);
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_buffer(void);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/net/core/filter.c b/net/core/filter.c
index b1461708a977..7190bd648154 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2972,6 +2972,12 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_buffer(void)
+{
+	WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
+
 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 					int src_reg, int ctx_off,
 					struct bpf_insn *insn_buf,
-- 
cgit v1.2.3


From e8465447d2f3366069115f7453153561ac9a1220 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@codeaurora.org>
Date: Fri, 16 Dec 2016 10:11:56 +0530
Subject: block: Remove unused member (busy) from struct blk_queue_tag

Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 286b2a264383..83695641bd5e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -288,7 +288,6 @@ enum blk_queue_state {
 struct blk_queue_tag {
 	struct request **tag_index;	/* map of busy tags */
 	unsigned long *tag_map;		/* bit map of free/busy tags */
-	int busy;			/* current depth */
 	int max_depth;			/* what we will send to device */
 	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
-- 
cgit v1.2.3


From aafe6ae9cee32df85eb5e8bb6dd1d918e6807b09 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 18 Dec 2016 01:52:57 +0100
Subject: bpf: dynamically allocate digest scratch buffer

Geert rightfully complained that 7bd509e311f4 ("bpf: add prog_digest
and expose it via fdinfo/netlink") added a too large allocation of
variable 'raw' from bss section, and should instead be done dynamically:

  # ./scripts/bloat-o-meter kernel/bpf/core.o.1 kernel/bpf/core.o.2
  add/remove: 3/0 grow/shrink: 0/0 up/down: 33291/0 (33291)
  function                                     old     new   delta
  raw                                            -   32832  +32832
  [...]

Since this is only relevant during program creation path, which can be
considered slow-path anyway, lets allocate that dynamically and be not
implicitly dependent on verifier mutex. Move bpf_prog_calc_digest() at
the beginning of replace_map_fd_with_map_ptr() and also error handling
stays straight forward.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h    |  2 +-
 include/linux/filter.h | 14 +++++++++++---
 kernel/bpf/core.c      | 27 ++++++++++++++++-----------
 kernel/bpf/syscall.c   |  2 +-
 kernel/bpf/verifier.c  |  6 ++++--
 5 files changed, 33 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8796ff03f472..201eb483c46f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
-void bpf_prog_calc_digest(struct bpf_prog *fp);
+int bpf_prog_calc_digest(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index af8a1804cac6..702314253797 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -57,9 +57,6 @@ struct bpf_prog_aux;
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* Maximum BPF program size in bytes. */
-#define MAX_BPF_SIZE	(BPF_MAXINSNS * sizeof(struct bpf_insn))
-
 /* Helper macros for filter block array initializers. */
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@@ -517,6 +514,17 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 	return BPF_PROG_RUN(prog, xdp);
 }
 
+static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
+{
+	return prog->len * sizeof(struct bpf_insn);
+}
+
+static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog)
+{
+	return round_up(bpf_prog_insn_size(prog) +
+			sizeof(__be64) + 1, SHA_MESSAGE_BYTES);
+}
+
 static inline unsigned int bpf_prog_size(unsigned int proglen)
 {
 	return max(sizeof(struct bpf_prog),
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 83e0d153b0b4..75c08b8068d6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -136,28 +136,29 @@ void __bpf_prog_free(struct bpf_prog *fp)
 	vfree(fp);
 }
 
-#define SHA_BPF_RAW_SIZE						\
-	round_up(MAX_BPF_SIZE + sizeof(__be64) + 1, SHA_MESSAGE_BYTES)
-
-/* Called under verifier mutex. */
-void bpf_prog_calc_digest(struct bpf_prog *fp)
+int bpf_prog_calc_digest(struct bpf_prog *fp)
 {
 	const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
-	static u32 ws[SHA_WORKSPACE_WORDS];
-	static u8 raw[SHA_BPF_RAW_SIZE];
-	struct bpf_insn *dst = (void *)raw;
+	u32 raw_size = bpf_prog_digest_scratch_size(fp);
+	u32 ws[SHA_WORKSPACE_WORDS];
 	u32 i, bsize, psize, blocks;
+	struct bpf_insn *dst;
 	bool was_ld_map;
-	u8 *todo = raw;
+	u8 *raw, *todo;
 	__be32 *result;
 	__be64 *bits;
 
+	raw = vmalloc(raw_size);
+	if (!raw)
+		return -ENOMEM;
+
 	sha_init(fp->digest);
 	memset(ws, 0, sizeof(ws));
 
 	/* We need to take out the map fd for the digest calculation
 	 * since they are unstable from user space side.
 	 */
+	dst = (void *)raw;
 	for (i = 0, was_ld_map = false; i < fp->len; i++) {
 		dst[i] = fp->insnsi[i];
 		if (!was_ld_map &&
@@ -177,12 +178,13 @@ void bpf_prog_calc_digest(struct bpf_prog *fp)
 		}
 	}
 
-	psize = fp->len * sizeof(struct bpf_insn);
-	memset(&raw[psize], 0, sizeof(raw) - psize);
+	psize = bpf_prog_insn_size(fp);
+	memset(&raw[psize], 0, raw_size - psize);
 	raw[psize++] = 0x80;
 
 	bsize  = round_up(psize, SHA_MESSAGE_BYTES);
 	blocks = bsize / SHA_MESSAGE_BYTES;
+	todo   = raw;
 	if (bsize - psize >= sizeof(__be64)) {
 		bits = (__be64 *)(todo + bsize - sizeof(__be64));
 	} else {
@@ -199,6 +201,9 @@ void bpf_prog_calc_digest(struct bpf_prog *fp)
 	result = (__force __be32 *)fp->digest;
 	for (i = 0; i < SHA_DIGEST_WORDS; i++)
 		result[i] = cpu_to_be32(fp->digest[i]);
+
+	vfree(raw);
+	return 0;
 }
 
 static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4819ec9d95f6..35d674c1f12e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -811,7 +811,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 
 	err = -EFAULT;
 	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
-			   prog->len * sizeof(struct bpf_insn)) != 0)
+			   bpf_prog_insn_size(prog)) != 0)
 		goto free_prog;
 
 	prog->orig_prog = NULL;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 81e267bc4640..64b7b1abe087 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2931,6 +2931,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 	int insn_cnt = env->prog->len;
 	int i, j, err;
 
+	err = bpf_prog_calc_digest(env->prog);
+	if (err)
+		return err;
+
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		if (BPF_CLASS(insn->code) == BPF_LDX &&
 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
@@ -3178,8 +3182,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		log_level = 0;
 	}
 
-	bpf_prog_calc_digest(env->prog);
-
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
-- 
cgit v1.2.3


From 5ccb071e97fbd9ffe623a0d3977cc6d013bee93c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 18 Dec 2016 01:52:58 +0100
Subject: bpf: fix overflow in prog accounting

Commit aaac3ba95e4c ("bpf: charge user for creation of BPF maps and
programs") made a wrong assumption of charging against prog->pages.
Unlike map->pages, prog->pages are still subject to change when we
need to expand the program through bpf_prog_realloc().

This can for example happen during verification stage when we need to
expand and rewrite parts of the program. Should the required space
cross a page boundary, then prog->pages is not the same anymore as
its original value that we used to bpf_prog_charge_memlock() on. Thus,
we'll hit a wrap-around during bpf_prog_uncharge_memlock() when prog
is freed eventually. I noticed this that despite having unlimited
memlock, programs suddenly refused to load with EPERM error due to
insufficient memlock.

There are two ways to fix this issue. One would be to add a cached
variable to struct bpf_prog that takes a snapshot of prog->pages at the
time of charging. The other approach is to also account for resizes. I
chose to go with the latter for a couple of reasons: i) We want accounting
rather to be more accurate instead of further fooling limits, ii) adding
yet another page counter on struct bpf_prog would also be a waste just
for this purpose. We also do want to charge as early as possible to
avoid going into the verifier just to find out later on that we crossed
limits. The only place that needs to be fixed is bpf_prog_realloc(),
since only here we expand the program, so we try to account for the
needed delta and should we fail, call-sites check for outcome anyway.
On cBPF to eBPF migrations, we don't grab a reference to the user as
they are charged differently. With that in place, my test case worked
fine.

Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h  | 11 +++++++++++
 kernel/bpf/core.c    | 16 +++++++++++++---
 kernel/bpf/syscall.c | 36 ++++++++++++++++++++++++++++--------
 3 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 201eb483c46f..f74ae68086dc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -238,6 +238,8 @@ struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
 void bpf_prog_sub(struct bpf_prog *prog, int i);
 struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
+int __bpf_prog_charge(struct user_struct *user, u32 pages);
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
@@ -318,6 +320,15 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+	return 0;
+}
+
+static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 75c08b8068d6..1eb4f1303756 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -105,19 +105,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
 			  gfp_extra_flags;
 	struct bpf_prog *fp;
+	u32 pages, delta;
+	int ret;
 
 	BUG_ON(fp_old == NULL);
 
 	size = round_up(size, PAGE_SIZE);
-	if (size <= fp_old->pages * PAGE_SIZE)
+	pages = size / PAGE_SIZE;
+	if (pages <= fp_old->pages)
 		return fp_old;
 
+	delta = pages - fp_old->pages;
+	ret = __bpf_prog_charge(fp_old->aux->user, delta);
+	if (ret)
+		return NULL;
+
 	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
-	if (fp != NULL) {
+	if (fp == NULL) {
+		__bpf_prog_uncharge(fp_old->aux->user, delta);
+	} else {
 		kmemcheck_annotate_bitfield(fp, meta);
 
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
-		fp->pages = size / PAGE_SIZE;
+		fp->pages = pages;
 		fp->aux->prog = fp;
 
 		/* We keep fp->aux from fp_old around in the new
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35d674c1f12e..e89acea22ecf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -615,19 +615,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
 	kfree(aux->used_maps);
 }
 
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	unsigned long user_bufs;
+
+	if (user) {
+		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+		if (user_bufs > memlock_limit) {
+			atomic_long_sub(pages, &user->locked_vm);
+			return -EPERM;
+		}
+	}
+
+	return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+	if (user)
+		atomic_long_sub(pages, &user->locked_vm);
+}
+
 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 {
 	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit;
-
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	int ret;
 
-	atomic_long_add(prog->pages, &user->locked_vm);
-	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
-		atomic_long_sub(prog->pages, &user->locked_vm);
+	ret = __bpf_prog_charge(user, prog->pages);
+	if (ret) {
 		free_uid(user);
-		return -EPERM;
+		return ret;
 	}
+
 	prog->aux->user = user;
 	return 0;
 }
@@ -636,7 +656,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 {
 	struct user_struct *user = prog->aux->user;
 
-	atomic_long_sub(prog->pages, &user->locked_vm);
+	__bpf_prog_uncharge(user, prog->pages);
 	free_uid(user);
 }
 
-- 
cgit v1.2.3


From 957ae5098185e763b5c06be6c3b4b6e98c048712 Mon Sep 17 00:00:00 2001
From: Nilesh Bacchewar <nilesh.bacchewar@intel.com>
Date: Mon, 7 Nov 2016 12:11:47 -0800
Subject: platform/x86: Add Whiskey Cove PMIC TMU support

This adds TMU (Time Management Unit) support for Intel BXT platform.
It enables the alarm wake-up functionality in the TMU unit of Whiskey Cove
PMIC.

Signed-off-by: Nilesh Bacchewar <nilesh.bacchewar@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
[andy: resolve merge conflict in Kconfig]
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/mfd/intel_soc_pmic_bxtwc.c     |  38 ++++++++
 drivers/platform/x86/Kconfig           |   9 ++
 drivers/platform/x86/Makefile          |   1 +
 drivers/platform/x86/intel_bxtwc_tmu.c | 162 +++++++++++++++++++++++++++++++++
 include/linux/mfd/intel_soc_pmic.h     |   1 +
 5 files changed, 211 insertions(+)
 create mode 100644 drivers/platform/x86/intel_bxtwc_tmu.c

(limited to 'include/linux')

diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c
index 43e54b7e908f..92aba02042f0 100644
--- a/drivers/mfd/intel_soc_pmic_bxtwc.c
+++ b/drivers/mfd/intel_soc_pmic_bxtwc.c
@@ -42,6 +42,7 @@
 #define BXTWC_GPIOIRQ0		0x4E0B
 #define BXTWC_GPIOIRQ1		0x4E0C
 #define BXTWC_CRITIRQ		0x4E0D
+#define BXTWC_TMUIRQ		0x4FB6
 
 /* Interrupt MASK Registers */
 #define BXTWC_MIRQLVL1		0x4E0E
@@ -59,6 +60,7 @@
 #define BXTWC_MGPIO0IRQ		0x4E19
 #define BXTWC_MGPIO1IRQ		0x4E1A
 #define BXTWC_MCRITIRQ		0x4E1B
+#define BXTWC_MTMUIRQ		0x4FB7
 
 /* Whiskey Cove PMIC share same ACPI ID between different platforms */
 #define BROXTON_PMIC_WC_HRV	4
@@ -91,6 +93,7 @@ enum bxtwc_irqs_level2 {
 	BXTWC_GPIO0_IRQ,
 	BXTWC_GPIO1_IRQ,
 	BXTWC_CRIT_IRQ,
+	BXTWC_TMU_IRQ,
 };
 
 static const struct regmap_irq bxtwc_regmap_irqs[] = {
@@ -118,6 +121,10 @@ static const struct regmap_irq bxtwc_regmap_irqs_level2[] = {
 	REGMAP_IRQ_REG(BXTWC_CRIT_IRQ, 9, 0x03),
 };
 
+static const struct regmap_irq bxtwc_regmap_irqs_tmu[] = {
+	REGMAP_IRQ_REG(BXTWC_TMU_IRQ, 0, 0x06),
+};
+
 static struct regmap_irq_chip bxtwc_regmap_irq_chip = {
 	.name = "bxtwc_irq_chip",
 	.status_base = BXTWC_IRQLVL1,
@@ -136,6 +143,15 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip_level2 = {
 	.num_regs = 10,
 };
 
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = {
+	.name = "bxtwc_irq_chip_tmu",
+	.status_base = BXTWC_TMUIRQ,
+	.mask_base = BXTWC_MTMUIRQ,
+	.irqs = bxtwc_regmap_irqs_tmu,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_tmu),
+	.num_regs = 1,
+};
+
 static struct resource gpio_resources[] = {
 	DEFINE_RES_IRQ_NAMED(BXTWC_GPIO0_IRQ, "GPIO0"),
 	DEFINE_RES_IRQ_NAMED(BXTWC_GPIO1_IRQ, "GPIO1"),
@@ -164,6 +180,10 @@ static struct resource bcu_resources[] = {
 	DEFINE_RES_IRQ_NAMED(BXTWC_BCU_IRQ, "BCU"),
 };
 
+static struct resource tmu_resources[] = {
+	DEFINE_RES_IRQ_NAMED(BXTWC_TMU_IRQ, "TMU"),
+};
+
 static struct mfd_cell bxt_wc_dev[] = {
 	{
 		.name = "bxt_wcove_gpadc",
@@ -190,6 +210,12 @@ static struct mfd_cell bxt_wc_dev[] = {
 		.num_resources = ARRAY_SIZE(bcu_resources),
 		.resources = bcu_resources,
 	},
+	{
+		.name = "bxt_wcove_tmu",
+		.num_resources = ARRAY_SIZE(tmu_resources),
+		.resources = tmu_resources,
+	},
+
 	{
 		.name = "bxt_wcove_gpio",
 		.num_resources = ARRAY_SIZE(gpio_resources),
@@ -400,6 +426,15 @@ static int bxtwc_probe(struct platform_device *pdev)
 		goto err_irq_chip_level2;
 	}
 
+	ret = regmap_add_irq_chip(pmic->regmap, pmic->irq,
+				  IRQF_ONESHOT | IRQF_SHARED,
+				  0, &bxtwc_regmap_irq_chip_tmu,
+				  &pmic->irq_chip_data_tmu);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to add TMU IRQ chip\n");
+		goto err_irq_chip_tmu;
+	}
+
 	ret = mfd_add_devices(&pdev->dev, PLATFORM_DEVID_NONE, bxt_wc_dev,
 			      ARRAY_SIZE(bxt_wc_dev), NULL, 0,
 			      NULL);
@@ -429,6 +464,8 @@ static int bxtwc_probe(struct platform_device *pdev)
 err_sysfs:
 	mfd_remove_devices(&pdev->dev);
 err_mfd:
+	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data_tmu);
+err_irq_chip_tmu:
 	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data_level2);
 err_irq_chip_level2:
 	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data);
@@ -444,6 +481,7 @@ static int bxtwc_remove(struct platform_device *pdev)
 	mfd_remove_devices(&pdev->dev);
 	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data);
 	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data_level2);
+	regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data_tmu);
 
 	return 0;
 }
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 493e3386fbf6..5fe8be089b8b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1017,6 +1017,15 @@ config INTEL_PMC_IPC
 	The PMC is an ARC processor which defines IPC commands for communication
 	with other entities in the CPU.
 
+config INTEL_BXTWC_PMIC_TMU
+	tristate "Intel BXT Whiskey Cove TMU Driver"
+	depends on REGMAP
+	depends on INTEL_SOC_PMIC && INTEL_PMC_IPC
+	---help---
+	  Select this driver to use Intel BXT Whiskey Cove PMIC TMU feature.
+	  This driver enables the alarm wakeup functionality in the TMU unit
+	  of Whiskey Cove PMIC.
+
 config SURFACE_PRO3_BUTTON
 	tristate "Power/home/volume buttons driver for Microsoft Surface Pro 3/4 tablet"
 	depends on ACPI && INPUT
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index e9290290f026..d4111f0f8a78 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_INTEL_PMC_IPC)	+= intel_pmc_ipc.o
 obj-$(CONFIG_SURFACE_PRO3_BUTTON)	+= surfacepro3_button.o
 obj-$(CONFIG_SURFACE_3_BUTTON)	+= surface3_button.o
 obj-$(CONFIG_INTEL_PUNIT_IPC)  += intel_punit_ipc.o
+obj-$(CONFIG_INTEL_BXTWC_PMIC_TMU)	+= intel_bxtwc_tmu.o
 obj-$(CONFIG_INTEL_TELEMETRY)	+= intel_telemetry_core.o \
 				   intel_telemetry_pltdrv.o \
 				   intel_telemetry_debugfs.o
diff --git a/drivers/platform/x86/intel_bxtwc_tmu.c b/drivers/platform/x86/intel_bxtwc_tmu.c
new file mode 100644
index 000000000000..e202abd5b0df
--- /dev/null
+++ b/drivers/platform/x86/intel_bxtwc_tmu.c
@@ -0,0 +1,162 @@
+/*
+ * intel_bxtwc_tmu.c - Intel BXT Whiskey Cove PMIC TMU driver
+ *
+ * Copyright (C) 2016 Intel Corporation. All rights reserved.
+ *
+ * This driver adds TMU (Time Management Unit) support for Intel BXT platform.
+ * It enables the alarm wake-up functionality in the TMU unit of Whiskey Cove
+ * PMIC.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/intel_soc_pmic.h>
+
+#define BXTWC_TMUIRQ		0x4fb6
+#define BXTWC_MIRQLVL1		0x4e0e
+#define BXTWC_MTMUIRQ_REG	0x4fb7
+#define BXTWC_MIRQLVL1_MTMU	BIT(1)
+#define BXTWC_TMU_WK_ALRM	BIT(1)
+#define BXTWC_TMU_SYS_ALRM	BIT(2)
+#define BXTWC_TMU_ALRM_MASK	(BXTWC_TMU_WK_ALRM | BXTWC_TMU_SYS_ALRM)
+#define BXTWC_TMU_ALRM_IRQ	(BXTWC_TMU_WK_ALRM | BXTWC_TMU_SYS_ALRM)
+
+struct wcove_tmu {
+	int irq;
+	struct device *dev;
+	struct regmap *regmap;
+};
+
+static irqreturn_t bxt_wcove_tmu_irq_handler(int irq, void *data)
+{
+	struct wcove_tmu *wctmu = data;
+	unsigned int tmu_irq;
+
+	/* Read TMU interrupt reg */
+	regmap_read(wctmu->regmap, BXTWC_TMUIRQ, &tmu_irq);
+	if (tmu_irq & BXTWC_TMU_ALRM_IRQ) {
+		/* clear TMU irq */
+		regmap_write(wctmu->regmap, BXTWC_TMUIRQ, tmu_irq);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static int bxt_wcove_tmu_probe(struct platform_device *pdev)
+{
+	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
+	struct regmap_irq_chip_data *regmap_irq_chip;
+	struct wcove_tmu *wctmu;
+	int ret, virq, irq;
+
+	wctmu = devm_kzalloc(&pdev->dev, sizeof(*wctmu), GFP_KERNEL);
+	if (!wctmu)
+		return -ENOMEM;
+
+	wctmu->dev = &pdev->dev;
+	wctmu->regmap = pmic->regmap;
+
+	irq = platform_get_irq(pdev, 0);
+
+	if (irq < 0) {
+		dev_err(&pdev->dev, "invalid irq %d\n", irq);
+		return irq;
+	}
+
+	regmap_irq_chip = pmic->irq_chip_data_tmu;
+	virq = regmap_irq_get_virq(regmap_irq_chip, irq);
+	if (virq < 0) {
+		dev_err(&pdev->dev,
+			"failed to get virtual interrupt=%d\n", irq);
+		return virq;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, virq,
+					NULL, bxt_wcove_tmu_irq_handler,
+					IRQF_ONESHOT, "bxt_wcove_tmu", wctmu);
+	if (ret) {
+		dev_err(&pdev->dev, "request irq failed: %d,virq: %d\n",
+							ret, virq);
+		return ret;
+	}
+	wctmu->irq = virq;
+
+	/* Enable TMU interrupts */
+	regmap_update_bits(wctmu->regmap, BXTWC_MIRQLVL1,
+				  BXTWC_MIRQLVL1_MTMU, 0);
+
+	/* Unmask TMU second level Wake & System alarm */
+	regmap_update_bits(wctmu->regmap, BXTWC_MTMUIRQ_REG,
+				  BXTWC_TMU_ALRM_MASK, 0);
+
+	platform_set_drvdata(pdev, wctmu);
+	return 0;
+}
+
+static int bxt_wcove_tmu_remove(struct platform_device *pdev)
+{
+	struct wcove_tmu *wctmu = platform_get_drvdata(pdev);
+	unsigned int val;
+
+	/* Mask TMU interrupts */
+	regmap_read(wctmu->regmap, BXTWC_MIRQLVL1, &val);
+	regmap_write(wctmu->regmap, BXTWC_MIRQLVL1,
+			val | BXTWC_MIRQLVL1_MTMU);
+	regmap_read(wctmu->regmap, BXTWC_MTMUIRQ_REG, &val);
+	regmap_write(wctmu->regmap, BXTWC_MTMUIRQ_REG,
+			val | BXTWC_TMU_ALRM_MASK);
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int bxtwc_tmu_suspend(struct device *dev)
+{
+	struct wcove_tmu *wctmu = dev_get_drvdata(dev);
+
+	enable_irq_wake(wctmu->irq);
+	return 0;
+}
+
+static int bxtwc_tmu_resume(struct device *dev)
+{
+	struct wcove_tmu *wctmu = dev_get_drvdata(dev);
+
+	disable_irq_wake(wctmu->irq);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(bxtwc_tmu_pm_ops, bxtwc_tmu_suspend, bxtwc_tmu_resume);
+
+static const struct platform_device_id bxt_wcove_tmu_id_table[] = {
+	{ .name = "bxt_wcove_tmu" },
+	{},
+};
+MODULE_DEVICE_TABLE(platform, bxt_wcove_tmu_id_table);
+
+static struct platform_driver bxt_wcove_tmu_driver = {
+	.probe = bxt_wcove_tmu_probe,
+	.remove = bxt_wcove_tmu_remove,
+	.driver = {
+		.name = "bxt_wcove_tmu",
+		.pm     = &bxtwc_tmu_pm_ops,
+	},
+	.id_table = bxt_wcove_tmu_id_table,
+};
+
+module_platform_driver(bxt_wcove_tmu_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nilesh Bacchewar <nilesh.bacchewar@intel.com>");
+MODULE_DESCRIPTION("BXT Whiskey Cove TMU Driver");
diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
index cf619dbeace2..956caa0628f5 100644
--- a/include/linux/mfd/intel_soc_pmic.h
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -26,6 +26,7 @@ struct intel_soc_pmic {
 	struct regmap *regmap;
 	struct regmap_irq_chip_data *irq_chip_data;
 	struct regmap_irq_chip_data *irq_chip_data_level2;
+	struct regmap_irq_chip_data *irq_chip_data_tmu;
 	struct device *dev;
 };
 
-- 
cgit v1.2.3


From 3f642a13359468181f29db3d8926ba36530be85e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 16 Dec 2016 18:49:38 -0500
Subject: NFS: Remove unused function nfs_revalidate_inode_rcu()

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c         | 9 ---------
 include/linux/nfs_fs.h | 1 -
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e3194aa797f7..8224e96b5808 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1105,15 +1105,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 
-int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
-{
-	if (!(NFS_I(inode)->cache_validity &
-			(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
-			&& !nfs_attribute_cache_expired(inode))
-		return NFS_STALE(inode) ? -ESTALE : 0;
-	return -ECHILD;
-}
-
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cb631973839a..fe2e7810ae80 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -343,7 +343,6 @@ extern int nfs_open(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_attribute_cache_expired(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
-extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-- 
cgit v1.2.3


From 187e593d2779fb92ae1de06f873d6e192ba35d88 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 16 Dec 2016 18:51:15 -0500
Subject: NFS: Clean up nfs_attribute_timeout()

It can be made static.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c         | 14 +++++++-------
 include/linux/nfs_fs.h |  1 -
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8224e96b5808..a0cd79646c1b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -160,6 +160,13 @@ int nfs_sync_mapping(struct address_space *mapping)
 	return ret;
 }
 
+static int nfs_attribute_timeout(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
+}
+
 static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
 {
 	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
@@ -1076,13 +1083,6 @@ out:
 	return status;
 }
 
-int nfs_attribute_timeout(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
-}
-
 int nfs_attribute_cache_expired(struct inode *inode)
 {
 	if (nfs_have_delegated_attributes(inode))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fe2e7810ae80..f1da8c8dd473 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -340,7 +340,6 @@ extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
 extern int nfs_permission(struct inode *, int);
 extern int nfs_open(struct inode *, struct file *);
-extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_attribute_cache_expired(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
-- 
cgit v1.2.3


From 7b8589cc29e7c35dcfd2d5138979f17b48f90110 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Mon, 19 Dec 2016 16:22:48 -0800
Subject: ima: on soft reboot, save the measurement list

The TPM PCRs are only reset on a hard reboot.  In order to validate a
TPM's quote after a soft reboot (eg.  kexec -e), the IMA measurement
list of the running kernel must be saved and restored on boot.

This patch uses the kexec buffer passing mechanism to pass the
serialized IMA binary_runtime_measurements to the next kernel.

Link: http://lkml.kernel.org/r/1480554346-29071-7-git-send-email-zohar@linux.vnet.ibm.com
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Cc: Andreas Steffen <andreas.steffen@strongswan.org>
Cc: Josh Sklar <sklar@linux.vnet.ibm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ima.h                |  12 ++++
 kernel/kexec_file.c                |   4 ++
 security/integrity/ima/ima.h       |   1 +
 security/integrity/ima/ima_fs.c    |   2 +-
 security/integrity/ima/ima_kexec.c | 117 +++++++++++++++++++++++++++++++++++++
 5 files changed, 135 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0eb7c2e7f0d6..7f6952f8d6aa 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
 #define _LINUX_IMA_H
 
 #include <linux/fs.h>
+#include <linux/kexec.h>
 struct linux_binprm;
 
 #ifdef CONFIG_IMA
@@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
 			      enum kernel_read_file_id id);
 extern void ima_post_path_mknod(struct dentry *dentry);
 
+#ifdef CONFIG_IMA_KEXEC
+extern void ima_add_kexec_buffer(struct kimage *image);
+#endif
+
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
 {
@@ -62,6 +67,13 @@ static inline void ima_post_path_mknod(struct dentry *dentry)
 
 #endif /* CONFIG_IMA */
 
+#ifndef CONFIG_IMA_KEXEC
+struct kimage;
+
+static inline void ima_add_kexec_buffer(struct kimage *image)
+{}
+#endif
+
 #ifdef CONFIG_IMA_APPRAISE
 extern void ima_inode_post_setattr(struct dentry *dentry);
 extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 0c2df7f73792..b56a558e406d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -19,6 +19,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/fs.h>
+#include <linux/ima.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <linux/syscalls.h>
@@ -132,6 +133,9 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 		return ret;
 	image->kernel_buf_len = size;
 
+	/* IMA needs to pass the measurement list to the next kernel. */
+	ima_add_kexec_buffer(image);
+
 	/* Call arch image probe handlers */
 	ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
 					    image->kernel_buf_len);
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index ea1dcc452911..139dec67dcbf 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -143,6 +143,7 @@ void ima_print_digest(struct seq_file *m, u8 *digest, u32 size);
 struct ima_template_desc *ima_template_desc_current(void);
 int ima_restore_measurement_entry(struct ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
+int ima_measurements_show(struct seq_file *m, void *v);
 unsigned long ima_get_binary_runtime_size(void);
 int ima_init_template(void);
 
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 3df46906492d..10bea0125fa1 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -116,7 +116,7 @@ void ima_putc(struct seq_file *m, void *data, int datalen)
  *       [eventdata length]
  *       eventdata[n]=template specific data
  */
-static int ima_measurements_show(struct seq_file *m, void *v)
+int ima_measurements_show(struct seq_file *m, void *v)
 {
 	/* the list never shrinks, so we don't need a lock here */
 	struct ima_queue_entry *qe = v;
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 36afd0fe9747..2c4824ac1ce1 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -10,8 +10,125 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/kexec.h>
 #include "ima.h"
 
+#ifdef CONFIG_IMA_KEXEC
+static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
+				     unsigned long segment_size)
+{
+	struct ima_queue_entry *qe;
+	struct seq_file file;
+	struct ima_kexec_hdr khdr = {
+		.version = 1, .buffer_size = 0, .count = 0};
+	int ret = 0;
+
+	/* segment size can't change between kexec load and execute */
+	file.buf = vmalloc(segment_size);
+	if (!file.buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	file.size = segment_size;
+	file.read_pos = 0;
+	file.count = sizeof(khdr);	/* reserved space */
+
+	list_for_each_entry_rcu(qe, &ima_measurements, later) {
+		if (file.count < file.size) {
+			khdr.count++;
+			ima_measurements_show(&file, qe);
+		} else {
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * fill in reserved space with some buffer details
+	 * (eg. version, buffer size, number of measurements)
+	 */
+	khdr.buffer_size = file.count;
+	memcpy(file.buf, &khdr, sizeof(khdr));
+	print_hex_dump(KERN_DEBUG, "ima dump: ", DUMP_PREFIX_NONE,
+			16, 1, file.buf,
+			file.count < 100 ? file.count : 100, true);
+
+	*buffer_size = file.count;
+	*buffer = file.buf;
+out:
+	if (ret == -EINVAL)
+		vfree(file.buf);
+	return ret;
+}
+
+/*
+ * Called during kexec_file_load so that IMA can add a segment to the kexec
+ * image for the measurement list for the next kernel.
+ *
+ * This function assumes that kexec_mutex is held.
+ */
+void ima_add_kexec_buffer(struct kimage *image)
+{
+	struct kexec_buf kbuf = { .image = image, .buf_align = PAGE_SIZE,
+				  .buf_min = 0, .buf_max = ULONG_MAX,
+				  .top_down = true };
+	unsigned long binary_runtime_size;
+
+	/* use more understandable variable names than defined in kbuf */
+	void *kexec_buffer = NULL;
+	size_t kexec_buffer_size;
+	size_t kexec_segment_size;
+	int ret;
+
+	/*
+	 * Reserve an extra half page of memory for additional measurements
+	 * added during the kexec load.
+	 */
+	binary_runtime_size = ima_get_binary_runtime_size();
+	if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE)
+		kexec_segment_size = ULONG_MAX;
+	else
+		kexec_segment_size = ALIGN(ima_get_binary_runtime_size() +
+					   PAGE_SIZE / 2, PAGE_SIZE);
+	if ((kexec_segment_size == ULONG_MAX) ||
+	    ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages / 2)) {
+		pr_err("Binary measurement list too large.\n");
+		return;
+	}
+
+	ima_dump_measurement_list(&kexec_buffer_size, &kexec_buffer,
+				  kexec_segment_size);
+	if (!kexec_buffer) {
+		pr_err("Not enough memory for the kexec measurement buffer.\n");
+		return;
+	}
+
+	kbuf.buffer = kexec_buffer;
+	kbuf.bufsz = kexec_buffer_size;
+	kbuf.memsz = kexec_segment_size;
+	ret = kexec_add_buffer(&kbuf);
+	if (ret) {
+		pr_err("Error passing over kexec measurement buffer.\n");
+		return;
+	}
+
+	ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_segment_size);
+	if (ret) {
+		pr_err("Error passing over kexec measurement buffer.\n");
+		return;
+	}
+
+	pr_debug("kexec measurement buffer for the loaded kernel at 0x%lx.\n",
+		 kbuf.mem);
+}
+#endif /* IMA_KEXEC */
+
 /*
  * Restore the measurement list from the previous kernel.
  */
-- 
cgit v1.2.3


From 1b011e2f13fcf37e1e577fed25b295808d6c83b9 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 19 Dec 2016 16:23:12 -0800
Subject: ratelimit: fix WARN_ON_RATELIMIT return value

The macro is to be used similarly as WARN_ON as:

  if (WARN_ON_RATELIMIT(condition, state))
	do_something();

One would expect only 'condition' to affect the 'if', but
WARN_ON_RATELIMIT does internally only:

  WARN_ON((condition) && __ratelimit(state))

So the 'if' is affected by the ratelimiting state too.  Fix this by
returning 'condition' in any case.

Note that nobody uses WARN_ON_RATELIMIT yet, so there is nothing to
worry about.  But I was about to use it and was a bit surprised.

Link: http://lkml.kernel.org/r/20161215093224.23126-1-jslaby@suse.cz
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ratelimit.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 57c9e0622a38..56375edf2ed2 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -77,8 +77,11 @@ extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
 
 #ifdef CONFIG_PRINTK
 
-#define WARN_ON_RATELIMIT(condition, state)			\
-		WARN_ON((condition) && __ratelimit(state))
+#define WARN_ON_RATELIMIT(condition, state)	({		\
+	bool __rtn_cond = !!(condition);			\
+	WARN_ON(__rtn_cond && __ratelimit(state));		\
+	__rtn_cond;						\
+})
 
 #define WARN_RATELIMIT(condition, format, ...)			\
 ({								\
-- 
cgit v1.2.3


From c00d2c7e89880036f288a764599b2b8b87c0a364 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 20 Dec 2016 07:04:57 -0500
Subject: move aio compat to fs/aio.c

... and fix the minor buglet in compat io_submit() - native one
kills ioctx as cleanup when put_user() fails.  Get rid of
bogus compat_... in !CONFIG_AIO case, while we are at it - they
should simply fail with ENOSYS, same as for native counterparts.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c            | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/compat.c         | 75 -----------------------------------------
 include/linux/aio.h |  5 ---
 kernel/sys_ni.c     |  3 ++
 4 files changed, 98 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 8edf253484af..8c79e1a53af9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1367,6 +1367,39 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
+{
+	struct kioctx *ioctx = NULL;
+	unsigned long ctx;
+	long ret;
+
+	ret = get_user(ctx, ctx32p);
+	if (unlikely(ret))
+		goto out;
+
+	ret = -EINVAL;
+	if (unlikely(ctx || nr_events == 0)) {
+		pr_debug("EINVAL: ctx %lu nr_events %u\n",
+		         ctx, nr_events);
+		goto out;
+	}
+
+	ioctx = ioctx_alloc(nr_events);
+	ret = PTR_ERR(ioctx);
+	if (!IS_ERR(ioctx)) {
+		/* truncating is ok because it's a user address */
+		ret = put_user((u32)ioctx->user_id, ctx32p);
+		if (ret)
+			kill_ioctx(current->mm, ioctx, NULL);
+		percpu_ref_put(&ioctx->users);
+	}
+
+out:
+	return ret;
+}
+#endif
+
 /* sys_io_destroy:
  *	Destroy the aio_context specified.  May cancel any outstanding 
  *	AIOs and block on completion.  Will fail with -ENOSYS if not
@@ -1591,8 +1624,8 @@ out_put_req:
 	return ret;
 }
 
-long do_io_submit(aio_context_t ctx_id, long nr,
-		  struct iocb __user *__user *iocbpp, bool compat)
+static long do_io_submit(aio_context_t ctx_id, long nr,
+			  struct iocb __user *__user *iocbpp, bool compat)
 {
 	struct kioctx *ctx;
 	long ret = 0;
@@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	return do_io_submit(ctx_id, nr, iocbpp, 0);
 }
 
+#ifdef CONFIG_COMPAT
+static inline long
+copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+{
+	compat_uptr_t uptr;
+	int i;
+
+	for (i = 0; i < nr; ++i) {
+		if (get_user(uptr, ptr32 + i))
+			return -EFAULT;
+		if (put_user(compat_ptr(uptr), ptr64 + i))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
+
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+		       int, nr, u32 __user *, iocb)
+{
+	struct iocb __user * __user *iocb64;
+	long ret;
+
+	if (unlikely(nr < 0))
+		return -EINVAL;
+
+	if (nr > MAX_AIO_SUBMITS)
+		nr = MAX_AIO_SUBMITS;
+
+	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
+	ret = copy_iocb(nr, iocb, iocb64);
+	if (!ret)
+		ret = do_io_submit(ctx_id, nr, iocb64, 1);
+	return ret;
+}
+#endif
+
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
@@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
+		       compat_long_t, min_nr,
+		       compat_long_t, nr,
+		       struct io_event __user *, events,
+		       struct compat_timespec __user *, timeout)
+{
+	struct timespec t;
+	struct timespec __user *ut = NULL;
+
+	if (timeout) {
+		if (compat_get_timespec(&t, timeout))
+			return -EFAULT;
+
+		ut = compat_alloc_user_space(sizeof(*ut));
+		if (copy_to_user(ut, &t, sizeof(t)))
+			return -EFAULT;
+	}
+	return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+}
+#endif
diff --git a/fs/compat.c b/fs/compat.c
index 543b48c29ac3..3f4908c28698 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
 	return compat_sys_fcntl64(fd, cmd, arg);
 }
 
-COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
-{
-	long ret;
-	aio_context_t ctx64;
-
-	mm_segment_t oldfs = get_fs();
-	if (unlikely(get_user(ctx64, ctx32p)))
-		return -EFAULT;
-
-	set_fs(KERNEL_DS);
-	/* The __user pointer cast is valid because of the set_fs() */
-	ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64);
-	set_fs(oldfs);
-	/* truncating is ok because it's a user address */
-	if (!ret)
-		ret = put_user((u32) ctx64, ctx32p);
-	return ret;
-}
-
-COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
-		       compat_long_t, min_nr,
-		       compat_long_t, nr,
-		       struct io_event __user *, events,
-		       struct compat_timespec __user *, timeout)
-{
-	struct timespec t;
-	struct timespec __user *ut = NULL;
-
-	if (timeout) {
-		if (compat_get_timespec(&t, timeout))
-			return -EFAULT;
-
-		ut = compat_alloc_user_space(sizeof(*ut));
-		if (copy_to_user(ut, &t, sizeof(t)) )
-			return -EFAULT;
-	} 
-	return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
-}
-
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
@@ -602,42 +563,6 @@ out:
 	return ret;
 }
 
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
-	compat_uptr_t uptr;
-	int i;
-
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
-
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
-		       int, nr, u32 __user *, iocb)
-{
-	struct iocb __user * __user *iocb64; 
-	long ret;
-
-	if (unlikely(nr < 0))
-		return -EINVAL;
-
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
-	
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = do_io_submit(ctx_id, nr, iocb64, 1);
-	return ret;
-}
-
 struct compat_ncp_mount_data {
 	compat_int_t version;
 	compat_uint_t ncp_fd;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 9eb42dbc5582..fdd0a343f455 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -14,14 +14,9 @@ typedef int (kiocb_cancel_fn)(struct kiocb *);
 /* prototypes */
 #ifdef CONFIG_AIO
 extern void exit_aio(struct mm_struct *mm);
-extern long do_io_submit(aio_context_t ctx_id, long nr,
-			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
 static inline void exit_aio(struct mm_struct *mm) { }
-static inline long do_io_submit(aio_context_t ctx_id, long nr,
-				struct iocb __user * __user *iocbpp,
-				bool compat) { return 0; }
 static inline void kiocb_set_cancel_fn(struct kiocb *req,
 				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 635482e60ca3..8acef8576ce9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -150,6 +150,9 @@ cond_syscall(sys_io_destroy);
 cond_syscall(sys_io_submit);
 cond_syscall(sys_io_cancel);
 cond_syscall(sys_io_getevents);
+cond_syscall(compat_sys_io_setup);
+cond_syscall(compat_sys_io_submit);
+cond_syscall(compat_sys_io_getevents);
 cond_syscall(sys_sysfs);
 cond_syscall(sys_syslog);
 cond_syscall(sys_process_vm_readv);
-- 
cgit v1.2.3


From 5c43c52d5fb6163120ae5d9a281c3b757ca6119c Mon Sep 17 00:00:00 2001
From: Steven Wahl <Steve_Wahl@Dell.com>
Date: Thu, 8 Dec 2016 17:02:28 +0000
Subject: NTB: correct ntb_peer_spad_read for case when callback is not
 supplied.

Correct ntb_peer_spad_read for case when callback is not supplied

Signed-off-by: Steve Wahl <Steve.Wahl@dell.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 6f47562d477b..816bd9835bee 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -968,6 +968,9 @@ static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
  */
 static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
 {
+	if (!ntb->ops->peer_spad_read)
+		return 0;
+
 	return ntb->ops->peer_spad_read(ntb, idx);
 }
 
-- 
cgit v1.2.3


From 7c0f6ba682b9c7632072ffbedf8d328c8f3c42ba Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 24 Dec 2016 11:46:01 -0800
Subject: Replace <asm/uaccess.h> with <linux/uaccess.h> globally

This was entirely automated, using the script by Al:

  PATT='^[[:blank:]]*#[[:blank:]]*include[[:blank:]]*<asm/uaccess.h>'
  sed -i -e "s!$PATT!#include <linux/uaccess.h>!" \
        $(git grep -l "$PATT"|grep -v ^include/linux/uaccess.h)

to do the replacement at the end of the merge window.

Requested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/boot/misc.c                                | 2 +-
 arch/alpha/kernel/irq.c                               | 2 +-
 arch/alpha/kernel/osf_sys.c                           | 2 +-
 arch/alpha/kernel/process.c                           | 2 +-
 arch/alpha/kernel/ptrace.c                            | 2 +-
 arch/alpha/kernel/setup.c                             | 2 +-
 arch/alpha/kernel/signal.c                            | 2 +-
 arch/alpha/kernel/srm_env.c                           | 2 +-
 arch/alpha/kernel/srmcons.c                           | 2 +-
 arch/alpha/kernel/time.c                              | 2 +-
 arch/alpha/kernel/traps.c                             | 2 +-
 arch/alpha/lib/csum_partial_copy.c                    | 2 +-
 arch/alpha/math-emu/math.c                            | 2 +-
 arch/alpha/mm/init.c                                  | 2 +-
 arch/arm/common/bL_switcher_dummy_if.c                | 2 +-
 arch/arm/kernel/swp_emulate.c                         | 2 +-
 arch/arm/kvm/arm.c                                    | 2 +-
 arch/arm/kvm/guest.c                                  | 2 +-
 arch/arm/mach-iop13xx/irq.c                           | 2 +-
 arch/arm/mach-ixp4xx/common.c                         | 2 +-
 arch/arm/mach-rpc/dma.c                               | 2 +-
 arch/arm/plat-iop/time.c                              | 2 +-
 arch/arm64/include/asm/word-at-a-time.h               | 2 +-
 arch/arm64/kernel/armv8_deprecated.c                  | 2 +-
 arch/arm64/kernel/entry.S                             | 2 +-
 arch/arm64/kernel/probes/kprobes.c                    | 2 +-
 arch/arm64/kernel/signal32.c                          | 2 +-
 arch/arm64/kvm/guest.c                                | 2 +-
 arch/arm64/lib/clear_user.S                           | 2 +-
 arch/arm64/lib/copy_from_user.S                       | 2 +-
 arch/arm64/lib/copy_in_user.S                         | 2 +-
 arch/arm64/lib/copy_to_user.S                         | 2 +-
 arch/arm64/mm/cache.S                                 | 2 +-
 arch/arm64/xen/hypercall.S                            | 2 +-
 arch/avr32/kernel/avr32_ksyms.c                       | 2 +-
 arch/avr32/kernel/ptrace.c                            | 2 +-
 arch/avr32/kernel/signal.c                            | 2 +-
 arch/avr32/mm/cache.c                                 | 2 +-
 arch/blackfin/kernel/bfin_dma.c                       | 2 +-
 arch/blackfin/kernel/kgdb_test.c                      | 2 +-
 arch/blackfin/kernel/module.c                         | 2 +-
 arch/c6x/mm/init.c                                    | 2 +-
 arch/cris/arch-v10/drivers/eeprom.c                   | 2 +-
 arch/cris/arch-v10/drivers/sync_serial.c              | 2 +-
 arch/cris/arch-v10/kernel/ptrace.c                    | 2 +-
 arch/cris/arch-v10/kernel/signal.c                    | 2 +-
 arch/cris/arch-v10/kernel/traps.c                     | 2 +-
 arch/cris/arch-v10/lib/usercopy.c                     | 2 +-
 arch/cris/arch-v10/mm/fault.c                         | 2 +-
 arch/cris/arch-v32/drivers/cryptocop.c                | 2 +-
 arch/cris/arch-v32/kernel/ptrace.c                    | 2 +-
 arch/cris/arch-v32/kernel/signal.c                    | 2 +-
 arch/cris/arch-v32/kernel/traps.c                     | 2 +-
 arch/cris/arch-v32/lib/usercopy.c                     | 2 +-
 arch/cris/kernel/crisksyms.c                          | 2 +-
 arch/cris/kernel/process.c                            | 2 +-
 arch/cris/kernel/profile.c                            | 2 +-
 arch/cris/kernel/ptrace.c                             | 2 +-
 arch/cris/kernel/sys_cris.c                           | 2 +-
 arch/cris/kernel/traps.c                              | 2 +-
 arch/frv/include/asm/futex.h                          | 2 +-
 arch/frv/kernel/irq.c                                 | 2 +-
 arch/frv/kernel/pm-mb93093.c                          | 2 +-
 arch/frv/kernel/pm.c                                  | 2 +-
 arch/frv/kernel/process.c                             | 2 +-
 arch/frv/kernel/ptrace.c                              | 2 +-
 arch/frv/kernel/signal.c                              | 2 +-
 arch/frv/kernel/sys_frv.c                             | 2 +-
 arch/frv/kernel/sysctl.c                              | 2 +-
 arch/frv/kernel/traps.c                               | 2 +-
 arch/frv/kernel/uaccess.c                             | 2 +-
 arch/frv/mm/dma-alloc.c                               | 2 +-
 arch/frv/mm/extable.c                                 | 2 +-
 arch/h8300/boot/compressed/misc.c                     | 2 +-
 arch/h8300/kernel/process.c                           | 2 +-
 arch/h8300/kernel/signal.c                            | 2 +-
 arch/hexagon/kernel/hexagon_ksyms.c                   | 2 +-
 arch/hexagon/kernel/signal.c                          | 2 +-
 arch/hexagon/mm/uaccess.c                             | 2 +-
 arch/hexagon/mm/vm_fault.c                            | 2 +-
 arch/ia64/kernel/brl_emu.c                            | 2 +-
 arch/ia64/kernel/crash_dump.c                         | 2 +-
 arch/ia64/kernel/init_task.c                          | 2 +-
 arch/ia64/kernel/irq.c                                | 2 +-
 arch/ia64/kernel/kprobes.c                            | 2 +-
 arch/ia64/kernel/perfmon.c                            | 2 +-
 arch/ia64/kernel/process.c                            | 2 +-
 arch/ia64/kernel/ptrace.c                             | 2 +-
 arch/ia64/kernel/salinfo.c                            | 2 +-
 arch/ia64/kernel/signal.c                             | 2 +-
 arch/ia64/kernel/sys_ia64.c                           | 2 +-
 arch/ia64/kernel/traps.c                              | 2 +-
 arch/ia64/kernel/unaligned.c                          | 2 +-
 arch/ia64/kernel/unwind.c                             | 2 +-
 arch/ia64/lib/csum_partial_copy.c                     | 2 +-
 arch/ia64/mm/extable.c                                | 2 +-
 arch/ia64/mm/init.c                                   | 2 +-
 arch/ia64/sn/kernel/sn2/sn_hwperf.c                   | 2 +-
 arch/ia64/sn/kernel/sn2/sn_proc_fs.c                  | 2 +-
 arch/ia64/sn/kernel/tiocx.c                           | 2 +-
 arch/m32r/kernel/align.c                              | 2 +-
 arch/m32r/kernel/irq.c                                | 2 +-
 arch/m32r/kernel/m32r_ksyms.c                         | 2 +-
 arch/m32r/kernel/process.c                            | 2 +-
 arch/m32r/kernel/ptrace.c                             | 2 +-
 arch/m32r/kernel/signal.c                             | 2 +-
 arch/m32r/kernel/sys_m32r.c                           | 2 +-
 arch/m32r/kernel/traps.c                              | 2 +-
 arch/m32r/lib/csum_partial_copy.c                     | 2 +-
 arch/m32r/lib/usercopy.c                              | 2 +-
 arch/m32r/mm/extable.c                                | 2 +-
 arch/m32r/mm/fault-nommu.c                            | 2 +-
 arch/m68k/bvme6000/rtc.c                              | 2 +-
 arch/m68k/kernel/process.c                            | 2 +-
 arch/m68k/kernel/ptrace.c                             | 2 +-
 arch/m68k/kernel/signal.c                             | 2 +-
 arch/m68k/kernel/sys_m68k.c                           | 2 +-
 arch/m68k/kernel/traps.c                              | 2 +-
 arch/m68k/lib/uaccess.c                               | 2 +-
 arch/m68k/mac/misc.c                                  | 2 +-
 arch/m68k/mm/init.c                                   | 2 +-
 arch/m68k/mm/motorola.c                               | 2 +-
 arch/m68k/mm/sun3mmu.c                                | 2 +-
 arch/m68k/mvme16x/rtc.c                               | 2 +-
 arch/m68k/sun3/mmu_emu.c                              | 2 +-
 arch/metag/kernel/irq.c                               | 2 +-
 arch/mips/alchemy/common/power.c                      | 2 +-
 arch/mips/dec/kn01-berr.c                             | 2 +-
 arch/mips/include/asm/checksum.h                      | 2 +-
 arch/mips/include/asm/compat-signal.h                 | 2 +-
 arch/mips/include/asm/r4kcache.h                      | 2 +-
 arch/mips/include/asm/termios.h                       | 2 +-
 arch/mips/jazz/jazzdma.c                              | 2 +-
 arch/mips/kernel/branch.c                             | 2 +-
 arch/mips/kernel/cpu-probe.c                          | 2 +-
 arch/mips/kernel/crash_dump.c                         | 2 +-
 arch/mips/kernel/irq.c                                | 2 +-
 arch/mips/kernel/kgdb.c                               | 2 +-
 arch/mips/kernel/linux32.c                            | 2 +-
 arch/mips/kernel/mips-mt-fpaff.c                      | 2 +-
 arch/mips/kernel/mips-r2-to-r6-emul.c                 | 2 +-
 arch/mips/kernel/mips_ksyms.c                         | 2 +-
 arch/mips/kernel/process.c                            | 2 +-
 arch/mips/kernel/ptrace.c                             | 2 +-
 arch/mips/kernel/ptrace32.c                           | 2 +-
 arch/mips/kernel/signal32.c                           | 2 +-
 arch/mips/kernel/signal_n32.c                         | 2 +-
 arch/mips/kernel/syscall.c                            | 2 +-
 arch/mips/kernel/traps.c                              | 2 +-
 arch/mips/kernel/unaligned.c                          | 2 +-
 arch/mips/math-emu/cp1emu.c                           | 2 +-
 arch/mips/math-emu/dsemul.c                           | 2 +-
 arch/mips/mm/extable.c                                | 2 +-
 arch/mips/mm/sc-debugfs.c                             | 2 +-
 arch/mips/oprofile/op_model_loongson3.c               | 2 +-
 arch/mips/sgi-ip22/ip28-berr.c                        | 2 +-
 arch/mips/sgi-ip27/ip27-berr.c                        | 2 +-
 arch/mips/sgi-ip32/ip32-berr.c                        | 2 +-
 arch/mips/sibyte/common/sb_tbprof.c                   | 2 +-
 arch/mn10300/kernel/fpu.c                             | 2 +-
 arch/mn10300/kernel/mn10300_ksyms.c                   | 2 +-
 arch/mn10300/kernel/process.c                         | 2 +-
 arch/mn10300/kernel/ptrace.c                          | 2 +-
 arch/mn10300/kernel/setup.c                           | 2 +-
 arch/mn10300/kernel/signal.c                          | 2 +-
 arch/mn10300/kernel/sys_mn10300.c                     | 2 +-
 arch/mn10300/lib/checksum.c                           | 2 +-
 arch/mn10300/mm/cache-smp.c                           | 2 +-
 arch/mn10300/mm/cache.c                               | 2 +-
 arch/mn10300/mm/extable.c                             | 2 +-
 arch/mn10300/mm/init.c                                | 2 +-
 arch/mn10300/mm/misalignment.c                        | 2 +-
 arch/mn10300/proc-mn2ws0050/proc-init.c               | 2 +-
 arch/nios2/kernel/traps.c                             | 2 +-
 arch/openrisc/kernel/or32_ksyms.c                     | 2 +-
 arch/openrisc/kernel/process.c                        | 2 +-
 arch/openrisc/kernel/signal.c                         | 2 +-
 arch/openrisc/kernel/traps.c                          | 2 +-
 arch/openrisc/mm/fault.c                              | 2 +-
 arch/parisc/kernel/asm-offsets.c                      | 2 +-
 arch/parisc/kernel/parisc_ksyms.c                     | 2 +-
 arch/parisc/kernel/pci-dma.c                          | 2 +-
 arch/parisc/kernel/perf.c                             | 2 +-
 arch/parisc/kernel/ptrace.c                           | 2 +-
 arch/parisc/kernel/signal.c                           | 2 +-
 arch/parisc/kernel/signal32.c                         | 2 +-
 arch/parisc/kernel/sys_parisc.c                       | 2 +-
 arch/parisc/kernel/time.c                             | 2 +-
 arch/parisc/kernel/unaligned.c                        | 2 +-
 arch/parisc/kernel/unwind.c                           | 2 +-
 arch/parisc/lib/checksum.c                            | 2 +-
 arch/powerpc/include/asm/asm-prototypes.h             | 2 +-
 arch/powerpc/kernel/align.c                           | 2 +-
 arch/powerpc/kernel/crash_dump.c                      | 2 +-
 arch/powerpc/kernel/hw_breakpoint.c                   | 2 +-
 arch/powerpc/kernel/irq.c                             | 2 +-
 arch/powerpc/kernel/kprobes.c                         | 2 +-
 arch/powerpc/kernel/module.c                          | 2 +-
 arch/powerpc/kernel/nvram_64.c                        | 2 +-
 arch/powerpc/kernel/pci_32.c                          | 2 +-
 arch/powerpc/kernel/proc_powerpc.c                    | 2 +-
 arch/powerpc/kernel/ptrace.c                          | 2 +-
 arch/powerpc/kernel/ptrace32.c                        | 2 +-
 arch/powerpc/kernel/rtas-proc.c                       | 2 +-
 arch/powerpc/kernel/rtas.c                            | 2 +-
 arch/powerpc/kernel/rtas_flash.c                      | 2 +-
 arch/powerpc/kernel/rtasd.c                           | 2 +-
 arch/powerpc/kernel/setup_32.c                        | 2 +-
 arch/powerpc/kernel/signal.c                          | 2 +-
 arch/powerpc/kernel/signal_32.c                       | 2 +-
 arch/powerpc/kernel/signal_64.c                       | 2 +-
 arch/powerpc/kernel/sys_ppc32.c                       | 2 +-
 arch/powerpc/kernel/syscalls.c                        | 2 +-
 arch/powerpc/kernel/time.c                            | 2 +-
 arch/powerpc/kernel/traps.c                           | 2 +-
 arch/powerpc/kernel/vecemu.c                          | 2 +-
 arch/powerpc/kvm/book3s.c                             | 2 +-
 arch/powerpc/kvm/book3s_hv.c                          | 2 +-
 arch/powerpc/kvm/book3s_pr.c                          | 2 +-
 arch/powerpc/kvm/book3s_pr_papr.c                     | 2 +-
 arch/powerpc/kvm/book3s_rtas.c                        | 2 +-
 arch/powerpc/kvm/book3s_xics.c                        | 2 +-
 arch/powerpc/kvm/booke.c                              | 2 +-
 arch/powerpc/kvm/mpic.c                               | 2 +-
 arch/powerpc/kvm/powerpc.c                            | 2 +-
 arch/powerpc/lib/checksum_wrappers.c                  | 2 +-
 arch/powerpc/lib/code-patching.c                      | 2 +-
 arch/powerpc/lib/sstep.c                              | 2 +-
 arch/powerpc/lib/usercopy_64.c                        | 2 +-
 arch/powerpc/math-emu/fabs.c                          | 2 +-
 arch/powerpc/math-emu/fadd.c                          | 2 +-
 arch/powerpc/math-emu/fadds.c                         | 2 +-
 arch/powerpc/math-emu/fcmpo.c                         | 2 +-
 arch/powerpc/math-emu/fcmpu.c                         | 2 +-
 arch/powerpc/math-emu/fctiw.c                         | 2 +-
 arch/powerpc/math-emu/fctiwz.c                        | 2 +-
 arch/powerpc/math-emu/fdiv.c                          | 2 +-
 arch/powerpc/math-emu/fdivs.c                         | 2 +-
 arch/powerpc/math-emu/fmadd.c                         | 2 +-
 arch/powerpc/math-emu/fmadds.c                        | 2 +-
 arch/powerpc/math-emu/fmr.c                           | 2 +-
 arch/powerpc/math-emu/fmsub.c                         | 2 +-
 arch/powerpc/math-emu/fmsubs.c                        | 2 +-
 arch/powerpc/math-emu/fmul.c                          | 2 +-
 arch/powerpc/math-emu/fmuls.c                         | 2 +-
 arch/powerpc/math-emu/fnabs.c                         | 2 +-
 arch/powerpc/math-emu/fneg.c                          | 2 +-
 arch/powerpc/math-emu/fnmadd.c                        | 2 +-
 arch/powerpc/math-emu/fnmadds.c                       | 2 +-
 arch/powerpc/math-emu/fnmsub.c                        | 2 +-
 arch/powerpc/math-emu/fnmsubs.c                       | 2 +-
 arch/powerpc/math-emu/fre.c                           | 2 +-
 arch/powerpc/math-emu/fres.c                          | 2 +-
 arch/powerpc/math-emu/frsp.c                          | 2 +-
 arch/powerpc/math-emu/frsqrte.c                       | 2 +-
 arch/powerpc/math-emu/frsqrtes.c                      | 2 +-
 arch/powerpc/math-emu/fsel.c                          | 2 +-
 arch/powerpc/math-emu/fsqrt.c                         | 2 +-
 arch/powerpc/math-emu/fsqrts.c                        | 2 +-
 arch/powerpc/math-emu/fsub.c                          | 2 +-
 arch/powerpc/math-emu/fsubs.c                         | 2 +-
 arch/powerpc/math-emu/lfd.c                           | 2 +-
 arch/powerpc/math-emu/lfs.c                           | 2 +-
 arch/powerpc/math-emu/math.c                          | 2 +-
 arch/powerpc/math-emu/math_efp.c                      | 2 +-
 arch/powerpc/math-emu/mcrfs.c                         | 2 +-
 arch/powerpc/math-emu/mffs.c                          | 2 +-
 arch/powerpc/math-emu/mtfsb0.c                        | 2 +-
 arch/powerpc/math-emu/mtfsb1.c                        | 2 +-
 arch/powerpc/math-emu/mtfsf.c                         | 2 +-
 arch/powerpc/math-emu/mtfsfi.c                        | 2 +-
 arch/powerpc/math-emu/stfd.c                          | 2 +-
 arch/powerpc/math-emu/stfiwx.c                        | 2 +-
 arch/powerpc/math-emu/stfs.c                          | 2 +-
 arch/powerpc/mm/40x_mmu.c                             | 2 +-
 arch/powerpc/mm/fsl_booke_mmu.c                       | 2 +-
 arch/powerpc/mm/hash_utils_64.c                       | 2 +-
 arch/powerpc/mm/init_64.c                             | 2 +-
 arch/powerpc/mm/subpage-prot.c                        | 2 +-
 arch/powerpc/platforms/cell/spufs/coredump.c          | 2 +-
 arch/powerpc/platforms/cell/spufs/file.c              | 2 +-
 arch/powerpc/platforms/cell/spufs/inode.c             | 2 +-
 arch/powerpc/platforms/cell/spufs/syscalls.c          | 2 +-
 arch/powerpc/platforms/chrp/nvram.c                   | 2 +-
 arch/powerpc/platforms/powernv/opal-elog.c            | 2 +-
 arch/powerpc/platforms/powernv/opal-lpc.c             | 2 +-
 arch/powerpc/platforms/powernv/opal-prd.c             | 2 +-
 arch/powerpc/platforms/pseries/cmm.c                  | 2 +-
 arch/powerpc/platforms/pseries/dlpar.c                | 2 +-
 arch/powerpc/platforms/pseries/dtl.c                  | 2 +-
 arch/powerpc/platforms/pseries/lparcfg.c              | 2 +-
 arch/powerpc/platforms/pseries/nvram.c                | 2 +-
 arch/powerpc/platforms/pseries/reconfig.c             | 2 +-
 arch/powerpc/platforms/pseries/scanlog.c              | 2 +-
 arch/powerpc/sysdev/scom.c                            | 2 +-
 arch/powerpc/sysdev/tsi108_pci.c                      | 2 +-
 arch/s390/appldata/appldata_base.c                    | 2 +-
 arch/s390/boot/compressed/misc.c                      | 2 +-
 arch/s390/crypto/prng.c                               | 2 +-
 arch/s390/include/asm/checksum.h                      | 2 +-
 arch/s390/include/asm/idals.h                         | 2 +-
 arch/s390/include/asm/mmu_context.h                   | 2 +-
 arch/s390/kernel/compat_linux.c                       | 2 +-
 arch/s390/kernel/compat_signal.c                      | 2 +-
 arch/s390/kernel/debug.c                              | 2 +-
 arch/s390/kernel/dis.c                                | 2 +-
 arch/s390/kernel/kprobes.c                            | 2 +-
 arch/s390/kernel/ptrace.c                             | 2 +-
 arch/s390/kernel/signal.c                             | 2 +-
 arch/s390/kernel/sys_s390.c                           | 2 +-
 arch/s390/kernel/time.c                               | 2 +-
 arch/s390/kernel/traps.c                              | 2 +-
 arch/s390/kvm/interrupt.c                             | 2 +-
 arch/s390/mm/init.c                                   | 2 +-
 arch/score/include/asm/checksum.h                     | 2 +-
 arch/score/kernel/ptrace.c                            | 2 +-
 arch/score/kernel/traps.c                             | 2 +-
 arch/score/lib/checksum_copy.c                        | 2 +-
 arch/sh/boards/mach-landisk/gio.c                     | 2 +-
 arch/sh/boot/compressed/misc.c                        | 2 +-
 arch/sh/include/asm/mmu_context.h                     | 2 +-
 arch/sh/kernel/cpu/init.c                             | 2 +-
 arch/sh/kernel/cpu/shmobile/cpuidle.c                 | 2 +-
 arch/sh/kernel/cpu/shmobile/pm.c                      | 2 +-
 arch/sh/kernel/crash_dump.c                           | 2 +-
 arch/sh/kernel/io_trapped.c                           | 2 +-
 arch/sh/kernel/irq.c                                  | 2 +-
 arch/sh/kernel/kprobes.c                              | 2 +-
 arch/sh/kernel/process_32.c                           | 2 +-
 arch/sh/kernel/process_64.c                           | 2 +-
 arch/sh/kernel/ptrace_32.c                            | 2 +-
 arch/sh/kernel/ptrace_64.c                            | 2 +-
 arch/sh/kernel/setup.c                                | 2 +-
 arch/sh/kernel/sh_ksyms_64.c                          | 2 +-
 arch/sh/kernel/signal_32.c                            | 2 +-
 arch/sh/kernel/signal_64.c                            | 2 +-
 arch/sh/kernel/sys_sh.c                               | 2 +-
 arch/sh/kernel/sys_sh32.c                             | 2 +-
 arch/sh/kernel/traps_64.c                             | 2 +-
 arch/sh/math-emu/math.c                               | 2 +-
 arch/sh/mm/cache-debugfs.c                            | 2 +-
 arch/sh/mm/cache-sh3.c                                | 2 +-
 arch/sh/mm/cache-sh5.c                                | 2 +-
 arch/sh/mm/cache-sh7705.c                             | 2 +-
 arch/sh/mm/extable_32.c                               | 2 +-
 arch/sh/mm/extable_64.c                               | 2 +-
 arch/sh/mm/nommu.c                                    | 2 +-
 arch/sh/mm/pmb.c                                      | 2 +-
 arch/sh/mm/tlb-sh3.c                                  | 2 +-
 arch/sh/mm/tlbex_64.c                                 | 2 +-
 arch/sh/mm/tlbflush_64.c                              | 2 +-
 arch/sh/oprofile/backtrace.c                          | 2 +-
 arch/sparc/include/asm/checksum_32.h                  | 2 +-
 arch/sparc/include/asm/checksum_64.h                  | 2 +-
 arch/sparc/kernel/apc.c                               | 2 +-
 arch/sparc/kernel/irq_64.c                            | 2 +-
 arch/sparc/kernel/kprobes.c                           | 2 +-
 arch/sparc/kernel/mdesc.c                             | 2 +-
 arch/sparc/kernel/pci.c                               | 2 +-
 arch/sparc/kernel/pcic.c                              | 2 +-
 arch/sparc/kernel/pmc.c                               | 2 +-
 arch/sparc/kernel/process_32.c                        | 2 +-
 arch/sparc/kernel/process_64.c                        | 2 +-
 arch/sparc/kernel/ptrace_32.c                         | 2 +-
 arch/sparc/kernel/ptrace_64.c                         | 2 +-
 arch/sparc/kernel/signal32.c                          | 2 +-
 arch/sparc/kernel/signal_32.c                         | 2 +-
 arch/sparc/kernel/signal_64.c                         | 2 +-
 arch/sparc/kernel/smp_64.c                            | 2 +-
 arch/sparc/kernel/sys_sparc32.c                       | 2 +-
 arch/sparc/kernel/sys_sparc_32.c                      | 2 +-
 arch/sparc/kernel/sys_sparc_64.c                      | 2 +-
 arch/sparc/kernel/time_64.c                           | 2 +-
 arch/sparc/kernel/traps_64.c                          | 2 +-
 arch/sparc/kernel/unaligned_32.c                      | 2 +-
 arch/sparc/kernel/unaligned_64.c                      | 2 +-
 arch/sparc/kernel/uprobes.c                           | 2 +-
 arch/sparc/kernel/visemul.c                           | 2 +-
 arch/sparc/kernel/windows.c                           | 2 +-
 arch/sparc/math-emu/math_32.c                         | 2 +-
 arch/sparc/math-emu/math_64.c                         | 2 +-
 arch/sparc/mm/extable.c                               | 2 +-
 arch/sparc/mm/init_64.c                               | 2 +-
 arch/tile/kernel/process.c                            | 2 +-
 arch/tile/kernel/single_step.c                        | 2 +-
 arch/tile/kernel/unaligned.c                          | 2 +-
 arch/um/drivers/harddog_kern.c                        | 2 +-
 arch/um/drivers/hostaudio_kern.c                      | 2 +-
 arch/um/drivers/mconsole_kern.c                       | 2 +-
 arch/um/drivers/mmapper_kern.c                        | 2 +-
 arch/um/drivers/random.c                              | 2 +-
 arch/um/kernel/exec.c                                 | 2 +-
 arch/um/kernel/exitcode.c                             | 2 +-
 arch/um/kernel/process.c                              | 2 +-
 arch/um/kernel/ptrace.c                               | 2 +-
 arch/um/kernel/syscall.c                              | 2 +-
 arch/x86/entry/common.c                               | 2 +-
 arch/x86/ia32/ia32_aout.c                             | 2 +-
 arch/x86/ia32/ia32_signal.c                           | 2 +-
 arch/x86/ia32/sys_ia32.c                              | 2 +-
 arch/x86/include/asm/asm-prototypes.h                 | 2 +-
 arch/x86/include/asm/checksum_64.h                    | 2 +-
 arch/x86/include/asm/xen/page.h                       | 2 +-
 arch/x86/kernel/apm_32.c                              | 2 +-
 arch/x86/kernel/cpu/mcheck/mce-severity.c             | 2 +-
 arch/x86/kernel/crash_dump_32.c                       | 2 +-
 arch/x86/kernel/doublefault.c                         | 2 +-
 arch/x86/kernel/kprobes/core.c                        | 2 +-
 arch/x86/kernel/kprobes/opt.c                         | 2 +-
 arch/x86/kernel/process.c                             | 2 +-
 arch/x86/kernel/ptrace.c                              | 2 +-
 arch/x86/kernel/test_nx.c                             | 2 +-
 arch/x86/kernel/tls.c                                 | 2 +-
 arch/x86/kernel/vm86_32.c                             | 2 +-
 arch/x86/lib/usercopy_32.c                            | 2 +-
 arch/x86/math-emu/errors.c                            | 2 +-
 arch/x86/math-emu/fpu_entry.c                         | 2 +-
 arch/x86/math-emu/get_address.c                       | 2 +-
 arch/x86/math-emu/load_store.c                        | 2 +-
 arch/x86/math-emu/reg_ld_str.c                        | 2 +-
 arch/x86/mm/extable.c                                 | 2 +-
 arch/x86/mm/init_32.c                                 | 2 +-
 arch/x86/mm/init_64.c                                 | 2 +-
 arch/x86/mm/pageattr.c                                | 2 +-
 arch/x86/um/ptrace_32.c                               | 2 +-
 arch/x86/um/ptrace_64.c                               | 2 +-
 arch/x86/um/signal.c                                  | 2 +-
 arch/x86/um/tls_32.c                                  | 2 +-
 arch/x86/xen/p2m.c                                    | 2 +-
 arch/xtensa/include/asm/checksum.h                    | 2 +-
 arch/xtensa/include/asm/segment.h                     | 2 +-
 arch/xtensa/kernel/asm-offsets.c                      | 2 +-
 arch/xtensa/kernel/irq.c                              | 2 +-
 arch/xtensa/kernel/process.c                          | 2 +-
 arch/xtensa/kernel/ptrace.c                           | 2 +-
 arch/xtensa/kernel/signal.c                           | 2 +-
 arch/xtensa/kernel/stacktrace.c                       | 2 +-
 arch/xtensa/kernel/syscall.c                          | 2 +-
 arch/xtensa/kernel/traps.c                            | 2 +-
 arch/xtensa/kernel/xtensa_ksyms.c                     | 2 +-
 arch/xtensa/platforms/iss/console.c                   | 2 +-
 arch/xtensa/platforms/iss/simdisk.c                   | 2 +-
 block/ioctl.c                                         | 2 +-
 block/partitions/ibm.c                                | 2 +-
 block/scsi_ioctl.c                                    | 2 +-
 drivers/acpi/acpi_video.c                             | 2 +-
 drivers/acpi/battery.c                                | 2 +-
 drivers/acpi/osl.c                                    | 2 +-
 drivers/acpi/proc.c                                   | 2 +-
 drivers/acpi/processor_thermal.c                      | 2 +-
 drivers/acpi/processor_throttling.c                   | 2 +-
 drivers/acpi/thermal.c                                | 2 +-
 drivers/atm/adummy.c                                  | 2 +-
 drivers/atm/atmtcp.c                                  | 2 +-
 drivers/atm/eni.c                                     | 2 +-
 drivers/atm/firestream.c                              | 2 +-
 drivers/atm/fore200e.c                                | 2 +-
 drivers/atm/he.c                                      | 2 +-
 drivers/atm/horizon.c                                 | 2 +-
 drivers/atm/idt77105.c                                | 2 +-
 drivers/atm/idt77252.c                                | 2 +-
 drivers/atm/iphase.c                                  | 2 +-
 drivers/atm/nicstar.c                                 | 2 +-
 drivers/atm/suni.c                                    | 2 +-
 drivers/atm/uPD98402.c                                | 2 +-
 drivers/atm/zatm.c                                    | 2 +-
 drivers/base/memory.c                                 | 2 +-
 drivers/block/DAC960.c                                | 2 +-
 drivers/block/amiflop.c                               | 2 +-
 drivers/block/brd.c                                   | 2 +-
 drivers/block/cciss.c                                 | 2 +-
 drivers/block/cryptoloop.c                            | 2 +-
 drivers/block/hd.c                                    | 2 +-
 drivers/block/loop.c                                  | 2 +-
 drivers/block/nbd.c                                   | 2 +-
 drivers/block/paride/pcd.c                            | 2 +-
 drivers/block/paride/pd.c                             | 2 +-
 drivers/block/paride/pf.c                             | 2 +-
 drivers/block/paride/pg.c                             | 2 +-
 drivers/block/paride/pt.c                             | 2 +-
 drivers/block/pktcdvd.c                               | 2 +-
 drivers/block/swim3.c                                 | 2 +-
 drivers/block/sx8.c                                   | 2 +-
 drivers/block/umem.c                                  | 2 +-
 drivers/cdrom/cdrom.c                                 | 2 +-
 drivers/char/agp/compat_ioctl.c                       | 2 +-
 drivers/char/agp/frontend.c                           | 2 +-
 drivers/char/applicom.c                               | 2 +-
 drivers/char/bfin-otp.c                               | 2 +-
 drivers/char/ds1620.c                                 | 2 +-
 drivers/char/dtlk.c                                   | 2 +-
 drivers/char/generic_nvram.c                          | 2 +-
 drivers/char/hangcheck-timer.c                        | 2 +-
 drivers/char/hw_random/core.c                         | 2 +-
 drivers/char/ipmi/ipmi_watchdog.c                     | 2 +-
 drivers/char/lp.c                                     | 2 +-
 drivers/char/mbcs.c                                   | 2 +-
 drivers/char/mmtimer.c                                | 2 +-
 drivers/char/mwave/3780i.c                            | 2 +-
 drivers/char/mwave/mwavedd.h                          | 2 +-
 drivers/char/nsc_gpio.c                               | 2 +-
 drivers/char/nwbutton.c                               | 2 +-
 drivers/char/nwflash.c                                | 2 +-
 drivers/char/pc8736x_gpio.c                           | 2 +-
 drivers/char/pcmcia/cm4040_cs.c                       | 2 +-
 drivers/char/pcmcia/synclink_cs.c                     | 2 +-
 drivers/char/random.c                                 | 2 +-
 drivers/char/raw.c                                    | 2 +-
 drivers/char/scx200_gpio.c                            | 2 +-
 drivers/char/sonypi.c                                 | 2 +-
 drivers/char/tlclk.c                                  | 2 +-
 drivers/char/toshiba.c                                | 2 +-
 drivers/char/xilinx_hwicap/xilinx_hwicap.c            | 2 +-
 drivers/cpufreq/ia64-acpi-cpufreq.c                   | 2 +-
 drivers/cpuidle/governors/ladder.c                    | 2 +-
 drivers/dio/dio.c                                     | 2 +-
 drivers/edac/edac_device.c                            | 2 +-
 drivers/edac/edac_mc.c                                | 2 +-
 drivers/edac/edac_pci.c                               | 2 +-
 drivers/i2c/i2c-core.c                                | 2 +-
 drivers/ide/hpt366.c                                  | 2 +-
 drivers/ide/ide-disk.c                                | 2 +-
 drivers/ide/ide-io.c                                  | 2 +-
 drivers/ide/ide-iops.c                                | 2 +-
 drivers/ide/ide-probe.c                               | 2 +-
 drivers/ide/ide-proc.c                                | 2 +-
 drivers/infiniband/core/ucm.c                         | 2 +-
 drivers/infiniband/core/user_mad.c                    | 2 +-
 drivers/infiniband/core/uverbs_cmd.c                  | 2 +-
 drivers/infiniband/core/uverbs_main.c                 | 2 +-
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c             | 2 +-
 drivers/infiniband/ulp/iser/iscsi_iser.c              | 2 +-
 drivers/input/input-compat.c                          | 2 +-
 drivers/input/misc/atlas_btns.c                       | 2 +-
 drivers/input/mouse/amimouse.c                        | 2 +-
 drivers/input/mouse/atarimouse.c                      | 2 +-
 drivers/input/mouse/trackpoint.c                      | 2 +-
 drivers/input/serio/hp_sdc.c                          | 2 +-
 drivers/input/serio/q40kbd.c                          | 2 +-
 drivers/input/serio/serport.c                         | 2 +-
 drivers/input/tablet/aiptek.c                         | 2 +-
 drivers/input/tablet/gtco.c                           | 2 +-
 drivers/isdn/capi/kcapi.c                             | 2 +-
 drivers/isdn/hardware/avm/b1.c                        | 2 +-
 drivers/isdn/hardware/avm/b1dma.c                     | 2 +-
 drivers/isdn/hardware/avm/c4.c                        | 2 +-
 drivers/isdn/hardware/eicon/capimain.c                | 2 +-
 drivers/isdn/hardware/eicon/divamnt.c                 | 2 +-
 drivers/isdn/hardware/eicon/divasi.c                  | 2 +-
 drivers/isdn/hardware/eicon/divasmain.c               | 2 +-
 drivers/isdn/hardware/eicon/divasproc.c               | 2 +-
 drivers/isdn/hysdn/hysdn_boot.c                       | 2 +-
 drivers/lguest/core.c                                 | 2 +-
 drivers/lguest/page_tables.c                          | 2 +-
 drivers/lguest/x86/core.c                             | 2 +-
 drivers/macintosh/ans-lcd.c                           | 2 +-
 drivers/macintosh/smu.c                               | 2 +-
 drivers/macintosh/via-pmu.c                           | 2 +-
 drivers/macintosh/via-pmu68k.c                        | 2 +-
 drivers/md/dm-ioctl.c                                 | 2 +-
 drivers/media/dvb-core/dmxdev.c                       | 2 +-
 drivers/media/dvb-core/dvb_demux.c                    | 2 +-
 drivers/media/dvb-core/dvb_net.c                      | 2 +-
 drivers/media/dvb-core/dvb_ringbuffer.c               | 2 +-
 drivers/media/i2c/adv7170.c                           | 2 +-
 drivers/media/i2c/adv7175.c                           | 2 +-
 drivers/media/i2c/bt856.c                             | 2 +-
 drivers/media/i2c/bt866.c                             | 2 +-
 drivers/media/i2c/cs53l32a.c                          | 2 +-
 drivers/media/i2c/m52790.c                            | 2 +-
 drivers/media/i2c/saa6588.c                           | 2 +-
 drivers/media/i2c/saa7110.c                           | 2 +-
 drivers/media/i2c/saa7185.c                           | 2 +-
 drivers/media/i2c/tlv320aic23b.c                      | 2 +-
 drivers/media/i2c/vp27smpx.c                          | 2 +-
 drivers/media/i2c/vpx3220.c                           | 2 +-
 drivers/media/i2c/wm8739.c                            | 2 +-
 drivers/media/i2c/wm8775.c                            | 2 +-
 drivers/media/pci/ivtv/ivtv-driver.h                  | 2 +-
 drivers/media/pci/meye/meye.c                         | 2 +-
 drivers/media/pci/zoran/videocodec.c                  | 2 +-
 drivers/media/pci/zoran/zoran_driver.c                | 2 +-
 drivers/media/platform/arv.c                          | 2 +-
 drivers/media/usb/pvrusb2/pvrusb2-ioread.c            | 2 +-
 drivers/media/usb/pwc/pwc-ctrl.c                      | 2 +-
 drivers/media/v4l2-core/v4l2-common.c                 | 2 +-
 drivers/media/v4l2-core/v4l2-dev.c                    | 2 +-
 drivers/message/fusion/mptctl.c                       | 2 +-
 drivers/message/fusion/mptlan.h                       | 2 +-
 drivers/misc/ibmasm/ibmasmfs.c                        | 2 +-
 drivers/mmc/core/block.c                              | 2 +-
 drivers/mmc/host/android-goldfish.c                   | 2 +-
 drivers/mtd/devices/pmc551.c                          | 2 +-
 drivers/mtd/devices/slram.c                           | 2 +-
 drivers/mtd/ftl.c                                     | 2 +-
 drivers/mtd/inftlcore.c                               | 2 +-
 drivers/mtd/inftlmount.c                              | 2 +-
 drivers/mtd/maps/sun_uflash.c                         | 2 +-
 drivers/mtd/mtd_blkdevs.c                             | 2 +-
 drivers/mtd/mtdchar.c                                 | 2 +-
 drivers/mtd/nftlcore.c                                | 2 +-
 drivers/net/appletalk/ipddp.c                         | 2 +-
 drivers/net/eql.c                                     | 2 +-
 drivers/net/ethernet/3com/3c509.c                     | 2 +-
 drivers/net/ethernet/3com/3c515.c                     | 2 +-
 drivers/net/ethernet/3com/3c574_cs.c                  | 2 +-
 drivers/net/ethernet/3com/3c59x.c                     | 2 +-
 drivers/net/ethernet/3com/typhoon.c                   | 2 +-
 drivers/net/ethernet/8390/axnet_cs.c                  | 2 +-
 drivers/net/ethernet/8390/ne2k-pci.c                  | 2 +-
 drivers/net/ethernet/8390/pcnet_cs.c                  | 2 +-
 drivers/net/ethernet/adaptec/starfire.c               | 2 +-
 drivers/net/ethernet/alteon/acenic.c                  | 2 +-
 drivers/net/ethernet/amd/amd8111e.c                   | 2 +-
 drivers/net/ethernet/amd/nmclan_cs.c                  | 2 +-
 drivers/net/ethernet/broadcom/b44.c                   | 2 +-
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c             | 2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c       | 2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c       | 2 +-
 drivers/net/ethernet/dec/tulip/de2104x.c              | 2 +-
 drivers/net/ethernet/dec/tulip/de4x5.c                | 2 +-
 drivers/net/ethernet/dec/tulip/dmfe.c                 | 2 +-
 drivers/net/ethernet/dec/tulip/tulip_core.c           | 2 +-
 drivers/net/ethernet/dec/tulip/uli526x.c              | 2 +-
 drivers/net/ethernet/dec/tulip/winbond-840.c          | 2 +-
 drivers/net/ethernet/dec/tulip/xircom_cb.c            | 2 +-
 drivers/net/ethernet/dlink/dl2k.h                     | 2 +-
 drivers/net/ethernet/dlink/sundance.c                 | 2 +-
 drivers/net/ethernet/fealnx.c                         | 2 +-
 drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-fcc.c      | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-fec.c      | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-scc.c      | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mii-fec.c      | 2 +-
 drivers/net/ethernet/freescale/gianfar.c              | 2 +-
 drivers/net/ethernet/freescale/gianfar.h              | 2 +-
 drivers/net/ethernet/freescale/gianfar_ethtool.c      | 2 +-
 drivers/net/ethernet/freescale/ucc_geth.c             | 2 +-
 drivers/net/ethernet/freescale/ucc_geth_ethtool.c     | 2 +-
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c             | 2 +-
 drivers/net/ethernet/ibm/emac/core.c                  | 2 +-
 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c        | 2 +-
 drivers/net/ethernet/natsemi/natsemi.c                | 2 +-
 drivers/net/ethernet/natsemi/ns83820.c                | 2 +-
 drivers/net/ethernet/packetengines/hamachi.c          | 2 +-
 drivers/net/ethernet/packetengines/yellowfin.c        | 2 +-
 drivers/net/ethernet/realtek/8139cp.c                 | 2 +-
 drivers/net/ethernet/sgi/ioc3-eth.c                   | 2 +-
 drivers/net/ethernet/sis/sis900.c                     | 2 +-
 drivers/net/ethernet/smsc/epic100.c                   | 2 +-
 drivers/net/ethernet/smsc/smc91c92_cs.c               | 2 +-
 drivers/net/ethernet/sun/cassini.c                    | 2 +-
 drivers/net/ethernet/sun/sungem.c                     | 2 +-
 drivers/net/ethernet/sun/sunhme.c                     | 2 +-
 drivers/net/ethernet/via/via-rhine.c                  | 2 +-
 drivers/net/ethernet/xircom/xirc2ps_cs.c              | 2 +-
 drivers/net/fddi/skfp/skfddi.c                        | 2 +-
 drivers/net/hamradio/6pack.c                          | 2 +-
 drivers/net/hamradio/baycom_epp.c                     | 2 +-
 drivers/net/hamradio/baycom_par.c                     | 2 +-
 drivers/net/hamradio/baycom_ser_fdx.c                 | 2 +-
 drivers/net/hamradio/baycom_ser_hdx.c                 | 2 +-
 drivers/net/hamradio/bpqether.c                       | 2 +-
 drivers/net/hamradio/dmascc.c                         | 2 +-
 drivers/net/hamradio/hdlcdrv.c                        | 2 +-
 drivers/net/hamradio/mkiss.c                          | 2 +-
 drivers/net/hamradio/scc.c                            | 2 +-
 drivers/net/hamradio/yam.c                            | 2 +-
 drivers/net/hippi/rrunner.c                           | 2 +-
 drivers/net/irda/irtty-sir.c                          | 2 +-
 drivers/net/irda/kingsun-sir.c                        | 2 +-
 drivers/net/irda/ks959-sir.c                          | 2 +-
 drivers/net/irda/ksdazzle-sir.c                       | 2 +-
 drivers/net/irda/mcs7780.c                            | 2 +-
 drivers/net/irda/vlsi_ir.c                            | 2 +-
 drivers/net/loopback.c                                | 2 +-
 drivers/net/phy/davicom.c                             | 2 +-
 drivers/net/phy/icplus.c                              | 2 +-
 drivers/net/phy/lxt.c                                 | 2 +-
 drivers/net/phy/qsemi.c                               | 2 +-
 drivers/net/ppp/ppp_async.c                           | 2 +-
 drivers/net/ppp/ppp_synctty.c                         | 2 +-
 drivers/net/ppp/pppoe.c                               | 2 +-
 drivers/net/ppp/pppox.c                               | 2 +-
 drivers/net/sb1000.c                                  | 2 +-
 drivers/net/slip/slhc.c                               | 2 +-
 drivers/net/slip/slip.c                               | 2 +-
 drivers/net/tun.c                                     | 2 +-
 drivers/net/usb/catc.c                                | 2 +-
 drivers/net/usb/kaweth.c                              | 2 +-
 drivers/net/usb/pegasus.c                             | 2 +-
 drivers/net/usb/rtl8150.c                             | 2 +-
 drivers/net/wan/dlci.c                                | 2 +-
 drivers/net/wan/dscc4.c                               | 2 +-
 drivers/net/wan/farsync.c                             | 2 +-
 drivers/net/wan/hd64570.c                             | 2 +-
 drivers/net/wan/hd64572.c                             | 2 +-
 drivers/net/wan/lapbether.c                           | 2 +-
 drivers/net/wan/lmc/lmc_main.c                        | 2 +-
 drivers/net/wan/lmc/lmc_media.c                       | 2 +-
 drivers/net/wan/sbni.c                                | 2 +-
 drivers/net/wan/sdla.c                                | 2 +-
 drivers/net/wireless/atmel/atmel.c                    | 2 +-
 drivers/net/wireless/intel/ipw2x00/ipw2100.c          | 2 +-
 drivers/net/wireless/intel/ipw2x00/libipw_geo.c       | 2 +-
 drivers/net/wireless/intel/ipw2x00/libipw_module.c    | 2 +-
 drivers/net/wireless/intel/ipw2x00/libipw_rx.c        | 2 +-
 drivers/net/wireless/intel/ipw2x00/libipw_tx.c        | 2 +-
 drivers/net/wireless/intersil/hostap/hostap_hw.c      | 2 +-
 drivers/net/wireless/intersil/hostap/hostap_main.c    | 2 +-
 drivers/net/wireless/intersil/prism54/isl_38xx.c      | 2 +-
 drivers/net/wireless/intersil/prism54/isl_ioctl.c     | 2 +-
 drivers/net/wireless/ray_cs.c                         | 2 +-
 drivers/net/wireless/wl3501_cs.c                      | 2 +-
 drivers/nubus/proc.c                                  | 2 +-
 drivers/oprofile/event_buffer.c                       | 2 +-
 drivers/oprofile/oprofilefs.c                         | 2 +-
 drivers/parisc/ccio-dma.c                             | 2 +-
 drivers/parisc/ccio-rm-dma.c                          | 2 +-
 drivers/parisc/eisa_eeprom.c                          | 2 +-
 drivers/parisc/eisa_enumerator.c                      | 2 +-
 drivers/parisc/led.c                                  | 2 +-
 drivers/parisc/pdc_stable.c                           | 2 +-
 drivers/parport/daisy.c                               | 2 +-
 drivers/parport/ieee1284_ops.c                        | 2 +-
 drivers/parport/parport_gsc.c                         | 2 +-
 drivers/parport/probe.c                               | 2 +-
 drivers/parport/procfs.c                              | 2 +-
 drivers/pci/hotplug/acpiphp_ibm.c                     | 2 +-
 drivers/pci/hotplug/cpqphp_core.c                     | 2 +-
 drivers/pci/hotplug/cpqphp_nvram.c                    | 2 +-
 drivers/pci/hotplug/pci_hotplug_core.c                | 2 +-
 drivers/pci/proc.c                                    | 2 +-
 drivers/pci/syscall.c                                 | 2 +-
 drivers/platform/x86/sony-laptop.c                    | 2 +-
 drivers/platform/x86/thinkpad_acpi.c                  | 2 +-
 drivers/pnp/interface.c                               | 2 +-
 drivers/pnp/pnpbios/proc.c                            | 2 +-
 drivers/s390/block/dasd_devmap.c                      | 2 +-
 drivers/s390/block/dasd_eckd.c                        | 2 +-
 drivers/s390/block/dasd_eer.c                         | 2 +-
 drivers/s390/block/dasd_erp.c                         | 2 +-
 drivers/s390/block/dasd_genhd.c                       | 2 +-
 drivers/s390/block/dasd_ioctl.c                       | 2 +-
 drivers/s390/block/dasd_proc.c                        | 2 +-
 drivers/s390/block/xpram.c                            | 2 +-
 drivers/s390/char/con3215.c                           | 2 +-
 drivers/s390/char/keyboard.c                          | 2 +-
 drivers/s390/char/monreader.c                         | 2 +-
 drivers/s390/char/monwriter.c                         | 2 +-
 drivers/s390/char/sclp_rw.c                           | 2 +-
 drivers/s390/char/sclp_tty.c                          | 2 +-
 drivers/s390/char/sclp_vt220.c                        | 2 +-
 drivers/s390/char/tape_char.c                         | 2 +-
 drivers/s390/char/tty3270.c                           | 2 +-
 drivers/s390/char/vmcp.c                              | 2 +-
 drivers/s390/char/vmlogrdr.c                          | 2 +-
 drivers/s390/char/vmur.c                              | 2 +-
 drivers/s390/char/zcore.c                             | 2 +-
 drivers/s390/cio/blacklist.c                          | 2 +-
 drivers/s390/crypto/zcrypt_api.c                      | 2 +-
 drivers/s390/crypto/zcrypt_cex2a.c                    | 2 +-
 drivers/s390/crypto/zcrypt_pcixcc.c                   | 2 +-
 drivers/s390/net/netiucv.c                            | 2 +-
 drivers/sbus/char/display7seg.c                       | 2 +-
 drivers/sbus/char/envctrl.c                           | 2 +-
 drivers/sbus/char/flash.c                             | 2 +-
 drivers/sbus/char/jsflash.c                           | 2 +-
 drivers/sbus/char/openprom.c                          | 2 +-
 drivers/scsi/3w-9xxx.c                                | 2 +-
 drivers/scsi/3w-sas.c                                 | 2 +-
 drivers/scsi/3w-xxxx.c                                | 2 +-
 drivers/scsi/aacraid/aachba.c                         | 2 +-
 drivers/scsi/aacraid/commctrl.c                       | 2 +-
 drivers/scsi/arcmsr/arcmsr_hba.c                      | 2 +-
 drivers/scsi/bfa/bfad.c                               | 2 +-
 drivers/scsi/dpt_i2o.c                                | 2 +-
 drivers/scsi/gdth.c                                   | 2 +-
 drivers/scsi/hptiop.c                                 | 2 +-
 drivers/scsi/ips.h                                    | 2 +-
 drivers/scsi/megaraid.c                               | 2 +-
 drivers/scsi/megaraid/megaraid_mm.h                   | 2 +-
 drivers/scsi/megaraid/megaraid_sas_base.c             | 2 +-
 drivers/scsi/osst.c                                   | 2 +-
 drivers/scsi/qla2xxx/qla_sup.c                        | 2 +-
 drivers/scsi/scsi_ioctl.c                             | 2 +-
 drivers/scsi/scsi_proc.c                              | 2 +-
 drivers/scsi/sd.c                                     | 2 +-
 drivers/scsi/sr.c                                     | 2 +-
 drivers/scsi/sr_ioctl.c                               | 2 +-
 drivers/scsi/st.c                                     | 2 +-
 drivers/tty/amiserial.c                               | 2 +-
 drivers/tty/hvc/hvc_console.c                         | 2 +-
 drivers/tty/hvc/hvcs.c                                | 2 +-
 drivers/tty/hvc/hvsi.c                                | 2 +-
 drivers/tty/moxa.c                                    | 2 +-
 drivers/tty/mxser.c                                   | 2 +-
 drivers/tty/n_hdlc.c                                  | 2 +-
 drivers/tty/n_r3964.c                                 | 2 +-
 drivers/tty/serial/icom.c                             | 2 +-
 drivers/tty/serial/serial_core.c                      | 2 +-
 drivers/tty/synclink.c                                | 2 +-
 drivers/tty/synclink_gt.c                             | 2 +-
 drivers/tty/synclinkmp.c                              | 2 +-
 drivers/tty/tty_ioctl.c                               | 2 +-
 drivers/tty/vt/consolemap.c                           | 2 +-
 drivers/tty/vt/selection.c                            | 2 +-
 drivers/tty/vt/vc_screen.c                            | 2 +-
 drivers/tty/vt/vt_ioctl.c                             | 2 +-
 drivers/usb/atm/usbatm.c                              | 2 +-
 drivers/usb/core/hub.c                                | 2 +-
 drivers/usb/gadget/legacy/inode.c                     | 2 +-
 drivers/usb/host/uhci-hcd.c                           | 2 +-
 drivers/usb/misc/ftdi-elan.c                          | 2 +-
 drivers/usb/misc/idmouse.c                            | 2 +-
 drivers/usb/misc/ldusb.c                              | 2 +-
 drivers/usb/misc/legousbtower.c                       | 2 +-
 drivers/usb/mon/mon_bin.c                             | 2 +-
 drivers/usb/mon/mon_stat.c                            | 2 +-
 drivers/usb/mon/mon_text.c                            | 2 +-
 drivers/usb/musb/musb_debugfs.c                       | 2 +-
 drivers/video/console/newport_con.c                   | 2 +-
 drivers/video/fbdev/68328fb.c                         | 2 +-
 drivers/video/fbdev/hitfb.c                           | 2 +-
 drivers/video/fbdev/hpfb.c                            | 2 +-
 drivers/video/fbdev/mx3fb.c                           | 2 +-
 drivers/video/fbdev/q40fb.c                           | 2 +-
 drivers/video/fbdev/sm501fb.c                         | 2 +-
 drivers/video/fbdev/stifb.c                           | 2 +-
 drivers/video/fbdev/w100fb.c                          | 2 +-
 drivers/zorro/proc.c                                  | 2 +-
 fs/9p/vfs_file.c                                      | 2 +-
 fs/afs/proc.c                                         | 2 +-
 fs/aio.c                                              | 2 +-
 fs/anon_inodes.c                                      | 2 +-
 fs/bfs/inode.c                                        | 2 +-
 fs/binfmt_aout.c                                      | 2 +-
 fs/binfmt_elf.c                                       | 2 +-
 fs/binfmt_elf_fdpic.c                                 | 2 +-
 fs/block_dev.c                                        | 2 +-
 fs/cifs/cifs_debug.c                                  | 2 +-
 fs/cifs/cifssmb.c                                     | 2 +-
 fs/cifs/connect.c                                     | 2 +-
 fs/cifs/transport.c                                   | 2 +-
 fs/compat.c                                           | 2 +-
 fs/compat_ioctl.c                                     | 2 +-
 fs/configfs/file.c                                    | 2 +-
 fs/coredump.c                                         | 2 +-
 fs/dcache.c                                           | 2 +-
 fs/dcookies.c                                         | 2 +-
 fs/dlm/dlm_internal.h                                 | 2 +-
 fs/efs/efs.h                                          | 2 +-
 fs/eventpoll.c                                        | 2 +-
 fs/exec.c                                             | 2 +-
 fs/ext2/ioctl.c                                       | 2 +-
 fs/ext2/super.c                                       | 2 +-
 fs/ext4/extents.c                                     | 2 +-
 fs/ext4/ioctl.c                                       | 2 +-
 fs/ext4/super.c                                       | 2 +-
 fs/fcntl.c                                            | 2 +-
 fs/fhandle.c                                          | 2 +-
 fs/filesystems.c                                      | 2 +-
 fs/gfs2/file.c                                        | 2 +-
 fs/gfs2/glock.c                                       | 2 +-
 fs/gfs2/inode.c                                       | 2 +-
 fs/gfs2/sys.c                                         | 2 +-
 fs/gfs2/util.c                                        | 2 +-
 fs/gfs2/xattr.c                                       | 2 +-
 fs/hfs/hfs_fs.h                                       | 2 +-
 fs/hfsplus/ioctl.c                                    | 2 +-
 fs/hugetlbfs/inode.c                                  | 2 +-
 fs/jbd2/journal.c                                     | 2 +-
 fs/jfs/ioctl.c                                        | 2 +-
 fs/jfs/jfs_debug.c                                    | 2 +-
 fs/jfs/super.c                                        | 2 +-
 fs/libfs.c                                            | 2 +-
 fs/locks.c                                            | 2 +-
 fs/namei.c                                            | 2 +-
 fs/ncpfs/dir.c                                        | 2 +-
 fs/ncpfs/file.c                                       | 2 +-
 fs/ncpfs/inode.c                                      | 2 +-
 fs/ncpfs/ioctl.c                                      | 2 +-
 fs/ncpfs/mmap.c                                       | 2 +-
 fs/ncpfs/ncplib_kernel.h                              | 2 +-
 fs/ncpfs/sock.c                                       | 2 +-
 fs/ncpfs/symlink.c                                    | 2 +-
 fs/nfs/direct.c                                       | 2 +-
 fs/nfs/file.c                                         | 2 +-
 fs/nfs/getroot.c                                      | 2 +-
 fs/nfs/inode.c                                        | 2 +-
 fs/nfs/super.c                                        | 2 +-
 fs/nfs/write.c                                        | 2 +-
 fs/nfsd/fault_inject.c                                | 2 +-
 fs/nfsd/vfs.c                                         | 2 +-
 fs/ntfs/file.c                                        | 2 +-
 fs/ocfs2/cluster/masklog.c                            | 2 +-
 fs/ocfs2/cluster/tcp.c                                | 2 +-
 fs/ocfs2/dlmfs/dlmfs.c                                | 2 +-
 fs/ocfs2/stack_user.c                                 | 2 +-
 fs/open.c                                             | 2 +-
 fs/openpromfs/inode.c                                 | 2 +-
 fs/pipe.c                                             | 2 +-
 fs/proc/base.c                                        | 2 +-
 fs/proc/generic.c                                     | 2 +-
 fs/proc/inode.c                                       | 2 +-
 fs/proc/kcore.c                                       | 2 +-
 fs/proc/kmsg.c                                        | 2 +-
 fs/proc/nommu.c                                       | 2 +-
 fs/proc/page.c                                        | 2 +-
 fs/proc/proc_net.c                                    | 2 +-
 fs/proc/proc_tty.c                                    | 2 +-
 fs/proc/root.c                                        | 2 +-
 fs/proc/task_mmu.c                                    | 2 +-
 fs/proc/vmcore.c                                      | 2 +-
 fs/ramfs/file-nommu.c                                 | 2 +-
 fs/ramfs/inode.c                                      | 2 +-
 fs/read_write.c                                       | 2 +-
 fs/readdir.c                                          | 2 +-
 fs/select.c                                           | 2 +-
 fs/seq_file.c                                         | 2 +-
 fs/stat.c                                             | 2 +-
 fs/ufs/inode.c                                        | 2 +-
 fs/ufs/super.c                                        | 2 +-
 fs/utimes.c                                           | 2 +-
 fs/xattr.c                                            | 2 +-
 fs/xfs/xfs_ioctl32.c                                  | 2 +-
 fs/xfs/xfs_linux.h                                    | 2 +-
 include/asm-generic/termios-base.h                    | 2 +-
 include/asm-generic/termios.h                         | 2 +-
 include/drm/drmP.h                                    | 2 +-
 include/linux/isdnif.h                                | 2 +-
 include/linux/pagemap.h                               | 2 +-
 include/linux/poll.h                                  | 2 +-
 include/net/checksum.h                                | 2 +-
 include/net/sctp/sctp.h                               | 2 +-
 include/rdma/ib_verbs.h                               | 2 +-
 init/init_task.c                                      | 2 +-
 kernel/capability.c                                   | 2 +-
 kernel/compat.c                                       | 2 +-
 kernel/configs.c                                      | 2 +-
 kernel/cpuset.c                                       | 2 +-
 kernel/exit.c                                         | 2 +-
 kernel/extable.c                                      | 2 +-
 kernel/fork.c                                         | 2 +-
 kernel/futex_compat.c                                 | 2 +-
 kernel/groups.c                                       | 2 +-
 kernel/kmod.c                                         | 2 +-
 kernel/kprobes.c                                      | 2 +-
 kernel/locking/lockdep_proc.c                         | 2 +-
 kernel/module.c                                       | 2 +-
 kernel/power/snapshot.c                               | 2 +-
 kernel/power/user.c                                   | 2 +-
 kernel/printk/printk.c                                | 2 +-
 kernel/profile.c                                      | 2 +-
 kernel/signal.c                                       | 2 +-
 kernel/sys.c                                          | 2 +-
 kernel/sysctl.c                                       | 2 +-
 kernel/time/hrtimer.c                                 | 2 +-
 kernel/time/itimer.c                                  | 2 +-
 kernel/time/posix-cpu-timers.c                        | 2 +-
 kernel/time/posix-timers.c                            | 2 +-
 kernel/time/time.c                                    | 2 +-
 kernel/time/timer.c                                   | 2 +-
 kernel/time/timer_list.c                              | 2 +-
 kernel/time/timer_stats.c                             | 2 +-
 kernel/uid16.c                                        | 2 +-
 lib/extable.c                                         | 2 +-
 lib/kstrtox.c                                         | 2 +-
 mm/memcontrol.c                                       | 2 +-
 mm/memory.c                                           | 2 +-
 mm/mempolicy.c                                        | 2 +-
 mm/mincore.c                                          | 2 +-
 mm/mmap.c                                             | 2 +-
 mm/mprotect.c                                         | 2 +-
 mm/nommu.c                                            | 2 +-
 mm/shmem.c                                            | 2 +-
 mm/util.c                                             | 2 +-
 mm/vmalloc.c                                          | 2 +-
 net/802/fc.c                                          | 2 +-
 net/802/hippi.c                                       | 2 +-
 net/8021q/vlan.c                                      | 2 +-
 net/ax25/af_ax25.c                                    | 2 +-
 net/ax25/ax25_addr.c                                  | 2 +-
 net/ax25/ax25_dev.c                                   | 2 +-
 net/ax25/ax25_ds_in.c                                 | 2 +-
 net/ax25/ax25_ds_subr.c                               | 2 +-
 net/ax25/ax25_ds_timer.c                              | 2 +-
 net/ax25/ax25_iface.c                                 | 2 +-
 net/ax25/ax25_in.c                                    | 2 +-
 net/ax25/ax25_ip.c                                    | 2 +-
 net/ax25/ax25_out.c                                   | 2 +-
 net/ax25/ax25_route.c                                 | 2 +-
 net/ax25/ax25_std_in.c                                | 2 +-
 net/ax25/ax25_std_subr.c                              | 2 +-
 net/ax25/ax25_std_timer.c                             | 2 +-
 net/ax25/ax25_subr.c                                  | 2 +-
 net/ax25/ax25_timer.c                                 | 2 +-
 net/ax25/ax25_uid.c                                   | 2 +-
 net/bridge/br_device.c                                | 2 +-
 net/bridge/br_ioctl.c                                 | 2 +-
 net/bridge/br_netfilter_hooks.c                       | 2 +-
 net/bridge/br_netfilter_ipv6.c                        | 2 +-
 net/bridge/netfilter/ebtables.c                       | 2 +-
 net/compat.c                                          | 2 +-
 net/core/datagram.c                                   | 2 +-
 net/core/dev.c                                        | 2 +-
 net/core/filter.c                                     | 2 +-
 net/core/gen_estimator.c                              | 2 +-
 net/core/rtnetlink.c                                  | 2 +-
 net/core/scm.c                                        | 2 +-
 net/core/skbuff.c                                     | 2 +-
 net/core/sock.c                                       | 2 +-
 net/core/utils.c                                      | 2 +-
 net/decnet/dn_dev.c                                   | 2 +-
 net/decnet/dn_fib.c                                   | 2 +-
 net/decnet/dn_table.c                                 | 2 +-
 net/decnet/sysctl_net_decnet.c                        | 2 +-
 net/ipv4/af_inet.c                                    | 2 +-
 net/ipv4/devinet.c                                    | 2 +-
 net/ipv4/fib_frontend.c                               | 2 +-
 net/ipv4/fib_semantics.c                              | 2 +-
 net/ipv4/fib_trie.c                                   | 2 +-
 net/ipv4/icmp.c                                       | 2 +-
 net/ipv4/igmp.c                                       | 2 +-
 net/ipv4/ip_gre.c                                     | 2 +-
 net/ipv4/ip_options.c                                 | 2 +-
 net/ipv4/ip_output.c                                  | 2 +-
 net/ipv4/ip_sockglue.c                                | 2 +-
 net/ipv4/ipconfig.c                                   | 2 +-
 net/ipv4/ipip.c                                       | 2 +-
 net/ipv4/ipmr.c                                       | 2 +-
 net/ipv4/netfilter/arp_tables.c                       | 2 +-
 net/ipv4/netfilter/ip_tables.c                        | 2 +-
 net/ipv4/raw.c                                        | 2 +-
 net/ipv4/route.c                                      | 2 +-
 net/ipv4/tcp.c                                        | 2 +-
 net/ipv4/udp.c                                        | 2 +-
 net/ipv6/af_inet6.c                                   | 2 +-
 net/ipv6/datagram.c                                   | 2 +-
 net/ipv6/icmp.c                                       | 2 +-
 net/ipv6/ip6_flowlabel.c                              | 2 +-
 net/ipv6/ip6_tunnel.c                                 | 2 +-
 net/ipv6/ip6mr.c                                      | 2 +-
 net/ipv6/ipv6_sockglue.c                              | 2 +-
 net/ipv6/netfilter/ip6_tables.c                       | 2 +-
 net/ipv6/route.c                                      | 2 +-
 net/ipv6/sit.c                                        | 2 +-
 net/ipv6/udp.c                                        | 2 +-
 net/ipx/af_ipx.c                                      | 2 +-
 net/irda/af_irda.c                                    | 2 +-
 net/irda/ircomm/ircomm_tty.c                          | 2 +-
 net/irda/ircomm/ircomm_tty_ioctl.c                    | 2 +-
 net/irda/irda_device.c                                | 2 +-
 net/irda/irnet/irnet.h                                | 2 +-
 net/lapb/lapb_iface.c                                 | 2 +-
 net/lapb/lapb_in.c                                    | 2 +-
 net/lapb/lapb_out.c                                   | 2 +-
 net/lapb/lapb_subr.c                                  | 2 +-
 net/lapb/lapb_timer.c                                 | 2 +-
 net/netfilter/ipvs/ip_vs_ctl.c                        | 2 +-
 net/netfilter/nfnetlink.c                             | 2 +-
 net/netlink/af_netlink.c                              | 2 +-
 net/packet/af_packet.c                                | 2 +-
 net/rose/af_rose.c                                    | 2 +-
 net/rose/rose_route.c                                 | 2 +-
 net/sctp/ipv6.c                                       | 2 +-
 net/socket.c                                          | 2 +-
 net/sunrpc/auth_gss/auth_gss.c                        | 2 +-
 net/sunrpc/cache.c                                    | 2 +-
 net/sunrpc/svcsock.c                                  | 2 +-
 net/sunrpc/sysctl.c                                   | 2 +-
 net/unix/af_unix.c                                    | 2 +-
 net/x25/af_x25.c                                      | 2 +-
 net/x25/x25_link.c                                    | 2 +-
 net/xfrm/xfrm_state.c                                 | 2 +-
 net/xfrm/xfrm_user.c                                  | 2 +-
 security/keys/keyctl.c                                | 2 +-
 security/keys/process_keys.c                          | 2 +-
 security/keys/request_key_auth.c                      | 2 +-
 security/keys/user_defined.c                          | 2 +-
 sound/oss/dmasound/dmasound_atari.c                   | 2 +-
 sound/oss/dmasound/dmasound_core.c                    | 2 +-
 sound/oss/dmasound/dmasound_paula.c                   | 2 +-
 sound/oss/dmasound/dmasound_q40.c                     | 2 +-
 sound/oss/msnd.c                                      | 2 +-
 sound/oss/os.h                                        | 2 +-
 sound/oss/swarm_cs4297a.c                             | 2 +-
 virt/kvm/kvm_main.c                                   | 2 +-
 1088 files changed, 1088 insertions(+), 1088 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index 3ff9a957a25c..1b568ed74f95 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -21,7 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define memzero(s,n)	memset ((s),0,(n))
 #define puts		srm_printk
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 2d6efcff3bf3..2f26ae74b61a 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -26,7 +26,7 @@
 #include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 volatile unsigned long irq_err_count;
 DEFINE_PER_CPU(unsigned long, irq_pmi_count);
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 56e427c7aa3c..54d8616644e2 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -39,7 +39,7 @@
 
 #include <asm/fpu.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sysinfo.h>
 #include <asm/thread_info.h>
 #include <asm/hwrpb.h>
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index b483156698d5..bca963a4aa48 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -31,7 +31,7 @@
 #include <linux/rcupdate.h>
 
 #include <asm/reg.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/hwrpb.h>
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 04abdec7f496..bc4d2cdcf21d 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -16,7 +16,7 @@
 #include <linux/tracehook.h>
 #include <linux/audit.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/fpu.h>
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 4811e54069fc..491e6a604e82 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -53,7 +53,7 @@ static struct notifier_block alpha_panic_block = {
         INT_MAX /* try to do it first */
 };
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/hwrpb.h>
 #include <asm/dma.h>
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 8dbfb15f1745..17308f925306 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -22,7 +22,7 @@
 #include <linux/syscalls.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sigcontext.h>
 #include <asm/ucontext.h>
 
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index ffe996a54fad..705ae12acd15 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -35,7 +35,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/console.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/machvec.h>
 
 #define BASE_DIR	"srm_environment"	/* Subdir in /proc/		*/
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 72b59511e59a..e9c45b65a905 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -18,7 +18,7 @@
 #include <linux/tty_flip.h>
 
 #include <asm/console.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 static DEFINE_SPINLOCK(srmcons_callback_lock);
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 5b6202a825ff..992000e3d9e4 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -34,7 +34,7 @@
 #include <linux/profile.h>
 #include <linux/irq_work.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/hwrpb.h>
 
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 74aceead06e9..3328af7c2776 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -18,7 +18,7 @@
 #include <linux/ratelimit.h>
 
 #include <asm/gentrap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <asm/sysinfo.h>
 #include <asm/hwrpb.h>
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index b4ff3b683bcd..5dfb7975895f 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -11,7 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 #define ldq_u(x,y) \
diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
index 58c2669a1dd4..fa5ae0ad8983 100644
--- a/arch/alpha/math-emu/math.c
+++ b/arch/alpha/math-emu/math.c
@@ -3,7 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sfp-util.h"
 #include <math-emu/soft-fp.h>
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index a1bea91df56a..0542e973c73d 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -22,7 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/gfp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/hwrpb.h>
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 6053f64c3752..4c10c6452678 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -15,7 +15,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/bL_switcher.h>
 
 static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index c3fe769d7558..853221f81104 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -29,7 +29,7 @@
 #include <asm/opcodes.h>
 #include <asm/system_info.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Error-checking SWP macros implemented using ldrex{b}/strex{b}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 8f92efa8460e..11676787ad49 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -33,7 +33,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 9aca92074f85..fa6182a40941 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -23,7 +23,7 @@
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <asm/cputype.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
diff --git a/arch/arm/mach-iop13xx/irq.c b/arch/arm/mach-iop13xx/irq.c
index c702cc4092de..bd9b43c8004e 100644
--- a/arch/arm/mach-iop13xx/irq.c
+++ b/arch/arm/mach-iop13xx/irq.c
@@ -20,7 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mach/irq.h>
 #include <asm/irq.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 26874f608ca9..0f08f611c1a6 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -34,7 +34,7 @@
 #include <mach/udc.h>
 #include <mach/hardware.h>
 #include <mach/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index 6d3517dc4772..fb48f3141fb4 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -20,7 +20,7 @@
 #include <asm/fiq.h>
 #include <asm/irq.h>
 #include <mach/hardware.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/mach/dma.h>
 #include <asm/hardware/iomd.h>
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index 101e8f2c7abe..ed8d129d4bea 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -25,7 +25,7 @@
 #include <linux/sched_clock.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <mach/time.h>
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 2b79b8a89457..b0d708ff7f4e 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -16,7 +16,7 @@
 #ifndef __ASM_WORD_AT_A_TIME_H
 #define __ASM_WORD_AT_A_TIME_H
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef __AARCH64EB__
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 04de188a36c9..fde04f029ec3 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -19,7 +19,7 @@
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4f0d76339414..a7504f40d7ee 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -31,7 +31,7 @@
 #include <asm/memory.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 /*
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 1decd2b2c730..f0593c92279b 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -29,7 +29,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 #include <asm/insn.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/sections.h>
 
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index b7063de792f7..c747a0fc5d7d 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -26,7 +26,7 @@
 #include <asm/esr.h>
 #include <asm/fpsimd.h>
 #include <asm/signal32.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 struct compat_sigcontext {
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f9e15722473..b37446a8ffdb 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -26,7 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <asm/cputype.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index d7150e30438a..add4a1334085 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,7 +17,7 @@
  */
 #include <linux/linkage.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 	.text
 
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index cfe13396085b..fd6cd05593f9 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -17,7 +17,7 @@
 #include <linux/linkage.h>
 
 #include <asm/cache.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Copy from user space to a kernel buffer (alignment handled by the hardware)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 718b1c4e2f85..d828540ded6f 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -19,7 +19,7 @@
 #include <linux/linkage.h>
 
 #include <asm/cache.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Copy from user space to user space (alignment handled by the hardware)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index e99e31c9acac..3e6ae2663b82 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -17,7 +17,7 @@
 #include <linux/linkage.h>
 
 #include <asm/cache.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Copy to user space from a kernel buffer (alignment handled by the hardware)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index da9576932322..17f422a4dc55 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -23,7 +23,7 @@
 #include <asm/assembler.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  *	flush_icache_range(start,end)
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index b41aff25426d..47cf3f9d89ff 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,7 +49,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <xen/interface/xen.h>
 
 
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
index 7c6cf14f0985..0d05fd095468 100644
--- a/arch/avr32/kernel/avr32_ksyms.c
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 
 #include <asm/checksum.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * GCC functions
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
index 4aedcab7cd4b..a89b893279bb 100644
--- a/arch/avr32/kernel/ptrace.c
+++ b/arch/avr32/kernel/ptrace.c
@@ -17,7 +17,7 @@
 #include <linux/notifier.h>
 
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ocd.h>
 #include <asm/mmu_context.h>
 #include <linux/kdebug.h>
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 8f1c63b9b983..b5fcc4914fe4 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -17,7 +17,7 @@
 #include <linux/unistd.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ucontext.h>
 #include <asm/syscalls.h>
 
diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c
index 85d635cd7b28..d9476825fc43 100644
--- a/arch/avr32/mm/cache.c
+++ b/arch/avr32/mm/cache.c
@@ -12,7 +12,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cachectl.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/syscalls.h>
 
 /*
diff --git a/arch/blackfin/kernel/bfin_dma.c b/arch/blackfin/kernel/bfin_dma.c
index 4a32f2dd5ddc..9d3eb0cf8ccc 100644
--- a/arch/blackfin/kernel/bfin_dma.c
+++ b/arch/blackfin/kernel/bfin_dma.c
@@ -19,7 +19,7 @@
 #include <asm/blackfin.h>
 #include <asm/cacheflush.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/early_printk.h>
 
 /*
diff --git a/arch/blackfin/kernel/kgdb_test.c b/arch/blackfin/kernel/kgdb_test.c
index 18ab004aea1c..b8b785dc4e3b 100644
--- a/arch/blackfin/kernel/kgdb_test.c
+++ b/arch/blackfin/kernel/kgdb_test.c
@@ -12,7 +12,7 @@
 #include <linux/proc_fs.h>
 
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/blackfin.h>
 
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 4489efc52883..0188c933b155 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -14,7 +14,7 @@
 #include <linux/kernel.h>
 #include <asm/dma.h>
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Transfer the section to the L1 memory */
 int
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index 63f5560d6eb2..4cc72b0d1c1d 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -18,7 +18,7 @@
 #include <linux/initrd.h>
 
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * ZERO_PAGE is a special page that is used for zero-initialized
diff --git a/arch/cris/arch-v10/drivers/eeprom.c b/arch/cris/arch-v10/drivers/eeprom.c
index c903a9e53a47..33558d270a53 100644
--- a/arch/cris/arch-v10/drivers/eeprom.c
+++ b/arch/cris/arch-v10/drivers/eeprom.c
@@ -29,7 +29,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "i2c.h"
 
 #define D(x)
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 0f3983241e60..9ac75d68f184 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -27,7 +27,7 @@
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <arch/svinto.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sync_serial.h>
 #include <arch/io_interface_mux.h>
 
diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c
index bfddfb99401f..eca94c7d56e7 100644
--- a/arch/cris/arch-v10/kernel/ptrace.c
+++ b/arch/cris/arch-v10/kernel/ptrace.c
@@ -12,7 +12,7 @@
 #include <linux/signal.h>
 #include <linux/security.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 7122d9773b13..db30c98e4926 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -26,7 +26,7 @@
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <arch/system.h>
 
 #define DEBUG_SIG 0
diff --git a/arch/cris/arch-v10/kernel/traps.c b/arch/cris/arch-v10/kernel/traps.c
index 7001beda716c..96d004fe9740 100644
--- a/arch/cris/arch-v10/kernel/traps.c
+++ b/arch/cris/arch-v10/kernel/traps.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <arch/sv_addr_ag.h>
 #include <arch/system.h>
 
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b964c667aced..1ba7cc000dfc 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -8,7 +8,7 @@
  * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Asm:s have been tweaked (within the domain of correctness) to give
    satisfactory results for "gcc version 2.96 20000427 (experimental)".
diff --git a/arch/cris/arch-v10/mm/fault.c b/arch/cris/arch-v10/mm/fault.c
index ed60588f8467..75210cbe61ce 100644
--- a/arch/cris/arch-v10/mm/fault.c
+++ b/arch/cris/arch-v10/mm/fault.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/mm.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <arch/svinto.h>
 #include <asm/mmu_context.h>
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index 0068fd411a84..ae6903d7fdbe 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -14,7 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
 
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index fe1f9cf7b391..c366bc05466a 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -12,7 +12,7 @@
 #include <linux/signal.h>
 #include <linux/security.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 150d1d76c29d..816bf2ca93ef 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -18,7 +18,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <arch/hwregs/cpu_vect.h>
 
 extern unsigned long cris_signal_return_page;
diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c
index 8bbe09c93132..d79666aefd71 100644
--- a/arch/cris/arch-v32/kernel/traps.c
+++ b/arch/cris/arch-v32/kernel/traps.c
@@ -4,7 +4,7 @@
 
 #include <linux/ptrace.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <hwregs/supp_reg.h>
 #include <hwregs/intr_vect_defs.h>
 #include <asm/irq.h>
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index f0f335d8aa79..05e58dab800d 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -8,7 +8,7 @@
  * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Asm:s have been tweaked (within the domain of correctness) to give
    satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 31b4bd288cad..3166d1cf2f84 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -10,7 +10,7 @@
 #include <linux/tty.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 #include <asm/io.h>
 #include <asm/delay.h>
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index b78498eb079b..50a7dd451456 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -14,7 +14,7 @@
 
 #include <linux/atomic.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
diff --git a/arch/cris/kernel/profile.c b/arch/cris/kernel/profile.c
index cd9f15b92f8f..ad56b37f8e11 100644
--- a/arch/cris/kernel/profile.c
+++ b/arch/cris/kernel/profile.c
@@ -5,7 +5,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define SAMPLE_BUFFER_SIZE 8192
 
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c
index fd3427e563c5..806b764059d5 100644
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -18,7 +18,7 @@
 #include <linux/user.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c
index 7aa036ec78ff..8febb032fdd7 100644
--- a/arch/cris/kernel/sys_cris.c
+++ b/arch/cris/kernel/sys_cris.c
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/ipc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/segment.h>
 
 asmlinkage long
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index da4c72401e27..b2a312a7afc6 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -20,7 +20,7 @@
 #endif
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <arch/system.h>
 
 extern void arch_enable_nmi(void);
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 4bea27f50a7a..2e1da71e27a4 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -5,7 +5,7 @@
 
 #include <linux/futex.h>
 #include <asm/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
 
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 2239346fa3db..93513e4ccd2b 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -28,7 +28,7 @@
 #include <linux/atomic.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
diff --git a/arch/frv/kernel/pm-mb93093.c b/arch/frv/kernel/pm-mb93093.c
index eaa7b582ef52..8358e34a3fad 100644
--- a/arch/frv/kernel/pm-mb93093.c
+++ b/arch/frv/kernel/pm-mb93093.c
@@ -17,7 +17,7 @@
 #include <linux/sysctl.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/mb86943a.h>
 
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index ac767d94a880..051ccecbf7f1 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -19,7 +19,7 @@
 #include <linux/sysctl.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/mb86943a.h>
 
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5d40aeb7712e..b306241c4ef2 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -28,7 +28,7 @@
 #include <linux/rcupdate.h>
 
 #include <asm/asm-offsets.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 3987ff88dab0..49768401ce0f 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -23,7 +23,7 @@
 #include <linux/elf.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 82d5e914dc15..bf6e07a7a1b1 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -22,7 +22,7 @@
 #include <linux/personality.h>
 #include <linux/tracehook.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
 #define DEBUG_SIG 0
diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c
index 9c4980825bbb..f80cc8b9bd45 100644
--- a/arch/frv/kernel/sys_frv.c
+++ b/arch/frv/kernel/sys_frv.c
@@ -25,7 +25,7 @@
 #include <linux/ipc.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
diff --git a/arch/frv/kernel/sysctl.c b/arch/frv/kernel/sysctl.c
index f4dfae2c75ad..b54a64971cf1 100644
--- a/arch/frv/kernel/sysctl.c
+++ b/arch/frv/kernel/sysctl.c
@@ -12,7 +12,7 @@
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const char frv_cache_wback[] = "wback";
 static const char frv_cache_wthru[] = "wthru";
diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c
index a6d105d61b26..31221fb4348e 100644
--- a/arch/frv/kernel/traps.c
+++ b/arch/frv/kernel/traps.c
@@ -23,7 +23,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
 #include <asm/fpu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/siginfo.h>
 #include <asm/unaligned.h>
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index 374f88d6cc00..8b360b4222a5 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -11,7 +11,7 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*****************************************************************************/
 /*
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index 7a73aaeae3ac..e701aa9e6a14 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -44,7 +44,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 
 static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
diff --git a/arch/frv/mm/extable.c b/arch/frv/mm/extable.c
index 8863d6c1df6e..9a641c1b085a 100644
--- a/arch/frv/mm/extable.c
+++ b/arch/frv/mm/extable.c
@@ -4,7 +4,7 @@
 
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern const void __memset_end, __memset_user_error_lr, __memset_user_error_handler;
 extern const void __memcpy_end, __memcpy_user_error_lr, __memcpy_user_error_handler;
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 9f64fe8f29ff..a947dbb4fd91 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -9,7 +9,7 @@
  * Adapted for h8300 by Yoshinori Sato 2006
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * gzip declarations
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index dee41256922c..891974a11704 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index 7138303cbbf2..d784f7117f9a 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -41,7 +41,7 @@
 #include <linux/tracehook.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/traps.h>
 #include <asm/ucontext.h>
diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c
index c041d8ecb1e2..af9dec4c28eb 100644
--- a/arch/hexagon/kernel/hexagon_ksyms.c
+++ b/arch/hexagon/kernel/hexagon_ksyms.c
@@ -21,7 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/hexagon_vm.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Additional functions */
 EXPORT_SYMBOL(__clear_user_hexagon);
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index b039a624c170..c6b22b9945a7 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -24,7 +24,7 @@
 #include <asm/registers.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ucontext.h>
 #include <asm/cacheflush.h>
 #include <asm/signal.h>
diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c
index 34127261c2b7..ec90afdb3ad0 100644
--- a/arch/hexagon/mm/uaccess.c
+++ b/arch/hexagon/mm/uaccess.c
@@ -23,7 +23,7 @@
  * we implement here as subroutines.
  */
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 /*
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index bd7c251e2bce..de863d6d802b 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -26,7 +26,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mm.h>
 #include <linux/signal.h>
 #include <linux/module.h>
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
index 0b286ca164f9..8682df6263d6 100644
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@@ -9,7 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 
 extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index c8c9298666fb..9c12b794e774 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -11,7 +11,7 @@
 #include <linux/crash_dump.h>
 
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 0eaa89f3defd..fa8ee64adac2 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -14,7 +14,7 @@
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index de4fc00dea98..2ff1df7b14ea 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -17,7 +17,7 @@
  */
 
 #include <asm/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index c7c51445c3be..45ff27e9edbb 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -33,7 +33,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern void jprobe_inst_return(void);
 
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 2436ad5f92c1..677a86826771 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -50,7 +50,7 @@
 #include <asm/perfmon.h>
 #include <asm/processor.h>
 #include <asm/signal.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/delay.h>
 
 #ifdef CONFIG_PERFMON
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index aae6c4dc7ae7..52deab683ba1 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -41,7 +41,7 @@
 #include <asm/sal.h>
 #include <asm/switch_to.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unwind.h>
 #include <asm/user.h>
 
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 36f660da8124..0b1153e610ea 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -26,7 +26,7 @@
 #include <asm/processor.h>
 #include <asm/ptrace_offsets.h>
 #include <asm/rse.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unwind.h>
 #ifdef CONFIG_PERFMON
 #include <asm/perfmon.h>
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index aaf74f36cfa1..d194d5c83d32 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -48,7 +48,7 @@
 #include <linux/semaphore.h>
 
 #include <asm/sal.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index b3a124da71e5..5db52c6813c4 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -22,7 +22,7 @@
 #include <linux/wait.h>
 
 #include <asm/intrinsics.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/rse.h>
 #include <asm/sigcontext.h>
 
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 41e33f84c185..a09c12230bc5 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -18,7 +18,7 @@
 #include <linux/hugetlb.h>
 
 #include <asm/shmparam.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long
 arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 77edd68c5161..095bfaff82d0 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -21,7 +21,7 @@
 #include <asm/fpswa.h>
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 
 fpswa_interface_t *fpswa_interface;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 7f0d31656b4d..9cd01c2200ee 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -22,7 +22,7 @@
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 #include <asm/rse.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 8f66195999e7..9704e2cd9878 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -41,7 +41,7 @@
 #include <asm/ptrace_offsets.h>
 #include <asm/rse.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "entry.h"
 #include "unwind_i.h"
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
index 118daf5a0632..42f7678ef6ad 100644
--- a/arch/ia64/lib/csum_partial_copy.c
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * XXX Fixme: those 2 inlines are meant for debugging and will go away
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index 8f70bb2d0c37..4edb816aba9a 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c
@@ -5,7 +5,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 void
 ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1841ef69183d..bb4610faca84 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -31,7 +31,7 @@
 #include <asm/sal.h>
 #include <asm/sections.h>
 #include <asm/tlb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index b9992571c036..4c3b84d8406a 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -37,7 +37,7 @@
 
 #include <asm/processor.h>
 #include <asm/topology.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sal.h>
 #include <asm/sn/io.h>
 #include <asm/sn/sn_sal.h>
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 7aab87f48060..29cf8f8c08e9 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -9,7 +9,7 @@
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sn/sn_sal.h>
 
 static int partition_id_show(struct seq_file *s, void *p)
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index e35f6485c1fd..32d0380eb72e 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -14,7 +14,7 @@
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/io.h>
diff --git a/arch/m32r/kernel/align.c b/arch/m32r/kernel/align.c
index ab871ccd33f8..ec51e5b34860 100644
--- a/arch/m32r/kernel/align.c
+++ b/arch/m32r/kernel/align.c
@@ -5,7 +5,7 @@
  */
 
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int get_reg(struct pt_regs *regs, int nr)
 {
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index c7272b894283..5537f7397297 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -19,7 +19,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * do_IRQ handles all normal device IRQs (the special
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 23f26f4adfff..d763f0bd2106 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -8,7 +8,7 @@
 #include <linux/string.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 #include <asm/io.h>
 #include <asm/delay.h>
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index a88b1f01e91f..e0568bee60c0 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -29,7 +29,7 @@
 #include <linux/rcupdate.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/elf.h>
 #include <asm/m32r.h>
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index c145605a981f..a68acb9fa515 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -27,7 +27,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 1c81e24fd006..1ed597041fba 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -23,7 +23,7 @@
 #include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index c3fdd632fba7..f34957032504 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -22,7 +22,7 @@
 #include <linux/utsname.h>
 #include <linux/ipc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cachectl.h>
 #include <asm/cacheflush.h>
 #include <asm/syscall.h>
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index a7a424f852e4..c3c5fdfae920 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -18,7 +18,7 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
 
diff --git a/arch/m32r/lib/csum_partial_copy.c b/arch/m32r/lib/csum_partial_copy.c
index 5596f3df833f..b3cd59c12b8e 100644
--- a/arch/m32r/lib/csum_partial_copy.c
+++ b/arch/m32r/lib/csum_partial_copy.c
@@ -22,7 +22,7 @@
 
 #include <net/checksum.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Copy while checksumming, otherwise like csum_partial
diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c
index 82abd159dbef..fd03f2731f20 100644
--- a/arch/m32r/lib/usercopy.c
+++ b/arch/m32r/lib/usercopy.c
@@ -9,7 +9,7 @@
 #include <linux/prefetch.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long
 __generic_copy_to_user(void __user *to, const void *from, unsigned long n)
diff --git a/arch/m32r/mm/extable.c b/arch/m32r/mm/extable.c
index 1743f23d49a3..40ccf80d29cf 100644
--- a/arch/m32r/mm/extable.c
+++ b/arch/m32r/mm/extable.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
diff --git a/arch/m32r/mm/fault-nommu.c b/arch/m32r/mm/fault-nommu.c
index 80f18cc6f547..e22d5ddae5cb 100644
--- a/arch/m32r/mm/fault-nommu.c
+++ b/arch/m32r/mm/fault-nommu.c
@@ -22,7 +22,7 @@
 #include <linux/vt_kern.h>              /* For unblank_screen() */
 
 #include <asm/m32r.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/hardirq.h>
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index f7984f44ff0f..d53c9b301f84 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -20,7 +20,7 @@
 #include <asm/bvme6000hw.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 
 /*
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 4ba1ae7345c3..aaf28f8e342d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -27,7 +27,7 @@
 #include <linux/mqueue.h>
 #include <linux/rcupdate.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/setup.h>
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 1bc10e62b9af..9cd86d7343a6 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -20,7 +20,7 @@
 #include <linux/signal.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 58507edbdf1d..8ead291a902a 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -46,7 +46,7 @@
 #include <linux/tracehook.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/traps.h>
 #include <asm/ucontext.h>
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 9aa01adb407f..98a2daaae30c 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -22,7 +22,7 @@
 #include <linux/ipc.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cachectl.h>
 #include <asm/traps.h>
 #include <asm/page.h>
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 6c9ca24830e9..558f38402737 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -32,7 +32,7 @@
 
 #include <asm/setup.h>
 #include <asm/fpu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/traps.h>
 #include <asm/pgalloc.h>
 #include <asm/machdep.h>
diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c
index 35d1442dee89..a76b73abaf64 100644
--- a/arch/m68k/lib/uaccess.c
+++ b/arch/m68k/lib/uaccess.c
@@ -5,7 +5,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long __generic_copy_from_user(void *to, const void __user *from,
 				       unsigned long n)
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 0fb54a90eac2..c6d351f5bd79 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -16,7 +16,7 @@
 #include <linux/cuda.h>
 #include <linux/pmu.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/segment.h>
 #include <asm/setup.h>
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index b09a3cb29b68..9c1e656b1f8f 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -20,7 +20,7 @@
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/traps.h>
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 8f37fdd80be9..7cb72dbc2eaa 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -21,7 +21,7 @@
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/machdep.h>
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index 269f81158a33..b5b7d53f7283 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -18,7 +18,7 @@
 #include <linux/bootmem.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/machdep.h>
diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c
index 1cdc73268188..8f00847a0e4b 100644
--- a/arch/m68k/mvme16x/rtc.c
+++ b/arch/m68k/mvme16x/rtc.c
@@ -19,7 +19,7 @@
 #include <asm/mvme16xhw.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 
 /*
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index 3f258e230ba5..0f95134e9b85 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -18,7 +18,7 @@
 
 #include <asm/setup.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/sun3mmu.h>
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
index 3074b64793e6..c9939604a38f 100644
--- a/arch/metag/kernel/irq.c
+++ b/arch/metag/kernel/irq.c
@@ -13,7 +13,7 @@
 
 #include <asm/core_reg.h>
 #include <asm/mach/arch.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_4KSTACKS
 union irq_ctx {
diff --git a/arch/mips/alchemy/common/power.c b/arch/mips/alchemy/common/power.c
index 921ed30b440c..303257b697c2 100644
--- a/arch/mips/alchemy/common/power.c
+++ b/arch/mips/alchemy/common/power.c
@@ -33,7 +33,7 @@
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mach-au1x00/au1000.h>
 
 /*
diff --git a/arch/mips/dec/kn01-berr.c b/arch/mips/dec/kn01-berr.c
index 44d8a87a8a68..e9d2db480aeb 100644
--- a/arch/mips/dec/kn01-berr.c
+++ b/arch/mips/dec/kn01-berr.c
@@ -23,7 +23,7 @@
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/dec/kn01.h>
 
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index bce1ce53149a..7749daf2a465 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -18,7 +18,7 @@
 
 #include <linux/in6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * computes the checksum of a memory block at buff, length len,
diff --git a/arch/mips/include/asm/compat-signal.h b/arch/mips/include/asm/compat-signal.h
index 64e0b9343b8c..4c6176467146 100644
--- a/arch/mips/include/asm/compat-signal.h
+++ b/arch/mips/include/asm/compat-signal.h
@@ -8,7 +8,7 @@
 #include <asm/signal.h>
 #include <asm/siginfo.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static inline int __copy_conv_sigset_to_user(compat_sigset_t __user *d,
 	const sigset_t *s)
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 667ca3c467b7..b42b513007a2 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -20,7 +20,7 @@
 #include <asm/cpu-features.h>
 #include <asm/cpu-type.h>
 #include <asm/mipsmtregs.h>
-#include <asm/uaccess.h> /* for segment_eq() */
+#include <linux/uaccess.h> /* for segment_eq() */
 
 extern void (*r4k_blast_dcache)(void);
 extern void (*r4k_blast_icache)(void);
diff --git a/arch/mips/include/asm/termios.h b/arch/mips/include/asm/termios.h
index 6245b68a69a8..ce2d72e34274 100644
--- a/arch/mips/include/asm/termios.h
+++ b/arch/mips/include/asm/termios.h
@@ -9,7 +9,7 @@
 #ifndef _ASM_TERMIOS_H
 #define _ASM_TERMIOS_H
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <uapi/asm/termios.h>
 
 /*
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index db6f5afff4ff..1900f39588ae 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -18,7 +18,7 @@
 #include <asm/mipsregs.h>
 #include <asm/jazz.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dma.h>
 #include <asm/jazzdma.h>
 #include <asm/pgtable.h>
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 12c718181e5e..ae037a304ee4 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -18,7 +18,7 @@
 #include <asm/inst.h>
 #include <asm/mips-r2-to-r6-emul.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Calculate and return exception PC in case of branch delay slot
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index dd3175442c9e..07718bb5fc9d 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -30,7 +30,7 @@
 #include <asm/elf.h>
 #include <asm/pgtable-bits.h>
 #include <asm/spram.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Hardware capabilities */
 unsigned int elf_hwcap __read_mostly;
diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c
index 6fe7790e5868..77ee99a2d0aa 100644
--- a/arch/mips/kernel/crash_dump.c
+++ b/arch/mips/kernel/crash_dump.c
@@ -1,7 +1,7 @@
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
 #include <linux/crash_dump.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/slab.h>
 
 static void *kdump_buf_page;
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index f25f7eab7307..f8f5836eb3c1 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -23,7 +23,7 @@
 #include <linux/ftrace.h>
 
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index de63d36af895..1f4bd222ba76 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -32,7 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static struct hard_trap_info {
 	unsigned char tt;	/* Trap type code for MIPS R3xxx and R4xxx */
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 50fb62544df7..0352f742d077 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -38,7 +38,7 @@
 
 #include <asm/compat-signal.h>
 #include <asm/sim.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/mman.h>
 
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 789d7bf4fef3..a12904ea9f65 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -11,7 +11,7 @@
 #include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * CPU mask used to set process affinity for MT VPEs/TCs with FPUs
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index bd09853aecdf..ef2ca28a028b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -29,7 +29,7 @@
 #include <asm/local.h>
 #include <asm/mipsregs.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_64BIT
 #define ADDIU	"daddiu "
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index e2b6ab74643d..93aeec705a6e 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -12,7 +12,7 @@
 #include <linux/export.h>
 #include <asm/checksum.h>
 #include <linux/mm.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ftrace.h>
 #include <asm/fpu.h>
 #include <asm/msa.h>
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 9514e5f2209f..5142b1dfe8a7 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -38,7 +38,7 @@
 #include <asm/mipsregs.h>
 #include <asm/processor.h>
 #include <asm/reg.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/elf.h>
 #include <asm/isadep.h>
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index a92994d60e91..c8ba26072132 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -41,7 +41,7 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/syscall.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/bootinfo.h>
 #include <asm/reg.h>
 
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 5fcbdcd7abd0..4f0998525626 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -32,7 +32,7 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/reg.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/bootinfo.h>
 
 /*
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 97b7c51b8251..84165f2b31ff 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -16,7 +16,7 @@
 
 #include <asm/compat.h>
 #include <asm/compat-signal.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 #include "signal-common.h"
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index a7bc38430500..b672cebb4a1a 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -33,7 +33,7 @@
 #include <asm/cacheflush.h>
 #include <asm/compat-signal.h>
 #include <asm/sim.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ucontext.h>
 #include <asm/fpu.h>
 #include <asm/cpu-features.h>
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 53a7ef9a8f32..833f82210528 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -36,7 +36,7 @@
 #include <asm/sim.h>
 #include <asm/shmparam.h>
 #include <asm/sysmips.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/switch_to.h>
 
 /*
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 3905003dfe2b..6c7f9d7e92b3 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -61,7 +61,7 @@
 #include <asm/siginfo.h>
 #include <asm/tlbdebug.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/watch.h>
 #include <asm/mmu_context.h>
 #include <asm/types.h>
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index f1c308dbbc4a..7ed98354fe9d 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -89,7 +89,7 @@
 #include <asm/fpu.h>
 #include <asm/fpu_emulator.h>
 #include <asm/inst.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define STR(x)	__STR(x)
 #define __STR(x)  #x
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index f8b7bf836437..a298ac93edcc 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -42,7 +42,7 @@
 #include <asm/inst.h>
 #include <asm/ptrace.h>
 #include <asm/signal.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/cpu-info.h>
 #include <asm/processor.h>
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 4a094f7acb3d..c4469ff4a996 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -6,7 +6,7 @@
 #include <asm/fpu_emulator.h>
 #include <asm/inst.h>
 #include <asm/mipsregs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /**
  * struct emuframe - The 'emulation' frame structure
diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c
index e474fa2efed4..81bc8a34a83f 100644
--- a/arch/mips/mm/extable.c
+++ b/arch/mips/mm/extable.c
@@ -8,7 +8,7 @@
 #include <linux/extable.h>
 #include <linux/spinlock.h>
 #include <asm/branch.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c
index 01f1154cdb0c..7e945e310b44 100644
--- a/arch/mips/mm/sc-debugfs.c
+++ b/arch/mips/mm/sc-debugfs.c
@@ -10,7 +10,7 @@
 
 #include <asm/bcache.h>
 #include <asm/debug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <linux/init.h>
 
diff --git a/arch/mips/oprofile/op_model_loongson3.c b/arch/mips/oprofile/op_model_loongson3.c
index 85f3ee4ab456..40660392006f 100644
--- a/arch/mips/oprofile/op_model_loongson3.c
+++ b/arch/mips/oprofile/op_model_loongson3.c
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <irq.h>
 #include <loongson.h>
 #include "op_impl.h"
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index 712cc0f6a58d..9960a8302eac 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -19,7 +19,7 @@
 #include <asm/sgi/ioc.h>
 #include <asm/sgi/ip22.h>
 #include <asm/r4kcache.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/bootinfo.h>
 
 static unsigned int count_be_is_fixup;
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 692778da9e76..2e0edb385656 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -19,7 +19,7 @@
 #include <asm/sn/sn0/hub.h>
 #include <asm/tlbdebug.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static void dump_hub_information(unsigned long errst0, unsigned long errst1)
 {
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index afc1cadbba37..ba8f46d80ab8 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -11,7 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/addrspace.h>
 #include <asm/ptrace.h>
 #include <asm/tlbdebug.h>
diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c
index 059e28c8fd97..99c720be72d2 100644
--- a/arch/mips/sibyte/common/sb_tbprof.c
+++ b/arch/mips/sibyte/common/sb_tbprof.c
@@ -54,7 +54,7 @@
 #define K_INT_PERF_CNT K_BCM1480_INT_PERF_CNT
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define SBPROF_TB_MAJOR 240
 
diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c
index 064fa194de2b..2578b7ae7dd5 100644
--- a/arch/mn10300/kernel/fpu.c
+++ b/arch/mn10300/kernel/fpu.c
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
  */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpu.h>
 #include <asm/elf.h>
 #include <asm/exceptions.h>
diff --git a/arch/mn10300/kernel/mn10300_ksyms.c b/arch/mn10300/kernel/mn10300_ksyms.c
index f9eb9753a404..ec6c4f8f93a6 100644
--- a/arch/mn10300/kernel/mn10300_ksyms.c
+++ b/arch/mn10300/kernel/mn10300_ksyms.c
@@ -9,7 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index cbede4e88dee..e5def2217f72 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -26,7 +26,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/processor.h>
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
index 5bd58514e739..976020f469c1 100644
--- a/arch/mn10300/kernel/ptrace.c
+++ b/arch/mn10300/kernel/ptrace.c
@@ -19,7 +19,7 @@
 #include <linux/regset.h>
 #include <linux/elf.h>
 #include <linux/tracehook.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c
index 2ad7f32fa122..1b3d80d8a171 100644
--- a/arch/mn10300/kernel/setup.c
+++ b/arch/mn10300/kernel/setup.c
@@ -25,7 +25,7 @@
 #include <linux/cpu.h>
 #include <asm/processor.h>
 #include <linux/console.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 #include <asm/io.h>
 #include <asm/smp.h>
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index cd8cb1d1176b..2f3cb5734235 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -25,7 +25,7 @@
 #include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpu.h>
 #include "sigframe.h"
 
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
index 815f1355fad4..f999981e55c0 100644
--- a/arch/mn10300/kernel/sys_mn10300.c
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -21,7 +21,7 @@
 #include <linux/file.h>
 #include <linux/tty.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 asmlinkage long old_mmap(unsigned long addr, unsigned long len,
 			 unsigned long prot, unsigned long flags,
diff --git a/arch/mn10300/lib/checksum.c b/arch/mn10300/lib/checksum.c
index b6580f5d89ee..0f569151ef11 100644
--- a/arch/mn10300/lib/checksum.c
+++ b/arch/mn10300/lib/checksum.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 #include "internal.h"
 
diff --git a/arch/mn10300/mm/cache-smp.c b/arch/mn10300/mm/cache-smp.c
index 2d23b9eeee62..e80996064d3d 100644
--- a/arch/mn10300/mm/cache-smp.c
+++ b/arch/mn10300/mm/cache-smp.c
@@ -18,7 +18,7 @@
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 #include "cache-smp.h"
 
diff --git a/arch/mn10300/mm/cache.c b/arch/mn10300/mm/cache.c
index 0a1f0aa92ebc..0b925cce2b83 100644
--- a/arch/mn10300/mm/cache.c
+++ b/arch/mn10300/mm/cache.c
@@ -17,7 +17,7 @@
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 #include "cache-smp.h"
 
diff --git a/arch/mn10300/mm/extable.c b/arch/mn10300/mm/extable.c
index 25e5485ab87d..305de461cb8f 100644
--- a/arch/mn10300/mm/extable.c
+++ b/arch/mn10300/mm/extable.c
@@ -10,7 +10,7 @@
  */
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 97a1ec0beeec..8ce677d5575e 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -29,7 +29,7 @@
 #include <linux/gfp.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/dma.h>
diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c
index b9920b1edd5a..31d04da85743 100644
--- a/arch/mn10300/mm/misalignment.c
+++ b/arch/mn10300/mm/misalignment.c
@@ -23,7 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
 #include <asm/smp.h>
diff --git a/arch/mn10300/proc-mn2ws0050/proc-init.c b/arch/mn10300/proc-mn2ws0050/proc-init.c
index 950cc8dbb284..25b1b453c515 100644
--- a/arch/mn10300/proc-mn2ws0050/proc-init.c
+++ b/arch/mn10300/proc-mn2ws0050/proc-init.c
@@ -16,7 +16,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
 #include <asm/smp.h>
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 81f7da7b1d55..72ed30a93c85 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -19,7 +19,7 @@
 
 #include <asm/traps.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_SPINLOCK(die_lock);
 
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 83ccf7c0c58d..86e31cf1de1d 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -24,7 +24,7 @@
 #include <linux/semaphore.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 #include <asm/io.h>
 #include <asm/hardirq.h>
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 277123bb4bf8..d7990df9025a 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -36,7 +36,7 @@
 #include <linux/mqueue.h>
 #include <linux/fs.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/processor.h>
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index c82be69b43c6..265f10fb3930 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -30,7 +30,7 @@
 #include <asm/processor.h>
 #include <asm/syscall.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 3d3f6062f49c..a4574cb4b0fb 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -31,7 +31,7 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kallsyms.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/segment.h>
 #include <asm/io.h>
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index e94cd225e816..b1a7435e786a 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 78d30d2ea2d8..1c4fe61a592b 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -38,7 +38,7 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/pdc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_64BIT
 #define FRAME_SIZE	128
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 3cad8aadc69e..7484b3d11e0d 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(__xchg64);
 EXPORT_SYMBOL(__cmpxchg_u64);
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 EXPORT_SYMBOL(lclear_user);
 EXPORT_SYMBOL(lstrnlen_user);
 
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index b6298a85e8ae..697c53543a4d 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -33,7 +33,7 @@
 #include <asm/io.h>
 #include <asm/page.h>	/* get_order */
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlbflush.h>	/* for purge_tlb_*() macros */
 
 static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 6eabce62463b..e282a5131d77 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -48,7 +48,7 @@
 #include <linux/miscdevice.h>
 #include <linux/spinlock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/perf.h>
 #include <asm/parisc-device.h>
 #include <asm/processor.h>
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index e02d7b4d2b69..f8b6959d2d97 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -24,7 +24,7 @@
 #include <linux/signal.h>
 #include <linux/audit.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 2264f68f3c2f..e58925ac64d1 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -27,7 +27,7 @@
 #include <linux/elf.h>
 #include <asm/ucontext.h>
 #include <asm/rt_sigframe.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index c342b2e17492..70aaabb8b3cb 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -31,7 +31,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "signal32.h"
 
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index a81e177cac7b..bf3294171230 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -23,7 +23,7 @@
  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/elf.h>
 #include <linux/file.h>
 #include <linux/fs.h>
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 4215f5596c8b..037d81f00520 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -28,7 +28,7 @@
 #include <linux/platform_device.h>
 #include <linux/ftrace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/page.h>
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 2b65c0177778..0a21067ac0a3 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -26,7 +26,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/hardirq.h>
 #include <asm/traps.h>
 
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index e278a87f43cc..1b73690477c5 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/assembly.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index ae66d31f9ecf..ba6384da6ade 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -20,7 +20,7 @@
 #include <net/checksum.h>
 #include <asm/byteorder.h>
 #include <asm/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define addc(_t,_r)                     \
 	__asm__ __volatile__ (          \
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 81592562e0f8..ba47c70712f9 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -15,7 +15,7 @@
 #include <linux/threads.h>
 #include <asm/cacheflush.h>
 #include <asm/checksum.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/epapr_hcalls.h>
 
 #include <uapi/asm/ucontext.h>
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 033f3385fa49..8d58c61908f7 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -20,7 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cache.h>
 #include <asm/cputable.h>
 #include <asm/emulated_ops.h>
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index cfa0f81a5bb0..d10ad258d41a 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -18,7 +18,7 @@
 #include <asm/kdump.h>
 #include <asm/prom.h>
 #include <asm/firmware.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/rtas.h>
 
 #ifdef DEBUG
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 03d089b3ed72..4d3aa05e28be 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -33,7 +33,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/processor.h>
 #include <asm/sstep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Stores the breakpoints currently in use on each breakpoint address
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 3c05c311e35e..a018f5cae899 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -55,7 +55,7 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ad108b842669..735ff3d3f77d 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -35,7 +35,7 @@
 #include <asm/code-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/sstep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 30b89d5cbb03..3f7ba0f5bf29 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -22,7 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/bug.h>
 #include <asm/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/sort.h>
 #include <asm/setup.h>
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34d2c595de23..d5e2b8309939 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -28,7 +28,7 @@
 #include <linux/pagemap.h>
 #include <linux/pstore.h>
 #include <linux/zlib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 678f87a63645..41c86c6b6e4d 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -24,7 +24,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/machdep.h>
 
 #undef DEBUG
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index c30612aad68e..56548bf6231f 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -24,7 +24,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/rtas.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/prom.h>
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index b1ec62f2cc31..e4744ff38a17 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -34,7 +34,7 @@
 #include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 1e887f3a61a6..f37eb53de1a1 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -29,7 +29,7 @@
 #include <linux/signal.h>
 #include <linux/compat.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index c82eed97bd22..df56dfc4b681 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -24,7 +24,7 @@
 #include <linux/bitops.h>
 #include <linux/rtc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <asm/io.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 6a3e5de544ce..112cc3b2ee1a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -35,7 +35,7 @@
 #include <asm/page.h>
 #include <asm/param.h>
 #include <asm/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/udbg.h>
 #include <asm/syscalls.h>
 #include <asm/smp.h>
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index db2b482af658..f6f6a8a5103a 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -19,7 +19,7 @@
 #include <linux/proc_fs.h>
 #include <linux/reboot.h>
 #include <asm/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/rtas.h>
 
 #define MODULE_VERS "1.0"
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index a26a02006576..2bf1f9b5b34b 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -22,7 +22,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5fe79182f0fa..7fcf1f7f01c1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -29,7 +29,7 @@
 #include <asm/bootx.h>
 #include <asm/btext.h>
 #include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pmac_feature.h>
 #include <asm/sections.h>
 #include <asm/nvram.h>
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index bbe77aed198d..3a3671172436 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -15,7 +15,7 @@
 #include <linux/key.h>
 #include <linux/context_tracking.h>
 #include <asm/hw_breakpoint.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/debug.h>
 #include <asm/tm.h>
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 27aa913ac91d..97bb1385e771 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -37,7 +37,7 @@
 #include <linux/binfmts.h>
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/syscalls.h>
 #include <asm/sigcontext.h>
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 96698fdf93b4..c83c115858c1 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -27,7 +27,7 @@
 
 #include <asm/sigcontext.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 8a285876aef8..15f216d022e2 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -44,7 +44,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/time.h>
 #include <asm/mmu_context.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 644cce3d8dce..de04c9fbb5cd 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -36,7 +36,7 @@
 #include <linux/file.h>
 #include <linux/personality.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/syscalls.h>
 #include <asm/time.h>
 #include <asm/unistd.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index be9751f1cb2a..19397e2a8bf5 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -64,7 +64,7 @@
 #include <asm/nvram.h>
 #include <asm/cache.h>
 #include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/time.h>
 #include <asm/prom.h>
 #include <asm/irq.h>
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 4239aaf74886..e6cc56b61d01 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -40,7 +40,7 @@
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
 #include <asm/rtas.h>
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index c4bfadb2606b..2d8f6d8ccafc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -7,7 +7,7 @@
 #include <linux/sched.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Functions in vector.S */
 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b6952dd23152..019f008775b9 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -25,7 +25,7 @@
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8dcbe37a4dac..66b2a35be424 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -39,7 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 826c541a12af..1482961ceb4d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -28,7 +28,7 @@
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 02176fd52f84..f102616febc7 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -17,7 +17,7 @@
 
 #include <linux/anon_inodes.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index ef27fbd5d9c5..20528701835b 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -11,7 +11,7 @@
 #include <linux/kvm.h>
 #include <linux/err.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 3bdc639157c1..20dff102a06f 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -14,7 +14,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/spinlock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index df3f2706d3e5..0514cbd4e533 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -30,7 +30,7 @@
 #include <linux/fs.h>
 
 #include <asm/cputable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/cacheflush.h>
 #include <asm/dbell.h>
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index ed38f8114118..fe312c160d97 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -29,7 +29,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mpic.h>
 #include <asm/kvm_para.h>
 #include <asm/kvm_host.h>
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index efd1183a6b16..cd892dec7cb6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,7 +30,7 @@
 #include <linux/irqbypass.h>
 #include <linux/kvm_irqfd.h>
 #include <asm/cputable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
 #include <asm/cputhreads.h>
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index 08e3a3356c40..a0cb63fb76a1 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -21,7 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/checksum.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 			       int len, __wsum sum, int *err_ptr)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb8eb82..c1746df0f88e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -13,7 +13,7 @@
 #include <linux/mm.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 int patch_instruction(unsigned int *addr, unsigned int instr)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 9c78a9c102c3..06c7e9b88408 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -14,7 +14,7 @@
 #include <linux/prefetch.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cpu_has_feature.h>
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c
index 5eea6f3c1e03..9bd3a3dad78d 100644
--- a/arch/powerpc/lib/usercopy_64.c
+++ b/arch/powerpc/lib/usercopy_64.c
@@ -7,7 +7,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c
index 549baba5948f..a5e7ad1384ee 100644
--- a/arch/powerpc/math-emu/fabs.c
+++ b/arch/powerpc/math-emu/fabs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 fabs(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c
index 0158a16e2b82..29de37e0e0da 100644
--- a/arch/powerpc/math-emu/fadd.c
+++ b/arch/powerpc/math-emu/fadd.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c
index 5930f40a8687..7093c5b58002 100644
--- a/arch/powerpc/math-emu/fadds.c
+++ b/arch/powerpc/math-emu/fadds.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c
index 5bce011c2aec..5d644467221c 100644
--- a/arch/powerpc/math-emu/fcmpo.c
+++ b/arch/powerpc/math-emu/fcmpo.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c
index d4fb1babc6ad..0f9bf4864832 100644
--- a/arch/powerpc/math-emu/fcmpu.c
+++ b/arch/powerpc/math-emu/fcmpu.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c
index f694440ddc00..716d6da7f204 100644
--- a/arch/powerpc/math-emu/fctiw.c
+++ b/arch/powerpc/math-emu/fctiw.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c
index 71e782fd4fe3..7212fa7cfd36 100644
--- a/arch/powerpc/math-emu/fctiwz.c
+++ b/arch/powerpc/math-emu/fctiwz.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c
index a29239c05e3e..e1e452069e49 100644
--- a/arch/powerpc/math-emu/fdiv.c
+++ b/arch/powerpc/math-emu/fdiv.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c
index 526bc261275f..5511e2d1c3ad 100644
--- a/arch/powerpc/math-emu/fdivs.c
+++ b/arch/powerpc/math-emu/fdivs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c
index 8c3f20aa5a95..2b6fae0bc8c2 100644
--- a/arch/powerpc/math-emu/fmadd.c
+++ b/arch/powerpc/math-emu/fmadd.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c
index 794fb31e59d1..aff35f24a236 100644
--- a/arch/powerpc/math-emu/fmadds.c
+++ b/arch/powerpc/math-emu/fmadds.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c
index bd55384b8196..f6347911f6a3 100644
--- a/arch/powerpc/math-emu/fmr.c
+++ b/arch/powerpc/math-emu/fmr.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 fmr(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c
index 626f6fed84ac..1fb26cebe04e 100644
--- a/arch/powerpc/math-emu/fmsub.c
+++ b/arch/powerpc/math-emu/fmsub.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c
index 3425bc899760..f73965453e05 100644
--- a/arch/powerpc/math-emu/fmsubs.c
+++ b/arch/powerpc/math-emu/fmsubs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c
index 2c1929779892..ffd31b549290 100644
--- a/arch/powerpc/math-emu/fmul.c
+++ b/arch/powerpc/math-emu/fmul.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c
index f5ad5c9c77d0..21aee431ca9d 100644
--- a/arch/powerpc/math-emu/fmuls.c
+++ b/arch/powerpc/math-emu/fmuls.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c
index a7d34f3d9499..af877a53d264 100644
--- a/arch/powerpc/math-emu/fnabs.c
+++ b/arch/powerpc/math-emu/fnabs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 fnabs(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c
index 1e988cd9c6cc..8417d174758c 100644
--- a/arch/powerpc/math-emu/fneg.c
+++ b/arch/powerpc/math-emu/fneg.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 fneg(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c
index e817bc5453ef..6316ef0e0874 100644
--- a/arch/powerpc/math-emu/fnmadd.c
+++ b/arch/powerpc/math-emu/fnmadd.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c
index 4db4b7d9ba8d..9ffe037df2b9 100644
--- a/arch/powerpc/math-emu/fnmadds.c
+++ b/arch/powerpc/math-emu/fnmadds.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c
index f65979fa770e..f97a9cfb54ea 100644
--- a/arch/powerpc/math-emu/fnmsub.c
+++ b/arch/powerpc/math-emu/fnmsub.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c
index 9021dacc03b8..7fa1217bd930 100644
--- a/arch/powerpc/math-emu/fnmsubs.c
+++ b/arch/powerpc/math-emu/fnmsubs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c
index 49ccf2cc6a5a..b621a790aa67 100644
--- a/arch/powerpc/math-emu/fre.c
+++ b/arch/powerpc/math-emu/fre.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fre(void *frD, void *frB)
 {
diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c
index 10ecbd08b79e..211c30d0145f 100644
--- a/arch/powerpc/math-emu/fres.c
+++ b/arch/powerpc/math-emu/fres.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 fres(void *frD, void *frB)
diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c
index ddcc14664b1a..3e3bc73e27ae 100644
--- a/arch/powerpc/math-emu/frsp.c
+++ b/arch/powerpc/math-emu/frsp.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c
index 1d0a3a0fd0e6..7c2ce43750dc 100644
--- a/arch/powerpc/math-emu/frsqrte.c
+++ b/arch/powerpc/math-emu/frsqrte.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 frsqrte(void *frD, void *frB)
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c
index 7e838e380314..269951a8c650 100644
--- a/arch/powerpc/math-emu/frsqrtes.c
+++ b/arch/powerpc/math-emu/frsqrtes.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int frsqrtes(void *frD, void *frB)
 {
diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c
index 1b0c14498032..32b62c6c7f48 100644
--- a/arch/powerpc/math-emu/fsel.c
+++ b/arch/powerpc/math-emu/fsel.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c
index a55fc7d49983..0e2a34b616dc 100644
--- a/arch/powerpc/math-emu/fsqrt.c
+++ b/arch/powerpc/math-emu/fsqrt.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c
index 31dccbfc39ff..420cf19b5fd4 100644
--- a/arch/powerpc/math-emu/fsqrts.c
+++ b/arch/powerpc/math-emu/fsqrts.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c
index 02c5dff458ba..feedd705cf62 100644
--- a/arch/powerpc/math-emu/fsub.c
+++ b/arch/powerpc/math-emu/fsub.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c
index 5d9b18c35e07..74190514063e 100644
--- a/arch/powerpc/math-emu/fsubs.c
+++ b/arch/powerpc/math-emu/fsubs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c
index 79ac76d596c3..d998a50740a0 100644
--- a/arch/powerpc/math-emu/lfd.c
+++ b/arch/powerpc/math-emu/lfd.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/double.h>
diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c
index 434ed27be8db..1ee10b83d7e3 100644
--- a/arch/powerpc/math-emu/lfs.c
+++ b/arch/powerpc/math-emu/lfs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index ab151f040502..76ee2e5dba65 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -5,7 +5,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/reg.h>
 #include <asm/switch_to.h>
 
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index 28337c9709ae..581f404caa1d 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -22,7 +22,7 @@
 #include <linux/types.h>
 #include <linux/prctl.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/reg.h>
 
 #define FP_EX_BOOKE_E500_SPE
diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c
index e948d5708e2b..8e8e72397ebc 100644
--- a/arch/powerpc/math-emu/mcrfs.c
+++ b/arch/powerpc/math-emu/mcrfs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c
index 5526cf96ede5..e00fdc22a0bc 100644
--- a/arch/powerpc/math-emu/mffs.c
+++ b/arch/powerpc/math-emu/mffs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c
index bc985585bca8..5ed3e7d5063e 100644
--- a/arch/powerpc/math-emu/mtfsb0.c
+++ b/arch/powerpc/math-emu/mtfsb0.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c
index fe6ed5ac85b3..602aa16eda81 100644
--- a/arch/powerpc/math-emu/mtfsb1.c
+++ b/arch/powerpc/math-emu/mtfsb1.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c
index 44b0fc8214f4..b0d5593ad357 100644
--- a/arch/powerpc/math-emu/mtfsf.c
+++ b/arch/powerpc/math-emu/mtfsf.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c
index fd2acc26813b..5df30541a784 100644
--- a/arch/powerpc/math-emu/mtfsfi.c
+++ b/arch/powerpc/math-emu/mtfsfi.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c
index 33a165c8df0f..6baeaec134a2 100644
--- a/arch/powerpc/math-emu/stfd.c
+++ b/arch/powerpc/math-emu/stfd.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 stfd(void *frS, void *ea)
diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c
index f15a35f67e2c..9da7c5d1a872 100644
--- a/arch/powerpc/math-emu/stfiwx.c
+++ b/arch/powerpc/math-emu/stfiwx.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int
 stfiwx(u32 *frS, void *ea)
diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c
index 6122147356d1..62bd25264fb5 100644
--- a/arch/powerpc/math-emu/stfs.c
+++ b/arch/powerpc/math-emu/stfs.c
@@ -1,6 +1,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sfp-machine.h>
 #include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 31a5d42df8c9..61ac468c87c6 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -43,7 +43,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 #include <asm/bootx.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 139dec421e57..080d49b26c3a 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -48,7 +48,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 #include <asm/machdep.h>
 #include <asm/setup.h>
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8410b4bb36ed..80334937e14f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -42,7 +42,7 @@
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/machdep.h>
 #include <asm/prom.h>
 #include <asm/tlbflush.h>
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a000c3585390..93abf8a9813d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -51,7 +51,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/smp.h>
 #include <asm/machdep.h>
 #include <asm/tlb.h>
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index d5543514c1df..5c096c01e8bd 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -15,7 +15,7 @@
 #include <linux/hugetlb.h>
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 
 /*
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 85c85eb3e245..e5a891ae80ee 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -30,7 +30,7 @@
 #include <linux/coredump.h>
 #include <linux/binfmts.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "spufs.h"
 
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 3a147122bc98..a35e2c29d7ee 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -35,7 +35,7 @@
 #include <asm/time.h>
 #include <asm/spu.h>
 #include <asm/spu_info.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "spufs.h"
 #include "sputrace.h"
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5364d4a54249..d8af9bc0489f 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -38,7 +38,7 @@
 #include <asm/prom.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "spufs.h"
 
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index a87200a535fa..0d290ea83dc1 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -5,7 +5,7 @@
 #include <linux/namei.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "spufs.h"
 
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index 9ef8cc3378d0..c3ede2c365c3 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/rtas.h>
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index f2344cbd2f46..ecd6d9177d13 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -18,7 +18,7 @@
 #include <linux/vmalloc.h>
 #include <linux/fcntl.h>
 #include <linux/kobject.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/opal.h>
 
 struct elog_obj {
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index e4169d68cb32..4886eb8b6381 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -21,7 +21,7 @@
 #include <asm/xics.h>
 #include <asm/opal.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/debug.h>
 
 static int opal_lpc_chip_id = -1;
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index e315e704cca7..2d6ee1c5ad85 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -29,7 +29,7 @@
 #include <asm/opal-prd.h>
 #include <asm/opal.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 /**
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 972328829387..4839db385bb0 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -37,7 +37,7 @@
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/memory.h>
 #include <asm/plpar_wrappers.h>
 
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 76caa4a45ccd..5cb2e4beffc5 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -24,7 +24,7 @@
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/rtas.h>
 
 static struct workqueue_struct *pseries_hp_wq;
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 39049e4884fb..6b04e3f0f982 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -24,7 +24,7 @@
 #include <linux/debugfs.h>
 #include <linux/spinlock.h>
 #include <asm/smp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/firmware.h>
 #include <asm/lppaca.h>
 #include <asm/debug.h>
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index e6397976060e..779fc2a1c8f7 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -25,7 +25,7 @@
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/lppaca.h>
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 79aef8c1c5b3..69cedc1b3b8a 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,7 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index cc66c49f07aa..e5bf1e84047f 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -19,7 +19,7 @@
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu.h>
 
 #include "of_helpers.h"
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index 7d28cabf1206..c47585a78b69 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -27,7 +27,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
 
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 6f5a8d177c42..d0e9f178a324 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -25,7 +25,7 @@
 #include <asm/debug.h>
 #include <asm/prom.h>
 #include <asm/scom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 const struct scom_controller *scom_controller;
 EXPORT_SYMBOL_GPL(scom_controller);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 53a16aa4d384..5692dd569b9b 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -30,7 +30,7 @@
 #include <asm/byteorder.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/machdep.h>
 #include <asm/pci-bridge.h>
 #include <asm/tsi108.h>
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index f587c4811faf..5a8dfa22da7c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -28,7 +28,7 @@
 #include <linux/platform_device.h>
 #include <asm/appldata.h>
 #include <asm/vtimer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 4da604ebf6fd..8515dd5a5663 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -6,7 +6,7 @@
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/sclp.h>
 #include <asm/ipl.h>
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 1113389d0a39..daf9bb063aaa 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -21,7 +21,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/timex.h>
 #include <asm/cpacf.h>
 
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index d7f100c53f07..12bf4fef2a68 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -11,7 +11,7 @@
 #ifndef _S390_CHECKSUM_H
 #define _S390_CHECKSUM_H
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * computes the checksum of a memory block at buff, length len,
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index a7b2d7504049..280b60a0bcd4 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -17,7 +17,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <asm/cio.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
 #define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 515fea5a3fc4..67f7a991c929 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -8,7 +8,7 @@
 #define __S390_MMU_CONTEXT_H
 
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/ctl_reg.h>
 
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 0f9cd90c11af..96df4547377a 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -51,7 +51,7 @@
 #include <linux/slab.h>
 
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/scm.h>
 #include <net/sock.h>
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 6f2a6ab13cb5..362350cc485c 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -23,7 +23,7 @@
 #include <linux/personality.h>
 #include <linux/binfmts.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/lowcore.h>
 #include <asm/switch_to.h>
 #include "compat_linux.h"
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index aa12de72fd47..79f8ae933520 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -19,7 +19,7 @@
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/sysctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index c74c59236f44..9f017cf417f6 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -22,7 +22,7 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dis.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index fdb40424acfe..84e0557b16fe 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -33,7 +33,7 @@
 #include <linux/ftrace.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dis.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index b81ab8882e2e..7447ba509c30 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/switch_to.h>
 #include "entry.h"
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 9f241d1efeda..62a4c263e887 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -26,7 +26,7 @@
 #include <linux/syscalls.h>
 #include <linux/compat.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/lowcore.h>
 #include <asm/switch_to.h>
 #include "entry.h"
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index f145490cce54..b7af452978ca 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -27,7 +27,7 @@
 #include <linux/personality.h>
 #include <linux/unistd.h>
 #include <linux/ipc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "entry.h"
 
 /*
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 867d0a057046..ec76315c9ee5 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -38,7 +38,7 @@
 #include <linux/clockchips.h>
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/facility.h>
 #include <asm/delay.h>
 #include <asm/div64.h>
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index d0539f76fd24..283ad7840335 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpu/api.h>
 #include "entry.h"
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index af13f1a135b6..6843dd5a1cba 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -20,7 +20,7 @@
 #include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/dis.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sclp.h>
 #include <asm/isc.h>
 #include <asm/gmap.h>
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index b3e9d18f2ec6..b67454ad8408 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -29,7 +29,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/dma.h>
diff --git a/arch/score/include/asm/checksum.h b/arch/score/include/asm/checksum.h
index 539d9fd45d21..0338927f4826 100644
--- a/arch/score/include/asm/checksum.h
+++ b/arch/score/include/asm/checksum.h
@@ -2,7 +2,7 @@
 #define _ASM_SCORE_CHECKSUM_H
 
 #include <linux/in6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * computes the checksum of a memory block at buff, length len,
diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c
index 4f7314d5f334..8b75e54816c1 100644
--- a/arch/score/kernel/ptrace.c
+++ b/arch/score/kernel/ptrace.c
@@ -29,7 +29,7 @@
 #include <linux/ptrace.h>
 #include <linux/regset.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * retrieve the contents of SCORE userspace general registers
diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c
index 5cea1e750cec..d948a6818961 100644
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -29,7 +29,7 @@
 #include <asm/cacheflush.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long exception_handlers[32];
 
diff --git a/arch/score/lib/checksum_copy.c b/arch/score/lib/checksum_copy.c
index 9b770b30e8a5..39b99ef61804 100644
--- a/arch/score/lib/checksum_copy.c
+++ b/arch/score/lib/checksum_copy.c
@@ -25,7 +25,7 @@
 
 #include <net/checksum.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned int csum_partial_copy(const char *src, char *dst,
 				int len, unsigned int sum)
diff --git a/arch/sh/boards/mach-landisk/gio.c b/arch/sh/boards/mach-landisk/gio.c
index 8132dff078fb..32c317f5d991 100644
--- a/arch/sh/boards/mach-landisk/gio.c
+++ b/arch/sh/boards/mach-landisk/gio.c
@@ -18,7 +18,7 @@
 #include <linux/cdev.h>
 #include <linux/fs.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <mach-landisk/mach/gio.h>
 #include <mach-landisk/mach/iodata_landisk.h>
 
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index 208a9753ab38..ae1dfdb0013b 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -11,7 +11,7 @@
  * Modified to use standard LinuxSH BIOS by Greg Banks 7Jul2000
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
 
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 9f417feaf6e8..35ffdd081d26 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -10,7 +10,7 @@
 #ifdef __KERNEL__
 #include <cpu/mmu_context.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm-generic/mm_hooks.h>
 
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index c8b3be1b54e6..c4f01c5c8736 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -16,7 +16,7 @@
 #include <linux/log2.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <asm/cache.h>
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index 53b8eeb1db20..c32e66079f7c 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -16,7 +16,7 @@
 #include <linux/cpuidle.h>
 #include <linux/export.h>
 #include <asm/suspend.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static unsigned long cpuidle_mode[] = {
 	SUSP_SH_SLEEP, /* regular sleep mode */
diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c
index ac37b7234f85..fba2be5d72e9 100644
--- a/arch/sh/kernel/cpu/shmobile/pm.c
+++ b/arch/sh/kernel/cpu/shmobile/pm.c
@@ -14,7 +14,7 @@
 #include <linux/io.h>
 #include <linux/suspend.h>
 #include <asm/suspend.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/bl_bit.h>
 
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index 569e7b171c01..b33be505361e 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -7,7 +7,7 @@
 #include <linux/errno.h>
 #include <linux/crash_dump.h>
 #include <linux/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index f8ce36286cea..4d4e7a2a774b 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <asm/mmu_context.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/io_trapped.h>
 
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 6c0378c0b8b5..bc3591125df7 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -16,7 +16,7 @@
 #include <linux/ratelimit.h>
 #include <asm/processor.h>
 #include <asm/machvec.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/thread_info.h>
 #include <cpu/mmu_context.h>
 
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 83acbf3f6de8..1653ff64b103 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -15,7 +15,7 @@
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index ee12e9451874..51741850a715 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -23,7 +23,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/prefetch.h>
 #include <linux/stackprotector.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/fpu.h>
 #include <asm/syscalls.h>
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 9d3e9916555d..e0b271bffd6a 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -26,7 +26,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <asm/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/fpu.h>
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index c1a6b89bfe70..1aabfd356b35 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -26,7 +26,7 @@
 #include <linux/elf.h>
 #include <linux/regset.h>
 #include <linux/hw_breakpoint.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 5cea973a65b2..c49d0d05a215 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -32,7 +32,7 @@
 #include <linux/elf.h>
 #include <linux/regset.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index e7b49d81053e..3a44c753b642 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -31,7 +31,7 @@
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/elf.h>
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index 26a0774f5272..6ee3740e009e 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -18,7 +18,7 @@
 #include <linux/screen_info.h>
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 #include <asm/io.h>
 #include <asm/delay.h>
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index f7c3d5c25caf..5128d3001ee5 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -25,7 +25,7 @@
 #include <linux/io.h>
 #include <linux/tracehook.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/syscalls.h>
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index d8a3f0d22809..7b77f1812434 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -23,7 +23,7 @@
 #include <linux/stddef.h>
 #include <linux/tracehook.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/fpu.h>
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 8c6a350df751..6576e5ee1fc3 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -23,7 +23,7 @@
 #include <linux/fs.h>
 #include <linux/ipc.h>
 #include <asm/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
 #include <asm/cachectl.h>
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index b66d1c62eb19..d5287d76809c 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -13,7 +13,7 @@
 #include <linux/fs.h>
 #include <linux/ipc.h>
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/syscalls.h>
 
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index d208c27ccc67..00835edb6e20 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -25,7 +25,7 @@
 #include <linux/sysctl.h>
 #include <linux/module.h>
 #include <linux/perf_event.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/alignment.h>
 #include <asm/processor.h>
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index 04aa55fa8c75..5078cb809750 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -14,7 +14,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 777e50f33c00..4eb9d43578b4 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -12,7 +12,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index e37523f65195..031634f273fa 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -17,7 +17,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index d1bffbcd9d52..d94dadedf74f 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -17,7 +17,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 
 extern void __weak sh4__flush_region_init(void);
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 7729cca727eb..6cd2aa395817 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -20,7 +20,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sh/mm/extable_32.c b/arch/sh/mm/extable_32.c
index c1cf4463d09d..9cfcbb5848e4 100644
--- a/arch/sh/mm/extable_32.c
+++ b/arch/sh/mm/extable_32.c
@@ -5,7 +5,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c
index f05499688d88..96edaff8c983 100644
--- a/arch/sh/mm/extable_64.c
+++ b/arch/sh/mm/extable_64.c
@@ -12,7 +12,7 @@
  */
 #include <linux/rwsem.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern unsigned long copy_user_memcpy, copy_user_memcpy_end;
 extern void __copy_user_fixup(void);
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index 36312d254faf..82f8197b93f6 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -14,7 +14,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Nothing too terribly exciting here ..
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index 7160c9fd6fe3..7b2cc490ebb7 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -25,7 +25,7 @@
 #include <linux/vmalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/sizes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 6554fb439f0e..5c66665bff8b 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -21,7 +21,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c
index 8557548fc53e..8ff966dd0c74 100644
--- a/arch/sh/mm/tlbex_64.c
+++ b/arch/sh/mm/tlbex_64.c
@@ -36,7 +36,7 @@
 #include <linux/kprobes.h>
 #include <asm/tlb.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index f33fdd2558e8..bd0715d5dca4 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -24,7 +24,7 @@
 #include <linux/interrupt.h>
 #include <asm/io.h>
 #include <asm/tlb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 
diff --git a/arch/sh/oprofile/backtrace.c b/arch/sh/oprofile/backtrace.c
index 9c88dcd56e86..c7695f99c8c3 100644
--- a/arch/sh/oprofile/backtrace.c
+++ b/arch/sh/oprofile/backtrace.c
@@ -19,7 +19,7 @@
 #include <linux/mm.h>
 #include <asm/unwinder.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sections.h>
 #include <asm/stacktrace.h>
 
diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
index eff748c871ec..e25af5fc99fd 100644
--- a/arch/sparc/include/asm/checksum_32.h
+++ b/arch/sparc/include/asm/checksum_32.h
@@ -16,7 +16,7 @@
  */
 
 #include <linux/in6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
index 0395d75322e9..96a5ed58cea6 100644
--- a/arch/sparc/include/asm/checksum_64.h
+++ b/arch/sparc/include/asm/checksum_64.h
@@ -16,7 +16,7 @@
  */
 
 #include <linux/in6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index 742f6c4436bf..9ebc37e7d64c 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -17,7 +17,7 @@
 
 #include <asm/io.h>
 #include <asm/oplib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/apc.h>
 #include <asm/processor.h>
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 34a7930b76ef..3bebf395252c 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -35,7 +35,7 @@
 #include <asm/timer.h>
 #include <asm/smp.h>
 #include <asm/starfire.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cache.h>
 #include <asm/cpudata.h>
 #include <asm/auxio.h>
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index b0377db12d83..2d13a4fc0384 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -11,7 +11,7 @@
 #include <linux/context_tracking.h>
 #include <asm/signal.h>
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* We do not have hardware single-stepping on sparc64.
  * So we implement software single-stepping with breakpoint
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 8a6982dfd733..c0765bbf60ea 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -17,7 +17,7 @@
 #include <asm/hypervisor.h>
 #include <asm/mdesc.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/oplib.h>
 #include <asm/smp.h>
 
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9c1878f4fa9f..015e55a7495d 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -21,7 +21,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 24384e1dc33d..a38787b84322 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -33,7 +33,7 @@
 #include <asm/pcic.h>
 #include <asm/timex.h>
 #include <asm/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq_regs.h>
 
 #include "kernel.h"
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 97d123107ecb..f12b23f7b515 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -15,7 +15,7 @@
 
 #include <asm/io.h>
 #include <asm/oplib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/processor.h>
 
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b7780a5bef11..48ffc3e7d1dd 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -28,7 +28,7 @@
 
 #include <asm/auxio.h>
 #include <asm/oplib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 47ff5588e521..d249ca10b203 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -33,7 +33,7 @@
 #include <linux/nmi.h>
 #include <linux/context_tracking.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c
index a331fdc11a2c..eca3dc76793c 100644
--- a/arch/sparc/kernel/ptrace_32.c
+++ b/arch/sparc/kernel/ptrace_32.c
@@ -23,7 +23,7 @@
 #include <linux/tracehook.h>
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
 #include "kernel.h"
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 96494b2ef41f..901063c1cf7e 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -31,7 +31,7 @@
 
 #include <asm/asi.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/psrcompat.h>
 #include <asm/visasm.h>
 #include <asm/spitfire.h>
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 91cc2f4ae4d9..b4096bb665b2 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -21,7 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/pgtable.h>
 #include <asm/psrcompat.h>
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 9c0c8fd0b292..62c3e255ae7c 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -20,7 +20,7 @@
 #include <linux/bitops.h>
 #include <linux/tracehook.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index c782c9b716db..965d50e833e7 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -25,7 +25,7 @@
 #include <linux/bitops.h>
 #include <linux/context_tracking.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/pgtable.h>
 #include <asm/fpumacro.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8182f7caf5b1..0ce347f8e4cc 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -43,7 +43,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/oplib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/starfire.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 022c30c72ebd..bca44f3e6b86 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -44,7 +44,7 @@
 #include <linux/slab.h>
 
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpumacro.h>
 #include <asm/mmu_context.h>
 #include <asm/compat_signal.h>
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 646988d4c1a3..fb7b185ee941 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -21,7 +21,7 @@
 #include <linux/smp.h>
 #include <linux/ipc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 #include "systbls.h"
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index fe8b8ee8e660..884c70331345 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -26,7 +26,7 @@
 #include <linux/export.h>
 #include <linux/context_tracking.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/utrap.h>
 #include <asm/unistd.h>
 
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index c69b21e51efc..807f7e2ce014 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -45,7 +45,7 @@
 #include <asm/smp.h>
 #include <asm/sections.h>
 #include <asm/cpudata.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq_regs.h>
 
 #include "entry.h"
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 496fa926e1e0..4bc10e44d1ca 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -29,7 +29,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpumacro.h>
 #include <asm/lsu.h>
 #include <asm/dcu.h>
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 32b61d1b6379..d20d4e3fd129 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -12,7 +12,7 @@
 #include <linux/mm.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/perf_event.h>
 
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 52c00d90d4b4..cda7fd367c4f 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -16,7 +16,7 @@
 #include <asm/ptrace.h>
 #include <asm/pstate.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
 #include <linux/perf_event.h>
diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c
index b68314050602..d852ae56ddc1 100644
--- a/arch/sparc/kernel/uprobes.c
+++ b/arch/sparc/kernel/uprobes.c
@@ -29,7 +29,7 @@
 #include <linux/kdebug.h>
 
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Compute the address of the breakpoint instruction and return it.
  *
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index c096c624ac4d..c4ac58e483a4 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -10,7 +10,7 @@
 #include <asm/ptrace.h>
 #include <asm/pstate.h>
 #include <asm/fpumacro.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
 /* OPF field of various VIS instructions.  */
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index 87bab0a3857a..435a467b0595 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -11,7 +11,7 @@
 #include <linux/smp.h>
 
 #include <asm/cacheflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "kernel.h"
 
diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
index 5ce8f2f64604..4d7e0fff054f 100644
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -68,7 +68,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/perf_event.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sfp-util_32.h"
 #include <math-emu/soft-fp.h>
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 034aadbff036..9647051853d3 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -15,7 +15,7 @@
 
 #include <asm/fpumacro.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
 #include "sfp-util_64.h"
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index a61c349448e1..768a11e6bd4f 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 37aa537b3ad8..5d2f91511c60 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -35,7 +35,7 @@
 #include <asm/oplib.h>
 #include <asm/iommu.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 9f37106ef93a..c84c54a1ac55 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -35,7 +35,7 @@
 #include <asm/syscalls.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 862973074bf9..de3eae813e52 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -25,7 +25,7 @@
 #include <linux/prctl.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <arch/abi.h>
 #include <arch/spr_def.h>
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index 4fe78c5b8394..f229e979584e 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c
@@ -27,7 +27,7 @@
 #include <linux/prctl.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <arch/abi.h>
 #include <arch/spr_def.h>
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 3282787bbcfb..6d381279b362 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -45,7 +45,7 @@
 #include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "mconsole.h"
 
 MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 3a4b58730f5f..12bdb5996bf5 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -9,7 +9,7 @@
 #include <linux/sound.h>
 #include <linux/soundcard.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <init.h>
 #include <os.h>
 
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 8a6b57108ac2..8a4c72af3bc0 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -24,7 +24,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/switch_to.h>
 
 #include <init.h>
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
index 62145c276167..3645fcb2a787 100644
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -17,7 +17,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <mem_user.h>
 
 /* These are set in mmapper_init, which is called at boot time */
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index dd16c902ff70..05523f14d7b2 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <irq_kern.h>
 #include <os.h>
 
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 0d7103c9eff3..770ec07b6a6a 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -11,7 +11,7 @@
 #include <linux/slab.h>
 #include <asm/current.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <as-layout.h>
 #include <mem_user.h>
 #include <skas.h>
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 41ebbfebb333..546302e3b7fb 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -10,7 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * If read and write race, the read will still atomically read a valid
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 034b42c7ab40..078630d6448c 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -24,7 +24,7 @@
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 6a826cbb15c4..bc2a516c190f 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -7,7 +7,7 @@
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/tracehook.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace-abi.h>
 
 void user_enable_single_step(struct task_struct *child)
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index c1d0ae069b53..6258676bed85 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -11,7 +11,7 @@
 #include <linux/syscalls.h>
 #include <asm/current.h>
 #include <asm/mman.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdd9cc59d20f..b83c61cfd154 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -25,7 +25,7 @@
 #include <asm/desc.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index cb26f18d43af..7c0a711989d2 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -27,7 +27,7 @@
 #include <linux/jiffies.h>
 #include <linux/perf_event.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/user32.h>
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cb13c0564ea7..95c0b4ae09b0 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -20,7 +20,7 @@
 #include <linux/compat.h>
 #include <linux/binfmts.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/ptrace.h>
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 719cd702b0a4..47956c6a4fd8 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -42,7 +42,7 @@
 #include <linux/slab.h>
 #include <asm/mman.h>
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <asm/vgtod.h>
 #include <asm/sys_ia32.h>
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 44b8762fa0c7..830b19dbfa31 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -1,5 +1,5 @@
 #include <asm/ftrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/string.h>
 #include <asm/page.h>
 #include <asm/checksum.h>
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index c020ee75dce7..08e7efb2c140 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -8,7 +8,7 @@
  */
 
 #include <linux/compiler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 /**
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f5fb840b43e8..33cbd3db97b9 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -7,7 +7,7 @@
 #include <linux/pfn.h>
 #include <linux/mm.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 643818a7688b..45d44c173cf9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -234,7 +234,7 @@
 #include <linux/i8253.h>
 #include <linux/cpuidle.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/desc.h>
 #include <asm/olpc.h>
 #include <asm/paravirt.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c7efbcfbeda6..87cc9ab7a13c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <asm/mce.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "mce-internal.h"
 
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 11891ca7b716..538fedea9b3f 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -10,7 +10,7 @@
 #include <linux/highmem.h>
 #include <linux/crash_dump.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static void *kdump_buf_page;
 
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index f6dfd9334b67..b2f7207ba86c 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -3,7 +3,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d9d8d16b69db..eb3509338ae0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -56,7 +56,7 @@
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/debugreg.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 3bb4c5f021f6..3d1bee9d6a72 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -33,7 +33,7 @@
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/debugreg.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 37363e46b1f0..b615a1113f58 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -23,7 +23,7 @@
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mwait.h>
 #include <asm/fpu/internal.h>
 #include <asm/debugreg.h>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0e63c0267f99..9cc7d5a330ef 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,7 +24,7 @@
 #include <linux/export.h>
 #include <linux/context_tracking.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/fpu/internal.h>
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 27538f183c3b..a3b875c9e6af 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -13,7 +13,7 @@
 #include <linux/sort.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/asm.h>
 
 extern int rodata_test_data;
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9692a5e9fdab..6c8934406dc9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -5,7 +5,7 @@
 #include <linux/regset.h>
 #include <linux/syscalls.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/desc.h>
 #include <asm/ldt.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 01f30e56f99e..ec5d7545e6dc 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -47,7 +47,7 @@
 #include <linux/slab.h>
 #include <linux/security.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 0b281217c890..1f65ff6540f0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -11,7 +11,7 @@
 #include <linux/export.h>
 #include <linux/backing-dev.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmx.h>
 #include <asm/asm.h>
 
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 9e6545f269e5..2ccc424a57d9 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -19,7 +19,7 @@
 
 #include <linux/signal.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "fpu_emu.h"
 #include "fpu_system.h"
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index e945fedf1de2..0203baefb5c0 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -27,7 +27,7 @@
 #include <linux/signal.h>
 #include <linux/regset.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/traps.h>
 #include <asm/user.h>
 #include <asm/fpu/internal.h>
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 8db26591d91a..b8ef9f9d2ffc 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -19,7 +19,7 @@
 
 #include <linux/stddef.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/vm86.h>
 
 #include "fpu_system.h"
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c
index 95228ff042c0..1643054766eb 100644
--- a/arch/x86/math-emu/load_store.c
+++ b/arch/x86/math-emu/load_store.c
@@ -18,7 +18,7 @@
  |    other processes using the emulator while swapping is in progress.      |
  +---------------------------------------------------------------------------*/
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "fpu_system.h"
 #include "exception.h"
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index d597fe7423c9..2c98965a60ba 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -19,7 +19,7 @@
 
 #include "fpu_emu.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "fpu_system.h"
 #include "exception.h"
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index fcd06f7526de..61a7e9ea9aa1 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,5 @@
 #include <linux/extable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cf8059016ec8..928d657de829 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -34,7 +34,7 @@
 #include <asm/asm.h>
 #include <asm/bios_ebda.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/dma.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 963895f9af7f..af85b686a7b0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -36,7 +36,7 @@
 
 #include <asm/processor.h>
 #include <asm/bios_ebda.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/dma.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e3353c97d086..5a287e523eab 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -20,7 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pat.h>
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 60a5a5a85505..2497bac56066 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -5,7 +5,7 @@
 
 #include <linux/mm.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace-abi.h>
 #include <skas.h>
 
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index e30202b1716e..a5c9910d234f 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -10,7 +10,7 @@
 #include <linux/errno.h>
 #define __FRAME_OFFSETS
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace-abi.h>
 
 /*
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 49e503697022..727ed442e0a5 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -9,7 +9,7 @@
 #include <linux/ptrace.h>
 #include <linux/kernel.h>
 #include <asm/unistd.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ucontext.h>
 #include <frame_kern.h>
 #include <skas.h>
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index 48e38584d5c1..5bd949da7a4a 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -6,7 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ptrace-abi.h>
 #include <os.h>
 #include <skas.h>
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 37129db76d33..276da636dd39 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -71,7 +71,7 @@
 
 #include <asm/cache.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index ec35074fcb03..3ae74d7e074b 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -12,7 +12,7 @@
 #define _XTENSA_CHECKSUM_H
 
 #include <linux/in6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <variant/core.h>
 
 /*
diff --git a/arch/xtensa/include/asm/segment.h b/arch/xtensa/include/asm/segment.h
index a2eb547a1a75..98964ad15ca2 100644
--- a/arch/xtensa/include/asm/segment.h
+++ b/arch/xtensa/include/asm/segment.h
@@ -11,6 +11,6 @@
 #ifndef _XTENSA_SEGMENT_H
 #define _XTENSA_SEGMENT_H
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #endif	/* _XTENSA_SEGEMENT_H */
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 8e10e357ee32..bcb5beb81177 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -24,7 +24,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int main(void)
 {
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 4ac3d23161cf..a265edd6ac37 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -25,7 +25,7 @@
 #include <linux/of.h>
 
 #include <asm/mxregs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/platform.h>
 
 DECLARE_PER_CPU(unsigned long, nmi_count);
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e0ded48561db..826d25104846 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -35,7 +35,7 @@
 #include <linux/rcupdate.h>
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/platform.h>
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index a651f3a628ee..32519b71d914 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -29,7 +29,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 void user_enable_single_step(struct task_struct *child)
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index e87adaa07ff3..c41294745731 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -22,7 +22,7 @@
 #include <linux/tracehook.h>
 
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/coprocessor.h>
 #include <asm/unistd.h>
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7538d802b65a..e7d30ee0fd48 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -14,7 +14,7 @@
 
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
 
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 83cf49685373..d3fd100dffc9 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -15,7 +15,7 @@
  * Kevin Chea
  *
  */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/syscall.h>
 #include <asm/unistd.h>
 #include <linux/linkage.h>
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index ce37d5b899fe..282bf721a4d6 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -35,7 +35,7 @@
 #include <asm/stacktrace.h>
 #include <asm/ptrace.h>
 #include <asm/timex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/traps.h>
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 4d2872fd9bb5..d159e9b9c018 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -19,7 +19,7 @@
 #include <asm/irq.h>
 #include <linux/in6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/checksum.h>
 #include <asm/dma.h>
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index c68f1e6158aa..0140a22551c8 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -20,7 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/serial.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 
 #include <platform/simcall.h>
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index ede04cca30dd..02e94bb3ad3e 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -17,7 +17,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <platform/simcall.h>
 
 #define SIMDISK_MAJOR 240
diff --git a/block/ioctl.c b/block/ioctl.c
index 656c8c6ed206..be7f4de3eb3c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -8,7 +8,7 @@
 #include <linux/fs.h>
 #include <linux/blktrace_api.h>
 #include <linux/pr.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
 {
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index 47a61474e795..14b081af8d61 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 #include <asm/dasd.h>
 #include <asm/ebcdic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/vtoc.h>
 
 #include "check.h"
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index c6fee7437be4..c2b64923ab66 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -28,7 +28,7 @@
 #include <linux/slab.h>
 #include <linux/times.h>
 #include <linux/uio.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 201292e67ee8..d00bc0ef87a6 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -37,7 +37,7 @@
 #include <linux/suspend.h>
 #include <linux/acpi.h>
 #include <acpi/video.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PREFIX "ACPI: "
 
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 05fe9ebfb9b5..4ef1e4624b2b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -36,7 +36,7 @@
 #ifdef CONFIG_ACPI_PROCFS_POWER
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #endif
 
 #include <linux/acpi.h>
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index a404ff4d7151..57fb5f468ac2 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -42,7 +42,7 @@
 #include <linux/semaphore.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "internal.h"
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index 2a358154b770..a34669cc823b 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -4,7 +4,7 @@
 #include <linux/suspend.h>
 #include <linux/bcd.h>
 #include <linux/acpi.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sleep.h"
 #include "internal.h"
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 1fed84a092c2..59c3a5d1e600 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -28,7 +28,7 @@
 #include <linux/cpufreq.h>
 #include <linux/acpi.h>
 #include <acpi/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PREFIX "ACPI: "
 
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index d51ca1c05619..a12f96cc93ff 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -31,7 +31,7 @@
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PREFIX "ACPI: "
 
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 35e8fbca10ad..1d0417b87cb7 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -40,7 +40,7 @@
 #include <linux/thermal.h>
 #include <linux/acpi.h>
 #include <linux/workqueue.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PREFIX "ACPI: "
 
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index f9b983ae6877..1fd25e872ece 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/atmdev.h>
 #include <linux/atm.h>
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 480fa6ffbc09..3ef6253e1cce 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -10,7 +10,7 @@
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 40c2d561417b..c53a9dd1353f 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/string.h>
 #include <asm/byteorder.h>
 
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 85aaf2222587..80c2ddcfa92c 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -52,7 +52,7 @@
 #include <asm/string.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/wait.h>
 
 #include "firestream.h"
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 81aaa505862c..637c3e6b0f9e 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -43,7 +43,7 @@
 #include <asm/irq.h>
 #include <asm/dma.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #ifdef CONFIG_SBUS
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 31b513a23ae0..3617659b9184 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -71,7 +71,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/atmdev.h>
 #include <linux/atm.h>
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 5fc81e240c24..584aa881882b 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -45,7 +45,7 @@
 
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/string.h>
 #include <asm/byteorder.h>
 
diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c
index feb023d7eebd..082aa02abc57 100644
--- a/drivers/atm/idt77105.c
+++ b/drivers/atm/idt77105.c
@@ -17,7 +17,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <asm/param.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "idt77105.h"
 
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 074616b39f4d..471ddfd93ea8 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -45,7 +45,7 @@
 #include <linux/slab.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <asm/byteorder.h>
 
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index b2756765950e..8640bafeb471 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -58,7 +58,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>  
 #include <linux/atomic.h>
-#include <asm/uaccess.h>  
+#include <linux/uaccess.h>  
 #include <asm/string.h>  
 #include <asm/byteorder.h>  
 #include <linux/vmalloc.h>
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index c7296b583787..cb28579e8a94 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -50,7 +50,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/etherdevice.h>
 #include "nicstar.h"
diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c
index 02159345566c..b0363149b2fd 100644
--- a/drivers/atm/suni.c
+++ b/drivers/atm/suni.c
@@ -23,7 +23,7 @@
 #include <linux/atm_suni.h>
 #include <linux/slab.h>
 #include <asm/param.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include "suni.h"
diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c
index 5120a96b3a89..4fa13a807873 100644
--- a/drivers/atm/uPD98402.c
+++ b/drivers/atm/uPD98402.c
@@ -10,7 +10,7 @@
 #include <linux/sonet.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include "uPD98402.h"
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index d3dc95484161..292dec18ffb8 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -27,7 +27,7 @@
 #include <asm/string.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "uPD98401.h"
 #include "uPD98402.h"
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bb69e58c29f3..8ab8ea1253e6 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -23,7 +23,7 @@
 #include <linux/slab.h>
 
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 0809cda93cc0..26a51be77227 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -48,7 +48,7 @@
 #include <linux/random.h>
 #include <linux/scatterlist.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "DAC960.h"
 
 #define DAC960_GAM_MINOR	252
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 5fd50a284168..a328f673adfe 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -70,7 +70,7 @@
 #include <linux/platform_device.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
 #include <asm/irq.h>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ad793f35632c..3adc32a3153b 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -23,7 +23,7 @@
 #include <linux/pfn_t.h>
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define SECTOR_SHIFT		9
 #define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index db9d6bb6352d..e5c5b8eb14a9 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -43,7 +43,7 @@
 #include <linux/mutex.h>
 #include <linux/bitmap.h>
 #include <linux/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 3d31761c0ed0..74e03aa537ad 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -26,7 +26,7 @@
 #include <linux/string.h>
 #include <linux/blkdev.h>
 #include <linux/scatterlist.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "loop.h"
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 3abb121825bc..a9b48ed7a3cd 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -45,7 +45,7 @@
 
 #define REALLY_SLOW_IO
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef __arm__
 #undef  HD_IRQ
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4af818766797..f347285c67ec 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,7 +78,7 @@
 #include <linux/uio.h>
 #include "loop.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_index_mutex);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 99c84468f154..38c576f76d36 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -36,7 +36,7 @@
 #include <linux/debugfs.h>
 #include <linux/blk-mq.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/types.h>
 
 #include <linux/nbd.h>
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 93362362aa55..5fd2d0e25567 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -139,7 +139,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_MUTEX(pcd_mutex);
 static DEFINE_SPINLOCK(pcd_lock);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 78a39f736c64..c3ed2fc72daa 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -155,7 +155,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 #include <linux/blkpg.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/workqueue.h>
 
 static DEFINE_MUTEX(pd_mutex);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 7a7d977a76c5..ed93e8badf56 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -155,7 +155,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY};
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_MUTEX(pf_mutex);
 static DEFINE_SPINLOCK(pf_spin_lock);
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index bfbd4c852dd9..5db955fe3a94 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -166,7 +166,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
 #include <linux/mutex.h>
 #include <linux/jiffies.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 module_param(verbose, int, 0644);
 module_param(major, int, 0);
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 216a94fed5b4..61fc6824299a 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -150,7 +150,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 #include <linux/sched.h>	/* current, TASK_*, schedule_timeout() */
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 module_param(verbose, int, 0);
 module_param(major, int, 0);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 95c98de92971..1b94c1ca5c5f 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -68,7 +68,7 @@
 #include <linux/debugfs.h>
 #include <linux/device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DRIVER_NAME	"pktcdvd"
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c264f2d284a7..aabd8e9d3035 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -34,7 +34,7 @@
 #include <asm/io.h>
 #include <asm/dbdma.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mediabay.h>
 #include <asm/machdep.h>
 #include <asm/pmac_feature.h>
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index ba4bfe933276..0e93ad7b8511 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -29,7 +29,7 @@
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if 0
 #define CARM_DEBUG
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 46f4c719fed9..c141cc3be22b 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -54,7 +54,7 @@
 
 #include "umem.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #define MM_MAXCARDS 4
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 5d475b3a0b2e..59cca72647a6 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -282,7 +282,7 @@
 #include <linux/blkdev.h>
 #include <linux/times.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* used to tell the module to turn on full debugging messages */
 static bool debug;
diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c
index a48e05b31593..2053f70ef66b 100644
--- a/drivers/char/agp/compat_ioctl.c
+++ b/drivers/char/agp/compat_ioctl.c
@@ -31,7 +31,7 @@
 #include <linux/fs.h>
 #include <linux/agpgart.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "agp.h"
 #include "compat_ioctl.h"
 
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 0f64d149c98d..f6955888e676 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -38,7 +38,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include "agp.h"
 
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index 14790304b84b..e5c62dcf2c11 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -34,7 +34,7 @@
 #include <linux/fs.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "applicom.h"
 
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c
index 35d46da754d7..0584025bb0c2 100644
--- a/drivers/char/bfin-otp.c
+++ b/drivers/char/bfin-otp.c
@@ -20,7 +20,7 @@
 
 #include <asm/blackfin.h>
 #include <asm/bfrom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
 #define stampit() stamp("here i am")
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index 0fae5296e311..eb53cbadb68f 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -13,7 +13,7 @@
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/therm.h>
 
 #ifdef CONFIG_PROC_FS
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index 65a8d96c0e93..58471394beb9 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -59,7 +59,7 @@
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <asm/io.h>		/* for inb_p, outb_p, inb, outb, etc. */
-#include <asm/uaccess.h>	/* for get_user, etc. */
+#include <linux/uaccess.h>	/* for get_user, etc. */
 #include <linux/wait.h>		/* for wait_queue */
 #include <linux/init.h>		/* for __init, module_{init,exit} */
 #include <linux/poll.h>		/* for POLLIN, etc. */
diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c
index 073db9558379..14e728fbb8a0 100644
--- a/drivers/char/generic_nvram.c
+++ b/drivers/char/generic_nvram.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/nvram.h>
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
index a7c5c59675f0..4f337375252e 100644
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -46,7 +46,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/sysrq.h>
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index f9766415ff10..6ce5ce8be2f2 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -43,7 +43,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 #define RNG_MODULE_NAME		"hw_random"
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 4facc7517a6a..4035495f3a86 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -43,7 +43,7 @@
 #include <linux/kdebug.h>
 #include <linux/rwsem.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/notifier.h>
 #include <linux/nmi.h>
 #include <linux/reboot.h>
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index c4094c4e22c1..5b6742770656 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -134,7 +134,7 @@
 #include <linux/lp.h>
 
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* if you have more than 8 printers, remember to increase LP_NO */
 #define LP_NO 8
diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c
index 67d426470e53..8c9216a0f62e 100644
--- a/drivers/char/mbcs.c
+++ b/drivers/char/mbcs.c
@@ -28,7 +28,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/intr.h>
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index 3d6c0671e996..f786b18ac500 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -35,7 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/shub_mmr.h>
diff --git a/drivers/char/mwave/3780i.c b/drivers/char/mwave/3780i.c
index 972c40a19629..4a8937f80570 100644
--- a/drivers/char/mwave/3780i.c
+++ b/drivers/char/mwave/3780i.c
@@ -54,7 +54,7 @@
 #include <linux/sched.h>	/* cond_resched() */
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include "smapi.h"
 #include "mwavedd.h"
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 37e0a4926080..21cb09c7bed7 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -53,7 +53,7 @@
 #include "smapi.h"
 #include "mwavepub.h"
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/wait.h>
 
 extern int mwave_debug;
diff --git a/drivers/char/nsc_gpio.c b/drivers/char/nsc_gpio.c
index b07b119ae57f..2a91bf048804 100644
--- a/drivers/char/nsc_gpio.c
+++ b/drivers/char/nsc_gpio.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/nsc_gpio.h>
 #include <linux/platform_device.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #define NAME "nsc_gpio"
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index 0e184426db98..a5b1eb276c0b 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -16,7 +16,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index dbe598de9b74..a284ae25e69a 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -31,7 +31,7 @@
 #include <asm/hardware/dec21285.h>
 #include <asm/io.h>
 #include <asm/mach-types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*****************************************************************************/
 #include <asm/nwflash.h>
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index 3f79a9fb6b1b..5f4be88c0dfc 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -20,7 +20,7 @@
 #include <linux/mutex.h>
 #include <linux/nsc_gpio.h>
 #include <linux/platform_device.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DEVNAME "pc8736x_gpio"
 
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index fc061f7c2bd1..d7123259143e 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -26,7 +26,7 @@
 #include <linux/poll.h>
 #include <linux/mutex.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include <pcmcia/cistpl.h>
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index a7dd5f4f2c5a..d136db1a10f0 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -84,7 +84,7 @@
 #define PUT_USER(error,value,addr) error = put_user(value,addr)
 #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static MGSL_PARAMS default_params = {
 	MGSL_MODE_HDLC,			/* unsigned long mode */
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d6876d506220..1ef26403bcc8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -265,7 +265,7 @@
 #include <crypto/chacha20.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/io.h>
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index e83b2adc014a..293167c6e254 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -24,7 +24,7 @@
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 struct raw_device_data {
 	struct block_device *binding;
diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index 0bc135b9b16f..903761bc41c9 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include <linux/types.h>
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 719c5b4eed39..4fa7fcd8af36 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -52,7 +52,7 @@
 #include <linux/platform_device.h>
 #include <linux/gfp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include <linux/sonypi.h>
diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c
index 100cd1de9939..572a51704e67 100644
--- a/drivers/char/tlclk.c
+++ b/drivers/char/tlclk.c
@@ -44,7 +44,7 @@
 #include <linux/miscdevice.h>
 #include <linux/platform_device.h>
 #include <asm/io.h>		/* inb/outb */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 MODULE_AUTHOR("Sebastien Bouchard <sebastien.bouchard@ca.kontron.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index f5a45d887a37..5488516da8ea 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -63,7 +63,7 @@
 #include <linux/miscdevice.h>
 #include <linux/ioport.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/init.h>
 #include <linux/stat.h>
 #include <linux/proc_fs.h>
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index c07dfe5c4da3..3e6b23c3453c 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -88,7 +88,7 @@
 #include <linux/slab.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_OF
 /* For open firmware. */
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 759612da4fdc..e28a31a40829 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -18,7 +18,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pal.h>
 
 #include <linux/acpi.h>
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index fe8f08948fcb..ac321f09e717 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -19,7 +19,7 @@
 #include <linux/tick.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PROMOTION_COUNT 4
 #define DEMOTION_COUNT 1
diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c
index 55dd88d82d6d..830184529109 100644
--- a/drivers/dio/dio.c
+++ b/drivers/dio/dio.c
@@ -31,7 +31,7 @@
 #include <linux/init.h>
 #include <linux/dio.h>
 #include <linux/slab.h>                         /* kmalloc() */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>                             /* readb() */
 
 struct dio_bus dio_bus = {
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index de4d5d08af9e..65cf2b9355c4 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -13,7 +13,7 @@
  */
 
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 5f2c717f8053..750891ea07de 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -28,7 +28,7 @@
 #include <linux/ctype.h>
 #include <linux/edac.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include "edac_mc.h"
 #include "edac_module.h"
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 4e9d5632041a..48c844a72a27 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -10,7 +10,7 @@
  *
  */
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 3de95a29024c..cf9e396d7702 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -30,7 +30,7 @@
 #define pr_fmt(fmt) "i2c-core: " fmt
 
 #include <dt-bindings/i2c/i2c.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/acpi.h>
 #include <linux/clk/clk-conf.h>
 #include <linux/completion.h>
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 0ceae5cbd89a..4b5dc0162e67 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -130,7 +130,7 @@
 #include <linux/ide.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #define DRV_NAME "hpt366"
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 83679da0c3f0..5ceace542b77 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -31,7 +31,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/div64.h>
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6360bbd37efe..201e43fcbc94 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -51,7 +51,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 376f2dc410c5..210a0887dd29 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -24,7 +24,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 void SELECT_MASK(ide_drive_t *drive, int mask)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 0b63facd1d87..330e319419e6 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -36,7 +36,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 /**
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 97c070077774..863db44c7916 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -16,7 +16,7 @@
 
 #include <linux/module.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/errno.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 579f9a7f6283..e0a995b85a2d 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -46,7 +46,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <rdma/ib.h>
 #include <rdma/ib_cm.h>
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 415a3185cde7..249b403b43a4 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -50,7 +50,7 @@
 #include <linux/semaphore.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_user_mad.h>
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 09b649159e6c..700782203483 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "uverbs.h"
 #include "core_priv.h"
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 813593550c4b..b3f95d453fba 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -46,7 +46,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <rdma/ib.h>
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index a2f9f29c6ab5..fd811115af49 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -35,7 +35,7 @@
 #include <linux/init.h>
 #include <linux/seq_file.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "ipoib.h"
 
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 64b3d11dcf1e..9104e6b8cac9 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -62,7 +62,7 @@
 
 #include <net/sock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
diff --git a/drivers/input/input-compat.c b/drivers/input/input-compat.c
index d84d20b9cec0..2186f71c9fe5 100644
--- a/drivers/input/input-compat.c
+++ b/drivers/input/input-compat.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/export.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "input-compat.h"
 
 #ifdef CONFIG_COMPAT
diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c
index 638165c78e75..6423aaccc763 100644
--- a/drivers/input/misc/atlas_btns.c
+++ b/drivers/input/misc/atlas_btns.c
@@ -28,7 +28,7 @@
 #include <linux/input.h>
 #include <linux/types.h>
 #include <linux/acpi.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define ACPI_ATLAS_NAME		"Atlas ACPI"
 #define ACPI_ATLAS_CLASS	"Atlas"
diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c
index a7fd8f22ba56..a33437c480e3 100644
--- a/drivers/input/mouse/amimouse.c
+++ b/drivers/input/mouse/amimouse.c
@@ -25,7 +25,7 @@
 
 #include <asm/irq.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
 
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index d1c43236b125..96f2f51604bd 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -47,7 +47,7 @@
 
 #include <asm/irq.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/atarihw.h>
 #include <asm/atarikb.h>
 #include <asm/atariints.h>
diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index 354d47ecd66a..7331084973e1 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -15,7 +15,7 @@
 #include <linux/input.h>
 #include <linux/libps2.h>
 #include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "psmouse.h"
 #include "trackpoint.h"
 
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 852858e5d8d0..559c99ca6592 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -79,7 +79,7 @@
 # define sdc_readb(p)		gsc_readb(p)
 # define sdc_writeb(v,p)	gsc_writeb((v),(p))
 #elif defined(__mc68000__)
-# include <asm/uaccess.h>
+#include <linux/uaccess.h>
 # define sdc_readb(p)		in_8(p)
 # define sdc_writeb(v,p)	out_8((p),(v))
 #else
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index 5a9d521510bf..d0fccc8ec259 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/q40_master.h>
 #include <asm/irq.h>
 #include <asm/q40ints.h>
diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c
index d189843f3727..f8ead9f9c77e 100644
--- a/drivers/input/serio/serport.c
+++ b/drivers/input/serio/serport.c
@@ -13,7 +13,7 @@
  * the Free Software Foundation.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index 4613f0aefd08..d67547bded3e 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -75,7 +75,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/usb/input.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 /*
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index abf09ac42ce4..b796e891e2ee 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -56,7 +56,7 @@ Scott Hill shill@gtcocalcomp.com
 #include <linux/slab.h>
 #include <linux/input.h>
 #include <linux/usb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
 #include <linux/bitops.h>
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 823f6985b260..49d0f70c2bae 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -28,7 +28,7 @@
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/isdn/capicmd.h>
 #include <linux/isdn/capiutil.h>
 #ifdef AVMB1_COMPAT
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index 4d9b195547c5..9fdbd99c7547 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/isdn/capilli.h>
 #include "avmcard.h"
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 19b113faeb7b..818bd8f231db 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -23,7 +23,7 @@
 #include <linux/gfp.h>
 #include <asm/io.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/isdn/capilli.h>
 #include "avmcard.h"
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 5d00d72fe482..17beb2869dc1 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -24,7 +24,7 @@
 #include <linux/init.h>
 #include <linux/gfp.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/isdn/capicmd.h>
 #include <linux/isdn/capiutil.h>
diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c
index 997d46abf5b2..be36d82004d6 100644
--- a/drivers/isdn/hardware/eicon/capimain.c
+++ b/drivers/isdn/hardware/eicon/capimain.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/skbuff.h>
 
diff --git a/drivers/isdn/hardware/eicon/divamnt.c b/drivers/isdn/hardware/eicon/divamnt.c
index 0de29b7b712f..72e58bf07577 100644
--- a/drivers/isdn/hardware/eicon/divamnt.c
+++ b/drivers/isdn/hardware/eicon/divamnt.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "platform.h"
 #include "di_defs.h"
diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c
index 4103a8c178d7..cb88090f9cea 100644
--- a/drivers/isdn/hardware/eicon/divasi.c
+++ b/drivers/isdn/hardware/eicon/divasi.c
@@ -18,7 +18,7 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "platform.h"
 #include "di_defs.h"
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index 32f34511c416..8b7ad4f1ab01 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c
index 56ce98a4e248..b57efd6ad916 100644
--- a/drivers/isdn/hardware/eicon/divasproc.c
+++ b/drivers/isdn/hardware/eicon/divasproc.c
@@ -16,7 +16,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/list.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "platform.h"
 #include "debuglib.h"
diff --git a/drivers/isdn/hysdn/hysdn_boot.c b/drivers/isdn/hysdn/hysdn_boot.c
index eda4741e3f2f..4a0425378f37 100644
--- a/drivers/isdn/hysdn/hysdn_boot.c
+++ b/drivers/isdn/hysdn/hysdn_boot.c
@@ -13,7 +13,7 @@
 
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "hysdn_defs.h"
 #include "hysdn_pof.h"
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 9e385b38debf..ac219045daf7 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -15,7 +15,7 @@
 #include <linux/slab.h>
 #include <asm/paravirt.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/poll.h>
 #include <asm/asm-offsets.h>
 #include "lg.h"
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index e3abebc912c0..0bc127e9f16a 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -16,7 +16,7 @@
 #include <linux/random.h>
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "lg.h"
 
 /*M:008
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 743253fc638f..d71f6323ac00 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -45,7 +45,7 @@
 #include <asm/desc.h>
 #include <asm/setup.h>
 #include <asm/lguest.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 #include "../lg.h"
diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c
index cd35079c8c98..281fa9e6fc1f 100644
--- a/drivers/macintosh/ans-lcd.c
+++ b/drivers/macintosh/ans-lcd.c
@@ -11,7 +11,7 @@
 #include <linux/delay.h>
 #include <linux/fs.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/io.h>
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 08edb2c25b60..227869159ac0 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -47,7 +47,7 @@
 #include <asm/pmac_feature.h>
 #include <asm/smu.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define VERSION "0.7"
 #define AUTHOR  "(c) 2005 Benjamin Herrenschmidt, IBM Corp."
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 91081dcdc272..43b8db2b5445 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -57,7 +57,7 @@
 #include <asm/pmac_feature.h>
 #include <asm/pmac_pfunc.h>
 #include <asm/pmac_low_i2c.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cputable.h>
 #include <asm/time.h>
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index a00ee41f0573..a411c5cb77a1 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -38,7 +38,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Misc minor number allocated for /dev/pmu */
 #define PMU_MINOR	154
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index c72a77048b73..a5a9b17f0f7f 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -17,7 +17,7 @@
 #include <linux/hdreg.h>
 #include <linux/compat.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DM_MSG_PREFIX "ioctl"
 #define DM_DRIVER_EMAIL "dm-devel@redhat.com"
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index efe55a3e80d0..0c44479b556e 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -30,7 +30,7 @@
 #include <linux/poll.h>
 #include <linux/ioctl.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "dmxdev.h"
 
 static int debug;
diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c
index 3ad0b2cd26b1..bbbff72bbb2a 100644
--- a/drivers/media/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb-core/dvb_demux.c
@@ -31,7 +31,7 @@
 #include <linux/poll.h>
 #include <linux/string.h>
 #include <linux/crc32.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/div64.h>
 
 #include "dvb_demux.h"
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index dfc03a95df71..bc5e8cfe7ca2 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -62,7 +62,7 @@
 #include <linux/etherdevice.h>
 #include <linux/dvb/net.h>
 #include <linux/uio.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/crc32.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
diff --git a/drivers/media/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb-core/dvb_ringbuffer.c
index 7df7fb3738a0..5c4b5a1f604f 100644
--- a/drivers/media/dvb-core/dvb_ringbuffer.c
+++ b/drivers/media/dvb-core/dvb_ringbuffer.c
@@ -31,7 +31,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "dvb_ringbuffer.h"
 
diff --git a/drivers/media/i2c/adv7170.c b/drivers/media/i2c/adv7170.c
index 05f1dc6c72af..fc9ec0f3679c 100644
--- a/drivers/media/i2c/adv7170.c
+++ b/drivers/media/i2c/adv7170.c
@@ -32,7 +32,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/adv7175.c b/drivers/media/i2c/adv7175.c
index f554809a51e7..72139bdae1ca 100644
--- a/drivers/media/i2c/adv7175.c
+++ b/drivers/media/i2c/adv7175.c
@@ -28,7 +28,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/bt856.c b/drivers/media/i2c/bt856.c
index 48176591a80d..54c627859c8e 100644
--- a/drivers/media/i2c/bt856.c
+++ b/drivers/media/i2c/bt856.c
@@ -32,7 +32,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/bt866.c b/drivers/media/i2c/bt866.c
index bbec70c882a3..0d3f46af2545 100644
--- a/drivers/media/i2c/bt866.c
+++ b/drivers/media/i2c/bt866.c
@@ -32,7 +32,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/cs53l32a.c b/drivers/media/i2c/cs53l32a.c
index e4b3cf49dd38..59c1a98c5a90 100644
--- a/drivers/media/i2c/cs53l32a.c
+++ b/drivers/media/i2c/cs53l32a.c
@@ -24,7 +24,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/m52790.c b/drivers/media/i2c/m52790.c
index 81171d8e1c2c..89c28c36c5bf 100644
--- a/drivers/media/i2c/m52790.c
+++ b/drivers/media/i2c/m52790.c
@@ -24,7 +24,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/i2c/m52790.h>
diff --git a/drivers/media/i2c/saa6588.c b/drivers/media/i2c/saa6588.c
index 89e458c23983..00640233a5e3 100644
--- a/drivers/media/i2c/saa6588.c
+++ b/drivers/media/i2c/saa6588.c
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <media/i2c/saa6588.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/saa7110.c b/drivers/media/i2c/saa7110.c
index 6f49886806ee..ad456ce051f9 100644
--- a/drivers/media/i2c/saa7110.c
+++ b/drivers/media/i2c/saa7110.c
@@ -31,7 +31,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/saa7185.c b/drivers/media/i2c/saa7185.c
index eecad2d1edce..119050e1197a 100644
--- a/drivers/media/i2c/saa7185.c
+++ b/drivers/media/i2c/saa7185.c
@@ -28,7 +28,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/tlv320aic23b.c b/drivers/media/i2c/tlv320aic23b.c
index 2e06c06cac9b..cc6104da34ef 100644
--- a/drivers/media/i2c/tlv320aic23b.c
+++ b/drivers/media/i2c/tlv320aic23b.c
@@ -27,7 +27,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/vp27smpx.c b/drivers/media/i2c/vp27smpx.c
index d6c23bdbcd4a..ef0d8b8e3df7 100644
--- a/drivers/media/i2c/vp27smpx.c
+++ b/drivers/media/i2c/vp27smpx.c
@@ -25,7 +25,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/vpx3220.c b/drivers/media/i2c/vpx3220.c
index 90b693f4e2ab..ce9f09370e22 100644
--- a/drivers/media/i2c/vpx3220.c
+++ b/drivers/media/i2c/vpx3220.c
@@ -23,7 +23,7 @@
 #include <linux/delay.h>
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/wm8739.c b/drivers/media/i2c/wm8739.c
index f086e5e6e844..c885def54b15 100644
--- a/drivers/media/i2c/wm8739.c
+++ b/drivers/media/i2c/wm8739.c
@@ -25,7 +25,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/i2c/wm8775.c b/drivers/media/i2c/wm8775.c
index 5581f4db02af..45039d756753 100644
--- a/drivers/media/i2c/wm8775.c
+++ b/drivers/media/i2c/wm8775.c
@@ -29,7 +29,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h
index 10cba305dbd2..6b09a9514d64 100644
--- a/drivers/media/pci/ivtv/ivtv-driver.h
+++ b/drivers/media/pci/ivtv/ivtv-driver.h
@@ -53,7 +53,7 @@
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index e825bc93ea7a..24fba633c217 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -37,7 +37,7 @@
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-fh.h>
 #include <media/v4l2-event.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
diff --git a/drivers/media/pci/zoran/videocodec.c b/drivers/media/pci/zoran/videocodec.c
index 13a3c07cd259..3c3cbce0f9cc 100644
--- a/drivers/media/pci/zoran/videocodec.c
+++ b/drivers/media/pci/zoran/videocodec.c
@@ -40,7 +40,7 @@
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #endif
 
 #include "videocodec.h"
diff --git a/drivers/media/pci/zoran/zoran_driver.c b/drivers/media/pci/zoran/zoran_driver.c
index 2170e174c335..94b9b616df98 100644
--- a/drivers/media/pci/zoran/zoran_driver.c
+++ b/drivers/media/pci/zoran/zoran_driver.c
@@ -66,7 +66,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/proc_fs.h>
 
 #include <linux/mutex.h>
diff --git a/drivers/media/platform/arv.c b/drivers/media/platform/arv.c
index 03c5098499c4..8fe59bf6cd3f 100644
--- a/drivers/media/platform/arv.c
+++ b/drivers/media/platform/arv.c
@@ -34,7 +34,7 @@
 #include <media/v4l2-fh.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/m32r.h>
 #include <asm/io.h>
 #include <asm/dma.h>
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-ioread.c b/drivers/media/usb/pvrusb2/pvrusb2-ioread.c
index 70b8a052eb5b..3c7ca2c2c108 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-ioread.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-ioread.c
@@ -25,7 +25,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define BUFFER_COUNT 32
 #define BUFFER_SIZE PAGE_ALIGN(0x4000)
diff --git a/drivers/media/usb/pwc/pwc-ctrl.c b/drivers/media/usb/pwc/pwc-ctrl.c
index 3a1618580ed6..655cef39eb3d 100644
--- a/drivers/media/usb/pwc/pwc-ctrl.c
+++ b/drivers/media/usb/pwc/pwc-ctrl.c
@@ -39,7 +39,7 @@
 /* Control functions for the cam; brightness, contrast, video mode, etc. */
 
 #ifdef __KERNEL__
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #endif
 #include <asm/errno.h>
 
diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index 57cfe26a393f..a5ea1f517291 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -54,7 +54,7 @@
 #if defined(CONFIG_SPI)
 #include <linux/spi/spi.h>
 #endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/div64.h>
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 8be561ab2615..fa2124cb31bd 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -25,7 +25,7 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 02b5f69e1a42..7b3b41368931 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -58,7 +58,7 @@
 #include <linux/compat.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
diff --git a/drivers/message/fusion/mptlan.h b/drivers/message/fusion/mptlan.h
index 69e9d5463564..8946e19dbfc8 100644
--- a/drivers/message/fusion/mptlan.h
+++ b/drivers/message/fusion/mptlan.h
@@ -70,7 +70,7 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
     /* Override mptbase.h by pre-defining these! */
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 520f58439080..e05c3245930a 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -76,7 +76,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include "ibmasm.h"
 #include "remote.h"
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index bab3f07b1117..cb1698f268f1 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -43,7 +43,7 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "queue.h"
 #include "block.h"
diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c
index dca5518b0139..590a8a4522be 100644
--- a/drivers/mmc/host/android-goldfish.c
+++ b/drivers/mmc/host/android-goldfish.c
@@ -49,7 +49,7 @@
 
 #include <asm/types.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DRIVER_NAME "goldfish_mmc"
 
diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 220f9200fa52..cadea0620cd0 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -82,7 +82,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/ptrace.h>
diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index a70eb83e68f1..8087c36dc693 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -30,7 +30,7 @@
 
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 9fb3b0dcdac2..664d206a4cbe 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -70,7 +70,7 @@
 #include <linux/hdreg.h>
 #include <linux/vmalloc.h>
 #include <linux/blkpg.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/mtd/ftl.h>
 
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index b66b541877f0..8db740d6eb08 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -34,7 +34,7 @@
 #include <linux/mtd/nftl.h>
 #include <linux/mtd/inftl.h>
 #include <linux/mtd/nand.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/errno.h>
 #include <asm/io.h>
 
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 1388c8d7f309..8d6bb189ea8e 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <asm/errno.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c
index d459aca07881..414956eca0c9 100644
--- a/drivers/mtd/maps/sun_uflash.c
+++ b/drivers/mtd/maps/sun_uflash.c
@@ -16,7 +16,7 @@
 #include <linux/of_device.h>
 #include <linux/slab.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 8d58acf33021..df8a5ef334c0 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -31,7 +31,7 @@
 #include <linux/spinlock.h>
 #include <linux/hdreg.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "mtdcore.h"
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 2a47a3f0e730..ce5ccc573a9c 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -37,7 +37,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/map.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "mtdcore.h"
 
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 46f27de018c3..e21161353e76 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <asm/errno.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/init.h>
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 31f89f1c6123..b8c293373ecc 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -33,7 +33,7 @@
 #include <linux/if_arp.h>
 #include <linux/slab.h>
 #include <net/route.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "ipddp.h"		/* Our stuff */
 
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index a10ad74cc8d2..fe13bfea30ac 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -127,7 +127,7 @@
 #include <linux/if_eql.h>
 #include <linux/pkt_sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int eql_open(struct net_device *dev);
 static int eql_close(struct net_device *dev);
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index a7533780dddc..c7f9f2c77da7 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -88,7 +88,7 @@
 #include <linux/eisa.h>
 #include <linux/bitops.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index be5b80103bec..e7b1fa56b290 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -72,7 +72,7 @@ static int max_interrupt_work = 20;
 #include <linux/ethtool.h>
 #include <linux/bitops.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index 9359a37fedc0..47c844cc9d27 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -92,7 +92,7 @@ earlier 3Com products.
 #include <pcmcia/ciscode.h>
 #include <pcmcia/ds.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 /*====================================================================*/
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index b3560a364e53..40196f41768a 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -92,7 +92,7 @@ static int vortex_debug = 1;
 #include <linux/gfp.h>
 #include <asm/irq.h>			/* For nr_irqs only. */
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Kernel compatibility defines, some common to David Hinds' PCMCIA package.
    This is only in the support-all-kernels source code. */
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index a0cacbe846ba..9fe3990319ec 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -119,7 +119,7 @@ static const int multicast_filter_limit = 32;
 #include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/in6.h>
 #include <linux/dma-mapping.h>
 #include <linux/firmware.h>
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 1d84a0544ace..3da1fc539ef9 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -46,7 +46,7 @@
 
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define AXNET_CMD	0x00
 #define AXNET_DATAPORT	0x10	/* NatSemi-defined port window offset. */
diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c
index 07355302443d..1bdea746926c 100644
--- a/drivers/net/ethernet/8390/ne2k-pci.c
+++ b/drivers/net/ethernet/8390/ne2k-pci.c
@@ -54,7 +54,7 @@ static int options[MAX_UNITS];
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "8390.h"
 
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 63079a6e20d9..bd0a2a14b649 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -49,7 +49,7 @@
 
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PCNET_CMD	0x00
 #define PCNET_DATAPORT	0x10	/* NatSemi-defined port window offset. */
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 3aaad33cdbc6..c12d2618eebf 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -45,7 +45,7 @@
 #include <linux/mm.h>
 #include <linux/firmware.h>
 #include <asm/processor.h>		/* Processor type for cache alignment. */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 /*
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 16f0c70266bc..a1a52eb53b14 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -80,7 +80,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 #define DRV_NAME "acenic"
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 11cf1e3e0295..9595f1bc535b 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -87,7 +87,7 @@ Revision History:
 
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define AMD8111E_VLAN_TAG_USED 1
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 113a3b3cc50c..b556c926557a 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -151,7 +151,7 @@ Include Files
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 /* ----------------------------------------------------------------------------
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 1df3048a3cdb..48707ed76ffc 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -32,7 +32,7 @@
 #include <linux/slab.h>
 #include <linux/phy.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 3a05f9098e75..d8aff7a4b3c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -44,7 +44,7 @@
 #include <linux/mii.h>
 #include <linux/sockios.h>
 #include <linux/dma-mapping.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "cpl5_cmd.h"
 #include "regs.h"
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 7b2224ae72f2..d76491676b51 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -50,7 +50,7 @@
 #include <linux/stringify.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "common.h"
 #include "cxgb3_ioctl.h"
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 66c37fac59b2..6f951877430b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -63,7 +63,7 @@
 #include <net/addrconf.h>
 #include <net/bonding.h>
 #include <net/addrconf.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/crash_dump.h>
 
 #include "cxgb4.h"
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index 90c573b8ccaf..57c17e797ae3 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -49,7 +49,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 /* These identify the driver base version and may not be removed. */
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 51fda3a6b13f..df4a871df633 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -472,7 +472,7 @@
 #include <asm/dma.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
 #endif /* CONFIG_PPC_PMAC */
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index df4994919456..07e10a45beaa 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -90,7 +90,7 @@
 #include <asm/processor.h>
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 
 #ifdef CONFIG_TULIP_DM910X
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 5f1377449b8f..17e566a8b345 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -31,7 +31,7 @@
 #include <linux/mii.h>
 #include <linux/crc32.h>
 #include <asm/unaligned.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_SPARC
 #include <asm/prom.h>
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index e1c4133b8787..f82ebe5d89ee 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -40,7 +40,7 @@
 #include <asm/processor.h>
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define uw32(reg, val)	iowrite32(val, ioaddr + (reg))
 #define ur32(reg)	ioread32(ioaddr + (reg))
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index feda96d585e7..bc9bf88e5831 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -129,7 +129,7 @@ static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
 #include <linux/rtnetlink.h>
 #include <linux/crc32.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>		/* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c
index 19e4ea15b504..a8de79355578 100644
--- a/drivers/net/ethernet/dec/tulip/xircom_cb.c
+++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c
@@ -30,7 +30,7 @@
 #include <linux/delay.h>
 #include <linux/bitops.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #ifdef CONFIG_NET_POLL_CONTROLLER
 #include <asm/irq.h>
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index 8f4f61262d5c..5d8ae5320242 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -31,7 +31,7 @@
 #include <linux/bitops.h>
 #include <asm/processor.h>	/* Processor type for cache alignment. */
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/time.h>
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index eab36acfc0d1..2e5b66762e15 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -91,7 +91,7 @@ static char *media[MAX_UNITS];
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>		/* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 6967b287b6e7..9cb436cb3745 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -88,7 +88,7 @@ static int full_duplex[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
 
 #include <asm/processor.h>	/* Processor type for cache alignment. */
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 /* These identify the driver base version and may not be removed. */
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index d9f3a480ca1b..1f98838f32b7 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -44,7 +44,7 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "fs_enet.h"
 
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index 120c758f5d01..6e64989f8478 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -42,7 +42,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "fs_enet.h"
 
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index 777beffa1e1e..db9c0bcf54cd 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -36,7 +36,7 @@
 #include <linux/gfp.h>
 
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_8xx
 #include <asm/8xx_immap.h>
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index 15abd37d70e3..96d44cf44fe0 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -35,7 +35,7 @@
 #include <linux/of_platform.h>
 
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_8xx
 #include <asm/8xx_immap.h>
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
index a89267b94352..1582d82483ec 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
@@ -35,7 +35,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mpc5xxx.h>
 
 #include "fs_enet.h"
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 756f7e763d5f..a6e7afa878be 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -93,7 +93,7 @@
 #include <asm/mpc85xx.h>
 #endif
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 #include <linux/crc32.h>
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 6e8a9c8467b9..5aa814799d70 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -40,7 +40,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/crc32.h>
 #include <linux/workqueue.h>
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 56588f2e1d91..a93e0199c369 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -32,7 +32,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/crc32.h>
 #include <asm/types.h>
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 53c5fcf1436c..9d660888510f 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -37,7 +37,7 @@
 #include <linux/of_net.h>
 #include <linux/of_platform.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <soc/fsl/qe/immap_qe.h>
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 8ba636f61b50..b642990b549c 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -32,7 +32,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/types.h>
 
 #include "ucc_geth.h"
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 51c4abc51bf4..a69cd19a55ae 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -54,7 +54,7 @@
 #include <pcmcia/ciscode.h>
 #include <pcmcia/ds.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 /*====================================================================*/
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 52a69c925965..5909615c27f7 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -47,7 +47,7 @@
 #include <asm/processor.h>
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index d2b29b490ae0..e5d72559cca9 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
@@ -30,7 +30,7 @@
 
 #include "ixgb.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define IXGB_ALL_RAR_ENTRIES 16
 
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 22b0821c1da0..90eac63f9606 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -51,7 +51,7 @@
 #include <asm/processor.h>	/* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DRV_NAME	"natsemi"
 #define DRV_VERSION	"2.1"
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 93c4bdc0cdca..f9d2eb9a920a 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -119,7 +119,7 @@
 #include <linux/slab.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define DRV_NAME "ns83820"
 
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index 2d04679a923a..baff744b560e 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -160,7 +160,7 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
 #include <linux/delay.h>
 #include <linux/bitops.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>	/* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/unaligned.h>
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 2a2ca5fa0c69..fa7770da6ef8 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -100,7 +100,7 @@ static int gx_fix;
 #include <linux/ethtool.h>
 #include <linux/crc32.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>		/* Processor type for cache alignment. */
 #include <asm/unaligned.h>
 #include <asm/io.h>
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index b7c89ebcf4a2..0b3cd58093d5 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -76,7 +76,7 @@
 #include <linux/cache.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* These identify the driver base version and may not be removed. */
 static char version[] =
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 42051ab98cf0..d390b9663dc3 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -60,7 +60,7 @@
 #include <asm/byteorder.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sn/types.h>
 #include <asm/sn/ioc3.h>
 #include <asm/pci/bridge.h>
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 39fca6c0b68d..19a458716f1a 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -74,7 +74,7 @@
 #include <asm/processor.h>      /* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>	/* User space memory access functions */
+#include <linux/uaccess.h>	/* User space memory access functions */
 
 #include "sis900.h"
 
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index fe9760ffab51..55a95e1d69d6 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -86,7 +86,7 @@ static int rx_copybreak;
 #include <linux/crc32.h>
 #include <linux/bitops.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 /* These identify the driver base version and may not be removed. */
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index f1c75e291e55..67154621abcf 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -52,7 +52,7 @@
 #include <pcmcia/ss.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*====================================================================*/
 
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index e9e5ef241c6f..0e8e89f17dbb 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -99,7 +99,7 @@
 #include <linux/atomic.h>
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define cas_page_map(x)      kmap_atomic((x))
 #define cas_page_unmap(x)    kunmap_atomic((x))
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 66ecf0fcc330..d277e4107976 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -42,7 +42,7 @@
 
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/irq.h>
 
 #ifdef CONFIG_SPARC
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index ca96408058b0..72ff05cd3ed8 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -49,7 +49,7 @@
 #include <asm/prom.h>
 #include <asm/auxio.h>
 #endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/pgtable.h>
 #include <asm/irq.h>
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index ba5c54249055..0a6c4e804eed 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -114,7 +114,7 @@ static const int multicast_filter_limit = 32;
 #include <asm/processor.h>	/* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/dmi.h>
 
 /* These identify the driver base version and may not be removed. */
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 3b08ec766076..f71883264cc0 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -88,7 +88,7 @@
 #include <pcmcia/ciscode.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef MANFID_COMPAQ
   #define MANFID_COMPAQ 	   0x0138
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c
index 3a639180e4a0..2414f1dc8ddd 100644
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -88,7 +88,7 @@ static const char * const boot_msg =
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include	"h/types.h"
 #undef ADDR			// undo Linux definition
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 470b3dcd54e5..922bf440e9f1 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/string.h>
 #include <linux/mm.h>
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 78dbc44540f6..7d054697b199 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -55,7 +55,7 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 #include <net/ax25.h> 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* --------------------------------------------------------------------- */
 
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 072cddce9264..809dc25909d1 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -86,7 +86,7 @@
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* --------------------------------------------------------------------- */
 
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 7b916d5b14b9..ebc06822fd4d 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -82,7 +82,7 @@
 #include <linux/jiffies.h>
 #include <linux/time64.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index f9a8976195ba..60fcf512c208 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -67,7 +67,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/hdlcdrv.h>
 #include <linux/baycom.h>
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 622ab3ab9e93..f62e7f325cf9 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -69,7 +69,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index e4137c1b3df9..2479072981a1 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -40,7 +40,7 @@
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/ax25.h>
 #include "z8530.h"
 
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 4bad0b894e9c..8c3633c1d078 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -58,7 +58,7 @@
 #include <linux/hdlcdrv.h>
 #include <linux/random.h>
 #include <net/ax25.h> 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/crc-ccitt.h>
 
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 1dfe2304daa7..ece59c54a653 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -17,7 +17,7 @@
  */
 #include <linux/module.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/crc16.h>
 #include <linux/string.h>
 #include <linux/mm.h>
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index b8083161ef46..6754cd01c605 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -178,7 +178,7 @@
 
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "z8530.h"
 
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index aaff07c10058..b6891ada1d7b 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -68,7 +68,7 @@
 #include <linux/seq_file.h>
 #include <net/net_namespace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/init.h>
 
 #include <linux/yam.h>
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index f5a9728b89f3..dd7fc6659ad4 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -46,7 +46,7 @@
 #include <asm/byteorder.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define rr_if_busy(dev)     netif_queue_stopped(dev)
 #define rr_if_running(dev)  netif_running(dev)
diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c
index 7a3f990c1935..7a20a9a4663a 100644
--- a/drivers/net/irda/irtty-sir.c
+++ b/drivers/net/irda/irtty-sir.c
@@ -31,7 +31,7 @@
 #include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
 
diff --git a/drivers/net/irda/kingsun-sir.c b/drivers/net/irda/kingsun-sir.c
index fb5d162ec7d2..24c0f169a7b1 100644
--- a/drivers/net/irda/kingsun-sir.c
+++ b/drivers/net/irda/kingsun-sir.c
@@ -71,7 +71,7 @@
 
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/wrapper.h>
diff --git a/drivers/net/irda/ks959-sir.c b/drivers/net/irda/ks959-sir.c
index 8e6e0edf2440..3affded3e30d 100644
--- a/drivers/net/irda/ks959-sir.c
+++ b/drivers/net/irda/ks959-sir.c
@@ -123,7 +123,7 @@
 
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/wrapper.h>
diff --git a/drivers/net/irda/ksdazzle-sir.c b/drivers/net/irda/ksdazzle-sir.c
index 37f23a189b35..741452c7ce35 100644
--- a/drivers/net/irda/ksdazzle-sir.c
+++ b/drivers/net/irda/ksdazzle-sir.c
@@ -87,7 +87,7 @@
 
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/wrapper.h>
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index bca6a1e72d1d..6f6ed75b63c9 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -55,7 +55,7 @@
 
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/wrapper.h>
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index a0849f49bbec..ffedad2a360a 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -45,7 +45,7 @@ MODULE_LICENSE("GPL");
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include <net/irda/irda.h>
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 6255973e3dda..1e05b7c2d157 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -40,7 +40,7 @@
 #include <linux/fcntl.h>
 #include <linux/in.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include <linux/inet.h>
diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c
index 36e3e2033eca..e28913d9ea7e 100644
--- a/drivers/net/phy/davicom.c
+++ b/drivers/net/phy/davicom.c
@@ -32,7 +32,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define MII_DM9161_SCR		0x10
 #define MII_DM9161_SCR_INIT	0x0610
diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c
index 22b51f01a94a..567280a72241 100644
--- a/drivers/net/phy/icplus.c
+++ b/drivers/net/phy/icplus.c
@@ -28,7 +28,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 MODULE_DESCRIPTION("ICPlus IP175C/IP101A/IP101G/IC1001 PHY drivers");
 MODULE_AUTHOR("Michael Barkowski");
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index b9fde1bcf0f0..8d198a1f0031 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -32,7 +32,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* The Level one LXT970 is used by many boards				     */
 
diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c
index d470db89e8dd..dbef8002bc28 100644
--- a/drivers/net/phy/qsemi.c
+++ b/drivers/net/phy/qsemi.c
@@ -32,7 +32,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* ------------------------------------------------------------------------- */
 /* The Quality Semiconductor QS6612 is used on the RPX CLLF                  */
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 9c889e0303dd..feb9569e3345 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -34,7 +34,7 @@
 #include <linux/jiffies.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/string.h>
 
 #define PPP_VERSION	"2.4.2"
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 925d3e295bac..9ae53986cb4a 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -47,7 +47,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PPP_VERSION	"2.4.2"
 
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index f017c72bb7fd..d7e405268983 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -83,7 +83,7 @@
 #include <net/netns/generic.h>
 #include <net/sock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PPPOE_HASH_BITS 4
 #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS)
diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c
index b9c8be6283d3..c0599b3b23c0 100644
--- a/drivers/net/ppp/pppox.c
+++ b/drivers/net/ppp/pppox.c
@@ -34,7 +34,7 @@
 
 #include <net/sock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const struct pppox_proto *pppox_protos[PX_MAX_PROTO + 1];
 
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index 8b8b53259783..7820fced33f6 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -55,7 +55,7 @@ static char version[] = "sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)\n";
 
 #include <asm/io.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef SB1000_DEBUG
 static int sb1000_debug = SB1000_DEBUG;
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index 27ed25252aac..5782733959f0 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -75,7 +75,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <linux/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/checksum.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 7e933d8ff811..9841f3dc0682 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -64,7 +64,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/sched.h>
 #include <linux/string.h>
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 57e88b814700..cd8e02c94be0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -73,7 +73,7 @@
 #include <linux/uio.h>
 #include <linux/skb_array.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index a1f2f6f1e614..3daa41bdd4ea 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -42,7 +42,7 @@
 #include <linux/crc32.h>
 #include <linux/bitops.h>
 #include <linux/gfp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #undef DEBUG
 
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 338aed5da14d..876f02f4945e 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -54,7 +54,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/wait.h>
 #include <linux/firmware.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #undef DEBUG
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 399f7ee57aea..24e803fe9a53 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -42,7 +42,7 @@
 #include <linux/usb.h>
 #include <linux/module.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "pegasus.h"
 
 /*
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 93a1bda1c1e5..95b7bd0d7abc 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -14,7 +14,7 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/usb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Version Information */
 #define DRIVER_VERSION "v0.6.2 (2004/08/27)"
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index ae6ecf401189..65ee2a6f248c 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -52,7 +52,7 @@
 
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const char version[] = "DLCI driver v0.35, 4 Jan 1997, mike.mclagan@linux.org";
 
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 7351e5440ed7..799830ffcae2 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -95,7 +95,7 @@
 
 #include <asm/cache.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 03696d35ee9c..33265eb50420 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -30,7 +30,7 @@
 #include <linux/if.h>
 #include <linux/hdlc.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "farsync.h"
 
diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index dc334c85d966..166696d2c496 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -39,7 +39,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "hd64570.h"
 
 #define get_msci(port)	  (phy_node(port) ?   MSCI1_OFFSET :   MSCI0_OFFSET)
diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index e92ecf1d3314..7ef49dab6855 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -39,7 +39,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "hd64572.h"
 
 #define NAPI_WEIGHT		16
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 6676607164d6..9df9ed62beff 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -35,7 +35,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 001b7796740d..4698450c77d1 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -59,7 +59,7 @@
 #include <asm/processor.h>             /* Processor type for cache alignment. */
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 //#include <asm/spinlock.h>
 
 #define DRIVER_MAJOR_VERSION     1
diff --git a/drivers/net/wan/lmc/lmc_media.c b/drivers/net/wan/lmc/lmc_media.c
index ff2e4a5654c7..cffe23bd16e1 100644
--- a/drivers/net/wan/lmc/lmc_media.c
+++ b/drivers/net/wan/lmc/lmc_media.c
@@ -19,7 +19,7 @@
 #include <asm/io.h>
 #include <asm/dma.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "lmc.h"
 #include "lmc_var.h"
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 3f83be98d469..3ca3419c54a0 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -63,7 +63,7 @@
 #include <asm/types.h>
 #include <asm/byteorder.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sbni.h"
 
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index 421ac5f85699..236c62538036 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -56,7 +56,7 @@
 
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const char* version = "SDLA driver v0.30, 12 Sep 1996, mike.mclagan@linux.org";
 
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index eb92d5ab7a27..e12f62356fd1 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -48,7 +48,7 @@
 #include <linux/timer.h>
 #include <asm/byteorder.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index 64176090b196..356aba9d3d53 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -148,7 +148,7 @@ that only one external action is invoked at a time.
 #include <linux/dma-mapping.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_geo.c b/drivers/net/wireless/intel/ipw2x00/libipw_geo.c
index 218f2a32de21..ce7eda20a68f 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_geo.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_geo.c
@@ -38,7 +38,7 @@
 #include <linux/types.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "libipw.h"
 
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_module.c b/drivers/net/wireless/intel/ipw2x00/libipw_module.c
index 2332075565f2..c58c5b2dcce5 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_module.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_module.c
@@ -46,7 +46,7 @@
 #include <linux/types.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/net_namespace.h>
 #include <net/arp.h>
 
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
index 1c1ec7bb9302..6df19f03355a 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c
@@ -29,7 +29,7 @@
 #include <linux/types.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/ctype.h>
 
 #include <net/lib80211.h>
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
index e8c039879b05..048f1e3ada11 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
@@ -39,7 +39,7 @@
 #include <linux/types.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "libipw.h"
 
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index a8a9bd8e176a..544ef7adde7d 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -32,7 +32,7 @@
 
 
 #include <asm/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/slab.h>
 #include <linux/netdevice.h>
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 1a16b8cb366e..544fc09dcb62 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -27,7 +27,7 @@
 #include <net/net_namespace.h>
 #include <net/iw_handler.h>
 #include <net/lib80211.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "hostap_wlan.h"
 #include "hostap_80211.h"
diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.c b/drivers/net/wireless/intersil/prism54/isl_38xx.c
index 6700387ef9ab..ce9d4db0d9ca 100644
--- a/drivers/net/wireless/intersil/prism54/isl_38xx.c
+++ b/drivers/net/wireless/intersil/prism54/isl_38xx.c
@@ -21,7 +21,7 @@
 #include <linux/delay.h>
 #include <linux/ktime.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 #include "prismcompat.h"
diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.c b/drivers/net/wireless/intersil/prism54/isl_ioctl.c
index 48e8a978a832..334717b0a2be 100644
--- a/drivers/net/wireless/intersil/prism54/isl_ioctl.c
+++ b/drivers/net/wireless/intersil/prism54/isl_ioctl.c
@@ -26,7 +26,7 @@
 #include <linux/pci.h>
 #include <linux/etherdevice.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "prismcompat.h"
 #include "isl_ioctl.h"
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 4fdc7223c894..b94479441b0c 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -53,7 +53,7 @@
 
 #include <asm/io.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Warning : these stuff will slow down the driver... */
 #define WIRELESS_SPY		/* Enable spying addresses */
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index d9d29ab88184..acec0d9ec422 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -51,7 +51,7 @@
 #include <pcmcia/ds.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "wl3501.h"
 
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index 5371b374f1fe..e8f68f5732f1 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -25,7 +25,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 static int
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index c0cc4e7ff023..67935fbbbcab 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -18,7 +18,7 @@
 #include <linux/capability.h>
 #include <linux/dcookies.h>
 #include <linux/fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "oprof.h"
 #include "event_buffer.h"
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index 134398e0231b..d77ebbfc67c9 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -15,7 +15,7 @@
 #include <linux/oprofile.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "oprof.h"
 
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 3ed6238f8f6e..553ef8a5d588 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -48,7 +48,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/cache.h>		/* for L1_CACHE_BYTES */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/dma.h>
 #include <asm/io.h>
diff --git a/drivers/parisc/ccio-rm-dma.c b/drivers/parisc/ccio-rm-dma.c
index f78f6f1aef47..1bf988010855 100644
--- a/drivers/parisc/ccio-rm-dma.c
+++ b/drivers/parisc/ccio-rm-dma.c
@@ -40,7 +40,7 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/io.h>
 #include <asm/hardware.h>
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c
index 783906fe659a..4dd9b1308128 100644
--- a/drivers/parisc/eisa_eeprom.c
+++ b/drivers/parisc/eisa_eeprom.c
@@ -26,7 +26,7 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/eisa_eeprom.h>
 
 #define 	EISA_EEPROM_MINOR 241
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index 21905fef2cbf..d9bffe8d29b9 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include <asm/eisa_bus.h>
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index b48243131993..ff1a332d76e4 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -49,7 +49,7 @@
 #include <asm/param.h>		/* HZ */
 #include <asm/led.h>
 #include <asm/pdc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* The control of the LEDs and LCDs on PARISC-machines have to be done 
    completely in software. The necessary calculations are done in a work queue
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index 3651c3871d5b..055f83fddc18 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -68,7 +68,7 @@
 
 #include <asm/pdc.h>
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/hardware.h>
 
 #define PDCS_VERSION	"0.30"
diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c
index 5bed17f68ef4..d998d0ed2bec 100644
--- a/drivers/parport/daisy.c
+++ b/drivers/parport/daisy.c
@@ -26,7 +26,7 @@
 #include <linux/sched.h>
 
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #undef DEBUG
 
diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c
index 2e21af43d91e..c0e7d21c88c2 100644
--- a/drivers/parport/ieee1284_ops.c
+++ b/drivers/parport/ieee1284_ops.c
@@ -18,7 +18,7 @@
 #include <linux/parport.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #undef DEBUG /* undef me for production */
 
diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index 6e3a60c78873..dd6d4ccb41e4 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -34,7 +34,7 @@
 
 #include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/superio.h>
 
 #include <linux/parport.h>
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index d763bc9e44c1..4d1d6eaf333d 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -10,7 +10,7 @@
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const struct {
 	const char *token;
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 74ed3e459a3e..8ee44a104ac4 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -23,7 +23,7 @@
 #include <linux/sysctl.h>
 #include <linux/device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index f6221d739f59..68d105aaf4e2 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -35,7 +35,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "acpiphp.h"
 #include "../pci.h"
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index ec009a7dba20..33d300d12411 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -40,7 +40,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
diff --git a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c
index c25fc9061059..daae8071a156 100644
--- a/drivers/pci/hotplug/cpqphp_nvram.c
+++ b/drivers/pci/hotplug/cpqphp_nvram.c
@@ -34,7 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
 
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 56013d0daf7f..7b0e97be9063 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -42,7 +42,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "../pci.h"
 #include "cpci_hotplug.h"
 
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 2408abe4ee8c..f82710a8694d 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -11,7 +11,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/capability.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 #include "pci.h"
 
diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
index b91c4da68365..9bf993e1f71e 100644
--- a/drivers/pci/syscall.c
+++ b/drivers/pci/syscall.c
@@ -10,7 +10,7 @@
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "pci.h"
 
 SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index c890a49587e4..aa2ee51d3547 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -68,7 +68,7 @@
 #include <linux/poll.h>
 #include <linux/miscdevice.h>
 #endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <acpi/video.h>
 
 #define dprintk(fmt, ...)			\
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index aa65a857a6b1..cacb43fb1df7 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -82,7 +82,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/initval.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <acpi/video.h>
 
 /* ThinkPad CMOS commands */
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index 4b6808ff0e5d..5c5b3d47b5f6 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "base.h"
 
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index c212db0fc65d..5ee6b2a5f8d5 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -26,7 +26,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "pnpbios.h"
 
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 84ca314c87e3..dd46e96a3034 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -20,7 +20,7 @@
 #include <linux/slab.h>
 
 #include <asm/debug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ipl.h>
 
 /* This is ugly... */
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 67bf50c9946f..ade04216c970 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -26,7 +26,7 @@
 #include <asm/idals.h>
 #include <asm/ebcdic.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cio.h>
 #include <asm/ccwdev.h>
 #include <asm/itcw.h>
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index 6c5d671304b4..8713fefd794b 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -20,7 +20,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <asm/ebcdic.h>
 
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index 113c1c1fa1af..9e3419124264 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -15,7 +15,7 @@
 
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* This is ugly... */
 #define PRINTK_HEADER "dasd_erp:"
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index e2fa759bf2ad..8b1341fb2e0d 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -16,7 +16,7 @@
 #include <linux/fs.h>
 #include <linux/blkpg.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* This is ugly... */
 #define PRINTK_HEADER "dasd_gendisk:"
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 9dfbd972f844..ec65c1e51c2a 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -21,7 +21,7 @@
 #include <asm/ccwdev.h>
 #include <asm/schid.h>
 #include <asm/cmb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* This is ugly... */
 #define PRINTK_HEADER "dasd_ioctl:"
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index bad7a196bf84..70dc2c4cd3f7 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -20,7 +20,7 @@
 #include <linux/proc_fs.h>
 
 #include <asm/debug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* This is ugly... */
 #define PRINTK_HEADER "dasd_proc:"
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 288f59a4147b..b9d7e755c8a3 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -41,7 +41,7 @@
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
 #include <linux/gfp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define XPRAM_NAME	"xpram"
 #define XPRAM_DEVS	1	/* one partition */
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 1b8d825623bd..9ec4ae056158 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -25,7 +25,7 @@
 #include <asm/cio.h>
 #include <asm/io.h>
 #include <asm/ebcdic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/delay.h>
 #include <asm/cpcmd.h>
 #include <asm/setup.h>
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 7b9c50aa4cc9..82c913318b73 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -14,7 +14,7 @@
 #include <linux/consolemap.h>
 #include <linux/kbd_kern.h>
 #include <linux/kbd_diacr.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "keyboard.h"
 
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index ebdeaa53182d..027ac6ae5eea 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -23,7 +23,7 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <net/iucv/iucv.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ebcdic.h>
 #include <asm/extmem.h>
 
diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c
index 9b5d1138b2e2..571a7e352755 100644
--- a/drivers/s390/char/monwriter.c
+++ b/drivers/s390/char/monwriter.c
@@ -21,7 +21,7 @@
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ebcdic.h>
 #include <asm/io.h>
 #include <asm/appldata.h>
diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c
index 6010cd347a08..91b26df5227d 100644
--- a/drivers/s390/char/sclp_rw.c
+++ b/drivers/s390/char/sclp_rw.c
@@ -13,7 +13,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/ctype.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sclp.h"
 #include "sclp_rw.h"
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 9259017a1295..236b736ae136 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/gfp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "ctrlchar.h"
 #include "sclp.h"
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 68d6ee7ae504..095481d32236 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -26,7 +26,7 @@
 #include <linux/reboot.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "sclp.h"
 #include "ctrlchar.h"
 
diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c
index 77f9b9c2f701..46ac1164f242 100644
--- a/drivers/s390/char/tape_char.c
+++ b/drivers/s390/char/tape_char.c
@@ -18,7 +18,7 @@
 #include <linux/mtio.h>
 #include <linux/compat.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define TAPE_DBF_AREA	tape_core_dbf
 
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 272cb6cd1b2a..e5ebe2fbee23 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -24,7 +24,7 @@
 #include <asm/ccwdev.h>
 #include <asm/cio.h>
 #include <asm/ebcdic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "raw3270.h"
 #include "tty3270.h"
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 2a67b496a9e2..65f5a794f26d 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -21,7 +21,7 @@
 #include <asm/compat.h>
 #include <asm/cpcmd.h>
 #include <asm/debug.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "vmcp.h"
 
 static debug_info_t *vmcp_debug;
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 3167e8581994..57974a1e0e03 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -21,7 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cpcmd.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index ff18f373af9a..04aceb694d51 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -15,7 +15,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cio.h>
 #include <asm/ccwdev.h>
 #include <asm/debug.h>
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index f771e5e9e26b..d3b51edb056e 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -23,7 +23,7 @@
 #include <asm/ipl.h>
 #include <asm/sclp.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/debug.h>
 #include <asm/processor.h>
 #include <asm/irqflags.h>
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index 9082476b51db..bf7f5d4c50e1 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -17,7 +17,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cio.h>
 #include <asm/ipl.h>
 
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 854a6e58dfea..51eece9af577 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -36,7 +36,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/hw_random.h>
 #include <linux/debugfs.h>
 #include <asm/debug.h>
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index c7d48a18199e..b97c5d5ee5a4 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -30,7 +30,7 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
index 26ceaa696765..600604782b65 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.c
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -31,7 +31,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mod_devicetable.h>
 
 #include "ap_bus.h"
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 2981024a2438..3f85b97ab8d2 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -62,7 +62,7 @@
 #include <net/dst.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ebcdic.h>
 
 #include <net/iucv/iucv.h>
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 33fbe8249fd5..04efed171c88 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -17,7 +17,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>		/* put_/get_user			*/
+#include <linux/uaccess.h>		/* put_/get_user			*/
 #include <asm/io.h>
 
 #include <asm/display7seg.h>
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 5609b602c54d..56e962a01493 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -29,7 +29,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/envctrl.h>
 #include <asm/io.h>
 
diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c
index 206ef4232adf..216f923161d1 100644
--- a/drivers/sbus/char/flash.c
+++ b/drivers/sbus/char/flash.c
@@ -15,7 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/upa.h>
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index a40ee1e37486..6ff61dad5e21 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -37,7 +37,7 @@
 #include <linux/string.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/pcic.h>
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index 4612691c6619..2c2e6a3b4c7e 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -40,7 +40,7 @@
 #include <linux/fs.h>
 #include <asm/oplib.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/openpromio.h>
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 316f87fe3299..00e7968a1d70 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -92,7 +92,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 970d8fa6bd53..b150e131b2e7 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -64,7 +64,7 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index aa412ab02765..33261b690774 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -210,7 +210,7 @@
 #include <linux/mutex.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 6678d1fd897b..1ee7c654f7b8 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -32,7 +32,7 @@
 #include <linux/slab.h>
 #include <linux/completion.h>
 #include <linux/blkdev.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/highmem.h> /* For flush_kernel_dcache_page */
 #include <linux/module.h>
 
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 5648b715fed9..e1daff230c7d 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -41,7 +41,7 @@
 #include <linux/delay.h> /* ssleep prototype */
 #include <linux/kthread.h>
 #include <linux/semaphore.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <scsi/scsi_host.h>
 
 #include "aacraid.h"
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 9e45749d55ed..af032c46ec0e 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -61,7 +61,7 @@
 #include <linux/circ_buf.h>
 #include <asm/dma.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 9d253cb83ee7..d9e15210b110 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -27,7 +27,7 @@
 #include <linux/fs.h>
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/fcntl.h>
 
 #include "bfad_drv.h"
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index 27c0dce22e72..5f75e638ec95 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver");
 ////////////////////////////////////////////////////////////////
 
 #include <linux/ioctl.h>	/* For SCSI-Passthrough */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/stat.h>
 #include <linux/slab.h>		/* for kmalloc() */
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 0a767740bf02..d020a13646ae 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -130,7 +130,7 @@
 
 #include <asm/dma.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index a83f705ed8a5..db17ad15b0c1 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -26,7 +26,7 @@
 #include <linux/timer.h>
 #include <linux/spinlock.h>
 #include <linux/gfp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/div64.h>
 #include <scsi/scsi_cmnd.h>
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index 45b9566b928e..b782bb60baf0 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -51,7 +51,7 @@
    #define _IPS_H_
 
 #include <linux/nmi.h>
-   #include <asm/uaccess.h>
+#include <linux/uaccess.h>
    #include <asm/io.h>
 
    /*
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 9d05302a3bcd..3c63c292cb92 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -34,7 +34,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
diff --git a/drivers/scsi/megaraid/megaraid_mm.h b/drivers/scsi/megaraid/megaraid_mm.h
index 55b425c0a654..a30e725f2d5c 100644
--- a/drivers/scsi/megaraid/megaraid_mm.h
+++ b/drivers/scsi/megaraid/megaraid_mm.h
@@ -17,7 +17,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 6484c382f670..d5cf15eb8c5e 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -42,7 +42,7 @@
 #include <linux/delay.h>
 #include <linux/uio.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fs.h>
 #include <linux/compat.h>
 #include <linux/blkdev.h>
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index a2960f5d98ec..e8196c55b633 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -52,7 +52,7 @@ static const char * osst_version = "0.99.4";
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dma.h>
 
 /* The driver prints some debugging information on the console if DEBUG
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 9f6012b78e56..b4336e0cd85f 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -9,7 +9,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * NVRAM support routines
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index c4f7b56fa6f6..8b8c814df5c7 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -12,7 +12,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 7a74b82e8973..480a597b3877 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -26,7 +26,7 @@
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
 #include <linux/gfp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1622e23138e0..b1933041da39 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -53,7 +53,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/pr.h>
 #include <linux/t10-pi.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index bed2bbd6b923..94352e4df831 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -46,7 +46,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_dbg.h>
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 03054c0e7689..dfffdf63e44c 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_dbg.h>
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 605887d5ee57..5f35b863e1a7 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -41,7 +41,7 @@ static const char *verstr = "20160209";
 #include <linux/delay.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dma.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index dfbb974927f2..dea16bb8c46a 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -127,7 +127,7 @@ static struct serial_state rs_table[1];
 
 #define NR_PORTS ARRAY_SIZE(rs_table)
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define serial_isroot()	(capable(CAP_SYS_ADMIN))
 
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index ce864875330e..9b5c0fb216b5 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -42,7 +42,7 @@
 #include <linux/slab.h>
 #include <linux/serial_core.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "hvc_console.h"
 
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index 3c4d7c2b4ade..7823d6d998cf 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -81,7 +81,7 @@
 #include <linux/tty_flip.h>
 #include <asm/hvconsole.h>
 #include <asm/hvcserver.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/vio.h>
 
 /*
diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c
index 96ce6bd1cc6f..2e578d6433af 100644
--- a/drivers/tty/hvc/hvsi.c
+++ b/drivers/tty/hvc/hvsi.c
@@ -46,7 +46,7 @@
 #include <asm/hvcall.h>
 #include <asm/hvconsole.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/vio.h>
 #include <asm/param.h>
 #include <asm/hvsi.h>
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index 60d37b225589..4caf0c3b1f99 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -47,7 +47,7 @@
 #include <linux/ratelimit.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "moxa.h"
 
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 69294ae154be..7b8f383fb090 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -43,7 +43,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "mxser.h"
 
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index a7fa016f31eb..eb278832f5ce 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -103,7 +103,7 @@
 #include <linux/bitops.h>
 
 #include <asm/termios.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Buffers for individual HDLC frames
diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c
index 345111467b85..305b6490d405 100644
--- a/drivers/tty/n_r3964.c
+++ b/drivers/tty/n_r3964.c
@@ -65,7 +65,7 @@
 #include <linux/n_r3964.h>
 #include <linux/poll.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*#define DEBUG_QUEUE*/
 
diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c
index c60a8d5e4020..d83783cfbade 100644
--- a/drivers/tty/serial/icom.c
+++ b/drivers/tty/serial/icom.c
@@ -53,7 +53,7 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "icom.h"
 
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index d0847375ea64..9939c3d9912b 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -36,7 +36,7 @@
 #include <linux/mutex.h>
 
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * This is used to lock changes in serial line configuration.
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 415885c56435..657eed82eeb3 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -107,7 +107,7 @@
 #define PUT_USER(error,value,addr) error = put_user(value,addr)
 #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define RCLRVALUE 0xffff
 
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 8267bcf2405e..31885f20fc15 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -77,7 +77,7 @@
 #include <asm/irq.h>
 #include <asm/dma.h>
 #include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINK_GT_MODULE))
 #define SYNCLINK_GENERIC_HDLC 1
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index d66620f7eaa3..51e8846cd68f 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -79,7 +79,7 @@
 #define PUT_USER(error,value,addr) error = put_user(value,addr)
 #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static MGSL_PARAMS default_params = {
 	MGSL_MODE_HDLC,			/* unsigned long mode */
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index bf36ac9aee41..f27fc0f14c11 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -22,7 +22,7 @@
 #include <linux/compat.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #undef TTY_DEBUG_WAIT_UNTIL_SENT
 
diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c
index 71e81406ef71..1f6e17fc3fb0 100644
--- a/drivers/tty/vt/consolemap.c
+++ b/drivers/tty/vt/consolemap.c
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/tty.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/console.h>
 #include <linux/consolemap.h>
 #include <linux/vt_kern.h>
diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
index 368ce1803e8f..36e1b8c7680f 100644
--- a/drivers/tty/vt/selection.c
+++ b/drivers/tty/vt/selection.c
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/kbd_kern.h>
 #include <linux/vt_kern.h>
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 14a2b5f11bca..56dcff6059d3 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -39,7 +39,7 @@
 #include <linux/slab.h>
 #include <linux/notifier.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index f62c598810ff..a56edf2d58eb 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -29,7 +29,7 @@
 #include <linux/timex.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/kbd_kern.h>
 #include <linux/vt_kern.h>
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index 4dec9df8764b..5a59da0dc98a 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -64,7 +64,7 @@
 
 #include "usbatm.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/crc32.h>
 #include <linux/errno.h>
 #include <linux/init.h>
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 143454ea385b..1fa5c0f29c64 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -29,7 +29,7 @@
 #include <linux/random.h>
 #include <linux/pm_qos.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include "hub.h"
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 10b2576f8b6a..e8f4102d19df 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -20,7 +20,7 @@
 #include <linux/uts.h>
 #include <linux/wait.h>
 #include <linux/compiler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/poll.h>
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 5d3d914ab4fb..683098afa93e 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -42,7 +42,7 @@
 #include <linux/bitops.h>
 #include <linux/dmi.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 9a82f8308ad7..01a9373b7e18 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -48,7 +48,7 @@
 #include <linux/module.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 2975e80b7a56..debc1fd74b0d 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -23,7 +23,7 @@
 #include <linux/module.h>
 #include <linux/completion.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 
 /* image constants */
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 9ca595632f17..3bc5356832db 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -28,7 +28,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/input.h>
 #include <linux/usb.h>
 #include <linux/poll.h>
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index c8fbe7b739a0..b10e26c74a90 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -83,7 +83,7 @@
 #include <linux/module.h>
 #include <linux/completion.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/poll.h>
 
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 1a874a1f3890..91c22276c03b 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -20,7 +20,7 @@
 #include <linux/slab.h>
 #include <linux/time64.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "usb_mon.h"
 
diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c
index 5388a339cfb8..5bdf73a57498 100644
--- a/drivers/usb/mon/mon_stat.c
+++ b/drivers/usb/mon/mon_stat.c
@@ -12,7 +12,7 @@
 #include <linux/export.h>
 #include <linux/usb.h>
 #include <linux/fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "usb_mon.h"
 
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index e59334b09c41..db1a4abf2806 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -14,7 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "usb_mon.h"
 
diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index 9b22d946c089..4fef50e5c8c1 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -37,7 +37,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "musb_core.h"
 #include "musb_debug.h"
diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 1e11614322fe..42d02a206059 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/gio_device.h>
diff --git a/drivers/video/fbdev/68328fb.c b/drivers/video/fbdev/68328fb.c
index 17f21cedff9b..c0c6b88d3839 100644
--- a/drivers/video/fbdev/68328fb.c
+++ b/drivers/video/fbdev/68328fb.c
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 
diff --git a/drivers/video/fbdev/hitfb.c b/drivers/video/fbdev/hitfb.c
index 9d68dc9ee7bf..abe3e54d4506 100644
--- a/drivers/video/fbdev/hitfb.c
+++ b/drivers/video/fbdev/hitfb.c
@@ -22,7 +22,7 @@
 #include <linux/fb.h>
 
 #include <asm/machvec.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/hd64461.h>
diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c
index 9476d196f510..16f16f5e1a4b 100644
--- a/drivers/video/fbdev/hpfb.c
+++ b/drivers/video/fbdev/hpfb.c
@@ -16,7 +16,7 @@
 #include <linux/dio.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static struct fb_info fb_info = {
 	.fix = {
diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c
index 8778e01cebac..1c3c7ab26a95 100644
--- a/drivers/video/fbdev/mx3fb.c
+++ b/drivers/video/fbdev/mx3fb.c
@@ -33,7 +33,7 @@
 #include <linux/platform_data/video-mx3fb.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define MX3FB_NAME		"mx3_sdc_fb"
 
diff --git a/drivers/video/fbdev/q40fb.c b/drivers/video/fbdev/q40fb.c
index 7487f76f6275..04ea330ccf5d 100644
--- a/drivers/video/fbdev/q40fb.c
+++ b/drivers/video/fbdev/q40fb.c
@@ -18,7 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 #include <asm/q40_master.h>
 #include <linux/fb.h>
diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c
index d0a4e2f79a57..d80bc8a3200f 100644
--- a/drivers/video/fbdev/sm501fb.c
+++ b/drivers/video/fbdev/sm501fb.c
@@ -31,7 +31,7 @@
 #include <linux/console.h>
 #include <linux/io.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/div64.h>
 
 #ifdef CONFIG_PM
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 7df4228e25f0..accfef71e984 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -67,7 +67,7 @@
 #include <linux/io.h>
 
 #include <asm/grfioctl.h>	/* for HP-UX compatibility */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "sticore.h"
 
diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c
index 10951c82f6ed..d570e19a2864 100644
--- a/drivers/video/fbdev/w100fb.c
+++ b/drivers/video/fbdev/w100fb.c
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <video/w100fb.h>
 #include "w100fb.h"
 
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index 6ac2579da0eb..05397305fccd 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -16,7 +16,7 @@
 #include <linux/export.h>
 
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/amigahw.h>
 #include <asm/setup.h>
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index d7b78d531e63..6a0f3fa85ef7 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -34,7 +34,7 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/utsname.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/idr.h>
 #include <linux/uio.h>
 #include <linux/slab.h>
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 2853b4095344..35efb9a31dd7 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -14,7 +14,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 static struct proc_dir_entry *proc_afs;
diff --git a/fs/aio.c b/fs/aio.c
index 8c79e1a53af9..955c5241a8f2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -42,7 +42,7 @@
 #include <linux/mount.h>
 
 #include <asm/kmap_types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 80ef38c73e5a..3168ee4e77f4 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -20,7 +20,7 @@
 #include <linux/magic.h>
 #include <linux/anon_inodes.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static struct vfsmount *anon_inode_mnt __read_mostly;
 static struct inode *anon_inode_inode;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 1e5c896f6b79..f2deec0a62f0 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -16,7 +16,7 @@
 #include <linux/vfs.h>
 #include <linux/writeback.h>
 #include <linux/uio.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "bfs.h"
 
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ae1b5404fced..2a59139f520b 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -26,7 +26,7 @@
 #include <linux/coredump.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/a.out-core.h>
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e6c1bd443806..29a02daf08a9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -36,7 +36,7 @@
 #include <linux/coredump.h>
 #include <linux/sched.h>
 #include <linux/dax.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 464a972e88c1..d2e36f82c35d 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -37,7 +37,7 @@
 #include <linux/coredump.h>
 #include <linux/dax.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/param.h>
 #include <asm/pgalloc.h>
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7c4507224ed6..6254cee8f8f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -32,7 +32,7 @@
 #include <linux/badblocks.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/falloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 struct bdev_inode {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 3d03e48a9213..9727e1dcacd5 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -24,7 +24,7 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e3fed9249a04..b47261858e6d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -35,7 +35,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/task_io_accounting_ops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b822bf364c2b..35ae49ed1f76 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -34,7 +34,7 @@
 #include <linux/pagevec.h>
 #include <linux/freezer.h>
 #include <linux/namei.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <linux/inet.h>
 #include <linux/module.h>
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5f02edc819af..fbb84c08e3cd 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -30,7 +30,7 @@
 #include <linux/tcp.h>
 #include <linux/bvec.h>
 #include <linux/highmem.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <linux/mempool.h>
 #include "cifspdu.h"
diff --git a/fs/compat.c b/fs/compat.c
index 3f4908c28698..e50a2114f474 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -49,7 +49,7 @@
 #include <linux/pagemap.h>
 #include <linux/aio.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
 #include "internal.h"
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f2d7402abe02..11d087b2b28e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -76,7 +76,7 @@
 #include <scsi/sg.h>
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
 #include <linux/if_bonding.h>
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2c6312db8516..39da1103d341 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
diff --git a/fs/coredump.c b/fs/coredump.c
index eb9c92c9b20f..e525b6017cdf 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -38,7 +38,7 @@
 #include <linux/path.h>
 #include <linux/timekeeping.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
 #include <asm/exec.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index 252378359a8f..769903dbc19d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -26,7 +26,7 @@
 #include <linux/export.h>
 #include <linux/mount.h>
 #include <linux/file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
diff --git a/fs/dcookies.c b/fs/dcookies.c
index a26a701ef512..0d0461cf2431 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -26,7 +26,7 @@
 #include <linux/mutex.h>
 #include <linux/path.h>
 #include <linux/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* The dcookies are allocated from a kmem_cache and
  * hashed onto a small number of lists. None of the
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b670f5601fbb..748e8d59e611 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -38,7 +38,7 @@
 #include <linux/mutex.h>
 #include <linux/idr.h>
 #include <linux/ratelimit.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/dlm.h>
 #include "config.h"
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
index 5bbf9612140c..70f5d4f9a945 100644
--- a/fs/efs/efs.h
+++ b/fs/efs/efs.h
@@ -14,7 +14,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define EFS_VERSION "1.0a"
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 10db91218933..bcb68fcc8445 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,7 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <linux/device.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
diff --git a/fs/exec.c b/fs/exec.c
index eadbf5069c38..e57946610733 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,7 +58,7 @@
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
 
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 9d617423e936..191e02b28ce8 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -14,7 +14,7 @@
 #include <linux/compat.h>
 #include <linux/mount.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6cb042b53b5b..9e25a71fe1a2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,7 +31,7 @@
 #include <linux/mount.h>
 #include <linux/log2.h>
 #include <linux/quotaops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b1f8416923ab..3e295d3350a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -37,7 +37,7 @@
 #include <linux/quotaops.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fiemap.h>
 #include <linux/backing-dev.h>
 #include "ext4_jbd2.h"
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 49fd1371bfa2..d534399cf607 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,7 +15,7 @@
 #include <linux/file.h>
 #include <linux/quotaops.h>
 #include <linux/uuid.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 63a6b6332682..66845a08a87a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -38,7 +38,7 @@
 #include <linux/log2.h>
 #include <linux/crc16.h>
 #include <linux/cleancache.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6e2771c210f6..e1c54f20325c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -25,7 +25,7 @@
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
diff --git a/fs/fhandle.c b/fs/fhandle.c
index ca3c3dd01789..5559168d5637 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -8,7 +8,7 @@
 #include <linux/fs_struct.h>
 #include <linux/fsnotify.h>
 #include <linux/personality.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 #include "mount.h"
 
diff --git a/fs/filesystems.c b/fs/filesystems.c
index c5618db110be..cac75547d35c 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Handling of filesystem drivers list.
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e23ff70b3435..016c11eaca7c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -22,7 +22,7 @@
 #include <linux/swap.h>
 #include <linux/crc32.h>
 #include <linux/writeback.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
 #include <linux/delay.h>
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 14cbf60167a7..f7b3ba61add5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -21,7 +21,7 @@
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/kthread.h>
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6cd9f84967b8..eb7724b8578a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -19,7 +19,7 @@
 #include <linux/crc32.h>
 #include <linux/fiemap.h>
 #include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c9ff1cf7d4f3..f8d30e41d1d3 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -15,7 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/genhd.h>
 
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index aee4485ad8a9..763d659db91b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -14,7 +14,7 @@
 #include <linux/buffer_head.h>
 #include <linux/crc32.h>
 #include <linux/gfs2_ondisk.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index a4a577088d19..d87721aeb575 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -14,7 +14,7 @@
 #include <linux/xattr.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/posix_acl_xattr.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4cdec5a19347..6d0783e2e276 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -23,7 +23,7 @@
 #include <linux/workqueue.h>
 
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "hfs.h"
 
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 99627f8a0a18..0a156d84e67d 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -16,7 +16,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "hfsplus_fs.h"
 
 /*
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4fb7b10f3a05..54de77e78775 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -37,7 +37,7 @@
 #include <linux/migrate.h>
 #include <linux/uio.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const struct super_operations hugetlbfs_ops;
 static const struct address_space_operations hugetlbfs_aops;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8ed971eeab44..a097048ed1a3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -47,7 +47,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 
 #ifdef CONFIG_JBD2_DEBUG
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index b6fd1ff29ddf..fc89f9436784 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -13,7 +13,7 @@
 #include <linux/sched.h>
 #include <linux/blkdev.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "jfs_filsys.h"
 #include "jfs_debug.h"
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index a37eb5f8cbc0..a70907606025 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_debug.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 85671f7f8518..2be7c9ce6663 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -31,7 +31,7 @@
 #include <linux/exportfs.h>
 #include <linux/crc32.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/blkdev.h>
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 6637aa60c1da..e973cd51f126 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,7 +16,7 @@
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/fs/locks.c b/fs/locks.c
index 22c5b4aa4961..26811321d39b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -131,7 +131,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/filelock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
diff --git a/fs/namei.c b/fs/namei.c
index d9fc7617b9e4..ad74877e1442 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -37,7 +37,7 @@
 #include <linux/hash.h>
 #include <linux/bitops.h>
 #include <linux/init_task.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 #include "mount.h"
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 6df2a3827574..088f52484d6e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -18,7 +18,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/namei.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include "ncp_fs.h"
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 83ca77231707..76965e772264 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -8,7 +8,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/time.h>
 #include <linux/kernel.h>
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index ba611bf1aff3..7eb89c23c847 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -13,7 +13,7 @@
 
 #include <linux/module.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 #include <linux/time.h>
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 0a3f9b594602..4434e4977cf3 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -20,7 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "ncp_fs.h"
 
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 33b873b259a8..39f57bef8531 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -18,7 +18,7 @@
 #include <linux/fcntl.h>
 #include <linux/memcontrol.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "ncp_fs.h"
 
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 17cfb743b5bf..b4c87cfcee95 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -21,7 +21,7 @@
 #include <linux/fcntl.h>
 #include <linux/pagemap.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #include <asm/string.h>
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 471bc3d1139e..f32f272ee501 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -16,7 +16,7 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/in.h>
 #include <linux/net.h>
 #include <linux/mm.h>
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 421b6f91e8ec..a6d26b46fc05 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -21,7 +21,7 @@
  */
 
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index be88bcdca692..aab32fc3d6a8 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -52,7 +52,7 @@
 #include <linux/nfs_page.h>
 #include <linux/sunrpc/clnt.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include "internal.h"
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 157cb43ce9db..26dbe8b0c10d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -29,7 +29,7 @@
 #include <linux/gfp.h>
 #include <linux/swap.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "delegation.h"
 #include "internal.h"
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a608ffd28acc..391dafaf9182 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -30,7 +30,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 011e4f8c1e01..5ca4d96b1942 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -39,7 +39,7 @@
 #include <linux/compat.h>
 #include <linux/freezer.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ddce94ce8142..6bca17883b93 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -55,7 +55,7 @@
 #include <linux/nsproxy.h>
 #include <linux/rcupdate.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6e761f3f4cbf..b00d53d13d47 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -24,7 +24,7 @@
 #include <linux/freezer.h>
 #include <linux/wait.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "delegation.h"
 #include "internal.h"
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index c16bf5af6831..34c1c449fddf 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/nsproxy.h>
 #include <linux/sunrpc/addr.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "state.h"
 #include "netns.h"
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a21abe7caf7..26c6fdb4bf67 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -26,7 +26,7 @@
 #include <linux/jhash.h>
 #include <linux/ima.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
 #include <linux/security.h>
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 99510d811a8c..358ed7e1195a 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -30,7 +30,7 @@
 #include <linux/writeback.h>
 
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "attrib.h"
 #include "bitmap.h"
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index dfe162f5fd4c..d331c2386b94 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -24,7 +24,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "masklog.h"
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 8abab16b4602..d4b5c81f0445 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -62,7 +62,7 @@
 #include <linux/export.h>
 #include <net/tcp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "heartbeat.h"
 #include "tcp.h"
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1079fae5aa12..9ab9e1892b5f 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -45,7 +45,7 @@
 #include <linux/backing-dev.h>
 #include <linux/poll.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "stackglue.h"
 #include "userdlm.h"
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index c9e828ec3c8e..dae9eb7c441e 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/reboot.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "stackglue.h"
 
diff --git a/fs/open.c b/fs/open.c
index d3ed8171e8e0..9921f70bc5ca 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -19,7 +19,7 @@
 #include <linux/mount.h>
 #include <linux/fcntl.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/pagemap.h>
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index c003a667ed1a..13215f26e321 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -16,7 +16,7 @@
 #include <asm/openprom.h>
 #include <asm/oplib.h>
 #include <asm/prom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static DEFINE_MUTEX(op_mutex);
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 8e0d9f26dfad..73b84baf58f8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -23,7 +23,7 @@
 #include <linux/fcntl.h>
 #include <linux/memcontrol.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 
 #include "internal.h"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5ea836362870..8e7e61b28f31 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -47,7 +47,7 @@
  *  Overall revision about smaps.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/time.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 7eb3cefcf2a3..f6a01f09f79d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -22,7 +22,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 873300164dc6..842a5ff5b85c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,7 +24,7 @@
 #include <linux/mount.h>
 #include <linux/magic.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 5c89a07e3d7f..0b80ad87b4d6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,7 +23,7 @@
 #include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <linux/list.h>
 #include <linux/ioport.h>
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 05f8dcdb086e..f9387bb7631b 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -14,7 +14,7 @@
 #include <linux/fs.h>
 #include <linux/syslog.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 
 extern wait_queue_head_t log_wait;
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index f8595e8b5cd0..75634379f82e 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -25,7 +25,7 @@
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
 #include <asm/div64.h>
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 3ecd445e830d..a2066e6dee90 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -13,7 +13,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/kernel-page-flags.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 #define KPMSIZE sizeof(u64)
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 7ae6b1da7cab..ffd72a6c6e04 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -8,7 +8,7 @@
  *  proc net directory handling functions
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/time.h>
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15f327bed8c6..901bd06f437d 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -4,7 +4,7 @@
  * Copyright 1997, Theodore Ts'o
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 4bd0373576b5..1988440b2049 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -6,7 +6,7 @@
  *  proc root directory handling functions
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/time.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 958f32545064..8f96a49178d0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -17,7 +17,7 @@
 #include <linux/shmem_fs.h>
 
 #include <asm/elf.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 8ab782d8b33d..5105b1599981 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -22,7 +22,7 @@
 #include <linux/list.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include "internal.h"
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2bcbf4e77982..2ef7ce75c062 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -23,7 +23,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8621c039b536..26e45863e499 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -35,7 +35,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 #define RAMFS_DEFAULT_MODE	0755
diff --git a/fs/read_write.c b/fs/read_write.c
index 7537b6b6b5a2..5816d4c4cab0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include "internal.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
diff --git a/fs/readdir.c b/fs/readdir.c
index 9d0212c374d6..0e8a7f355f7a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -19,7 +19,7 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int iterate_dir(struct file *file, struct dir_context *ctx)
 {
diff --git a/fs/select.c b/fs/select.c
index 3d4f85defeab..305c0daf5d67 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -31,7 +31,7 @@
 #include <net/busy_poll.h>
 #include <linux/vmalloc.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 /*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index a11f271800ef..ca69fb99e41a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -15,7 +15,7 @@
 #include <linux/printk.h>
 #include <linux/string_helpers.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 
 static void seq_set_overflow(struct seq_file *m)
diff --git a/fs/stat.c b/fs/stat.c
index 0b210c3ead5c..a268b7f27adf 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -15,7 +15,7 @@
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 void generic_fillattr(struct inode *inode, struct kstat *stat)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 1bc0bd6a9848..7e41aee7b69a 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -25,7 +25,7 @@
  *        David S. Miller (davem@caip.rutgers.edu), 1995
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index f04ab232d08d..131b2b77c818 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -71,7 +71,7 @@
 
 #include <stdarg.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/fs/utimes.c b/fs/utimes.c
index 5fdb505e307c..32b15b3f6629 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -8,7 +8,7 @@
 #include <linux/stat.h>
 #include <linux/utime.h>
 #include <linux/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 #ifdef __ARCH_WANT_SYS_UTIME
diff --git a/fs/xattr.c b/fs/xattr.c
index 2d13b4e62fae..7e3317cf4045 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,7 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/posix_acl_xattr.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static const char *
 strcmp_prefix(const char *a, const char *a_prefix)
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 321f57721b92..7c49938c5aed 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -19,7 +19,7 @@
 #include <linux/ioctl.h>
 #include <linux/mount.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_format.h"
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index a415f822f2c1..e467218c0098 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -83,7 +83,7 @@ typedef __u32			xfs_nlink_t;
 #include <asm/page.h>
 #include <asm/div64.h>
 #include <asm/param.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
diff --git a/include/asm-generic/termios-base.h b/include/asm-generic/termios-base.h
index 0a769feb22b0..157bbf6f4510 100644
--- a/include/asm-generic/termios-base.h
+++ b/include/asm-generic/termios-base.h
@@ -4,7 +4,7 @@
 #ifndef _ASM_GENERIC_TERMIOS_BASE_H
 #define _ASM_GENERIC_TERMIOS_BASE_H
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef __ARCH_TERMIO_GETPUT
 
diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h
index 4fa6fe0fc2a2..8c13a16b074e 100644
--- a/include/asm-generic/termios.h
+++ b/include/asm-generic/termios.h
@@ -2,7 +2,7 @@
 #define _ASM_GENERIC_TERMIOS_H
 
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <uapi/asm-generic/termios.h>
 
 /*	intr=^C		quit=^\		erase=del	kill=^U
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a9cfd33c7b1a..192016e2b518 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -61,7 +61,7 @@
 
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <uapi/drm/drm.h>
 #include <uapi/drm/drm_mode.h>
diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h
index 0fc6ff276221..8d80fdc68647 100644
--- a/include/linux/isdnif.h
+++ b/include/linux/isdnif.h
@@ -500,6 +500,6 @@ typedef struct {
  *
  */
 extern int register_isdn(isdn_if*);
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #endif /* __ISDNIF_H__ */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7dbe9148b2f8..f29f80f81dbf 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -9,7 +9,7 @@
 #include <linux/list.h>
 #include <linux/highmem.h>
 #include <linux/compiler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
 #include <linux/hardirq.h> /* for in_interrupt() */
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 37b057b63b46..a46d6755035e 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -8,7 +8,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/sysctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <uapi/linux/poll.h>
 
 extern struct ctl_table epoll_table[]; /* for sysctl */
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5c30891e84e5..35d0fabd2782 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -22,7 +22,7 @@
 #include <linux/errno.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/checksum.h>
 
 #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f0dcaebebddb..d8833a86cd7e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -69,7 +69,7 @@
 #include <net/ip6_route.h>
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <net/sock.h>
 #include <net/snmp.h>
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8029d2a51f14..958a24d8fae7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -59,7 +59,7 @@
 #include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
diff --git a/init/init_task.c b/init/init_task.c
index 11f83be1fa79..53d4ce942a88 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -9,7 +9,7 @@
 #include <linux/mm.h>
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/kernel/capability.c b/kernel/capability.c
index 4984e1f552eb..a98e814f216f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,7 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Leveraged for setting/resetting capabilities
diff --git a/kernel/compat.c b/kernel/compat.c
index b3a047f208a7..19aec5d98108 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -28,7 +28,7 @@
 #include <linux/ptrace.h>
 #include <linux/gfp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
 {
diff --git a/kernel/configs.c b/kernel/configs.c
index c18b1f1ae515..2df132b20217 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -28,7 +28,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /**************************************************/
 /* the actual current config file                 */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 29f815d2ef7e..b3088886cd37 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -55,7 +55,7 @@
 #include <linux/backing-dev.h>
 #include <linux/sort.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/mutex.h>
 #include <linux/cgroup.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index aacff8e2aec0..8f14b866f9f6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,7 +56,7 @@
 #include <linux/kcov.h>
 #include <linux/random.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
diff --git a/kernel/extable.c b/kernel/extable.c
index e820ccee9846..e3beec4a2339 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * mutex protecting text section modification (dynamic code patching).
diff --git a/kernel/fork.c b/kernel/fork.c
index 869b8ccc00bf..11c5c8ab827c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -79,7 +79,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 4ae3232e7a28..3f409968e466 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -13,7 +13,7 @@
 #include <linux/ptrace.h>
 #include <linux/syscalls.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 /*
diff --git a/kernel/groups.c b/kernel/groups.c
index 2fcadd66a8fd..8dd7a61b7115 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -8,7 +8,7 @@
 #include <linux/syscalls.h>
 #include <linux/user_namespace.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 struct group_info *groups_alloc(int gidsetsize)
 {
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0277d1216f80..d45c96073afb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -39,7 +39,7 @@
 #include <linux/rwsem.h>
 #include <linux/ptrace.h>
 #include <linux/async.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <trace/events/module.h>
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d63095472ea9..43460104f119 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -52,7 +52,7 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define KPROBE_HASH_BITS 6
 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index a0f61effad25..6d1fcc786081 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -18,7 +18,7 @@
 #include <linux/debug_locks.h>
 #include <linux/vmalloc.h>
 #include <linux/sort.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/div64.h>
 
 #include "lockdep_internals.h"
diff --git a/kernel/module.c b/kernel/module.c
index f7482db0f843..5088784c0cf9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,7 +46,7 @@
 #include <linux/string.h>
 #include <linux/mutex.h>
 #include <linux/rculist.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <linux/license.h>
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 4f0f0604f1c4..2d8e2b227db8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 #include <linux/ktime.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 35310b627388..22df9f7ff672 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -25,7 +25,7 @@
 #include <linux/cpu.h>
 #include <linux/freezer.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "power.h"
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index e2cdd87e7a63..8b2696420abb 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -46,7 +46,7 @@
 #include <linux/ctype.h>
 #include <linux/uio.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/sections.h>
 
 #define CREATE_TRACE_POINTS
diff --git a/kernel/profile.c b/kernel/profile.c
index 2dbccf2d806c..f67ce0aa6bc4 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -408,7 +408,7 @@ void profile_tick(int type)
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
 {
diff --git a/kernel/signal.c b/kernel/signal.c
index ae60996fedff..f5d4e275345e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -39,7 +39,7 @@
 #include <trace/events/signal.h>
 
 #include <asm/param.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
 #include <asm/cacheflush.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index 9758892a2d09..842914ef7de4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -57,7 +57,7 @@
 /* Move somewhere else to avoid recompiling? */
 #include <generated/utsrelease.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/unistd.h>
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a292ebcbbb6..8dbaec0e4f7f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,7 +67,7 @@
 #include <linux/bpf.h>
 #include <linux/mount.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/processor.h>
 
 #ifdef CONFIG_X86
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 08be5c99d26b..161e340395d5 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -50,7 +50,7 @@
 #include <linux/timer.h>
 #include <linux/freezer.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <trace/events/timer.h>
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 2b9f45bc955d..a45afb7277c2 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -14,7 +14,7 @@
 #include <linux/hrtimer.h>
 #include <trace/events/timer.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /**
  * itimer_get_remtime - get remaining time for the timer
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f246763c9947..e9e8c10f0d9a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -6,7 +6,7 @@
 #include <linux/posix-timers.h>
 #include <linux/errno.h>
 #include <linux/math64.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/kernel_stat.h>
 #include <trace/events/timer.h>
 #include <linux/tick.h>
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f2826c35e918..42d7b9558741 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -36,7 +36,7 @@
 #include <linux/time.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
diff --git a/kernel/time/time.c b/kernel/time/time.c
index bd62fb8e8e77..a3a9a8a029dc 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -38,7 +38,7 @@
 #include <linux/math64.h>
 #include <linux/ptrace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 #include <generated/timeconst.h>
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ea4fbf8477a9..ec33a6933eae 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -43,7 +43,7 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/div64.h>
 #include <asm/timex.h>
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b288bb3..afe6cd1944fc 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -17,7 +17,7 @@
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "tick-internal.h"
 
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 087204c733eb..afddded947df 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -43,7 +43,7 @@
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * This is our basic unit of interest: a timer expiry event identified
diff --git a/kernel/uid16.c b/kernel/uid16.c
index cc40793464e3..71645ae9303a 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -14,7 +14,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
 {
diff --git a/lib/extable.c b/lib/extable.c
index 0be02ad561e9..62968daa66a9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x)	((x)->insn)
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index b8e2080c1a47..bf85e05ce858 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,7 +17,7 @@
 #include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "kstrtox.h"
 
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 175ec51c346d..4048897e7b01 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -68,7 +68,7 @@
 #include <net/ip.h>
 #include "slab.h"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <trace/events/vmscan.h>
 
diff --git a/mm/memory.c b/mm/memory.c
index 455c3e628d52..7d23b5050248 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -68,7 +68,7 @@
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6d3639e1f254..2e346645eb80 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -96,7 +96,7 @@
 #include <linux/printk.h>
 
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/mm/mincore.c b/mm/mincore.c
index bfb866435478..ddb872da3f5b 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -16,7 +16,7 @@
 #include <linux/swapops.h>
 #include <linux/hugetlb.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
diff --git a/mm/mmap.c b/mm/mmap.c
index 1af87c14183d..dc4291dcc99b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -45,7 +45,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pkeys.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cc2459c57f60..f9c07f54dd62 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,7 +25,7 @@
 #include <linux/perf_event.h>
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 210d7ec2843c..24f9f5f39145 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -35,7 +35,7 @@
 #include <linux/audit.h>
 #include <linux/printk.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
diff --git a/mm/shmem.c b/mm/shmem.c
index b1b20dc63265..bb53285a1d99 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -71,7 +71,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "internal.h"
diff --git a/mm/util.c b/mm/util.c
index 1a41553db866..3cb2164f4099 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -13,7 +13,7 @@
 #include <linux/vmalloc.h>
 
 #include <asm/sections.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a5584384eabc..3ca82d44edd3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -32,7 +32,7 @@
 #include <linux/llist.h>
 #include <linux/bitops.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
diff --git a/net/802/fc.c b/net/802/fc.c
index 7b9219022418..1bb496ea997e 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -10,7 +10,7 @@
  *		v 1.0 03/22/99
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 5e4427beab2b..4460606e9c36 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <net/arp.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Create the HIPPI MAC header for an arbitrary protocol layer
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 691f0ad7067d..467069b73ce1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -34,7 +34,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/if_vlan.h>
 #include "vlan.h"
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2fdebabbfacd..90fcf5fc2e0a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -32,7 +32,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/mm.h>
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index e7c9b0ea17a1..ac2542b7be88 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -21,7 +21,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 3d106767b272..9a3a301e1e2f 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -23,7 +23,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 9bd31e88aeca..891596e74278 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -22,7 +22,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index e05bd57b5afd..28827e81ba2b 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -23,7 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5237dff6941d..5fb2104b7304 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -24,7 +24,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 7f16e8a931b2..8c07c28569e4 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -23,7 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index bb5a0e4e98d9..860752639b1a 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -25,7 +25,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 2fa3be965101..183b1c583d56 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -23,7 +23,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/mm.h>
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 8ddd41baa81c..b11a5f466fcc 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -25,7 +25,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d39097737e38..e1fda27cb27c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -31,7 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index 3fbf8f7b2cf4..8632b86e843e 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -29,7 +29,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
index 8b66a41e538f..94bd06396a43 100644
--- a/net/ax25/ax25_std_subr.c
+++ b/net/ax25/ax25_std_subr.c
@@ -20,7 +20,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 2c0d6ef66f9d..30bbc675261d 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -24,7 +24,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 655a7d4c96e1..4855d18a8511 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -25,7 +25,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index c3cffa79bafb..23a6f38a80bf 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -28,7 +28,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 4ad2fb7bcd35..0403b0def7e6 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -25,7 +25,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index bca5ead3e973..ed3b3192fb00 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -19,7 +19,7 @@
 #include <linux/list.h>
 #include <linux/netfilter_bridge.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "br_private.h"
 
 #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index d99b2009771a..da8157c57eb1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -18,7 +18,7 @@
 #include <linux/slab.h>
 #include <linux/times.h>
 #include <net/net_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "br_private.h"
 
 static int get_bridge_ifindices(struct net *net, int *indices, int num)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index b12501a77f18..8ca6a929bf12 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -40,7 +40,7 @@
 #include <net/netfilter/br_netfilter.h>
 #include <net/netns/generic.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "br_private.h"
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5989661c659f..96c072e71ea2 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -38,7 +38,7 @@
 #include <net/route.h>
 #include <net/netfilter/br_netfilter.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "br_private.h"
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1ab6014cf0f8..537e3d506fc2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -23,7 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
 #include <linux/audit.h>
diff --git a/net/compat.c b/net/compat.c
index 1cd2ec046164..96c544b05b15 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -28,7 +28,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/compat.h>
 
 int get_compat_msghdr(struct msghdr *kmsg,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9482037a5c8c..662bea587165 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -36,7 +36,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 6372117f653f..037ffd27fcc2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -72,7 +72,7 @@
  *				        - netif_rx() feedback
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index 7190bd648154..e6c412b94dec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -40,7 +40,7 @@
 #include <net/flow_dissector.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/filter.h>
 #include <linux/ratelimit.h>
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 101b5d0e2142..0385dece1f6f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -14,7 +14,7 @@
  *              names to make it usable in general net subsystem.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c482491a63d8..18b5aae99bec 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -40,7 +40,7 @@
 #include <linux/pci.h>
 #include <linux/etherdevice.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
diff --git a/net/core/scm.c b/net/core/scm.c
index 2696aefdc148..d8820438ba37 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -29,7 +29,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 65a74e13c45b..e77f40616fea 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -72,7 +72,7 @@
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <trace/events/skb.h>
 #include <linux/highmem.h>
 #include <linux/capability.h>
diff --git a/net/core/sock.c b/net/core/sock.c
index 9fa46b956bdc..f560e0826009 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,7 +118,7 @@
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/netdevice.h>
 #include <net/protocol.h>
diff --git a/net/core/utils.c b/net/core/utils.c
index cf5622b9ccc4..6592d7bbed39 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,7 +31,7 @@
 #include <net/net_ratelimit.h>
 
 #include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
 /*
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 41f803e35da3..8fdd9f492b0e 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -42,7 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/net_namespace.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index a796fc7cbc35..7af0ba6157a1 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -31,7 +31,7 @@
 #include <linux/timer.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 1540b506e3e0..232675480756 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -25,7 +25,7 @@
 #include <linux/timer.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/route.h> /* RTF_xxx */
 #include <net/neighbour.h>
 #include <net/netlink.h>
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 5325b541c526..6c7da6c29bf0 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -22,7 +22,7 @@
 #include <net/dst.h>
 #include <net/flow.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/dn.h>
 #include <net/dn_dev.h>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1830e6f0e9cc..f75069883f2b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -90,7 +90,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/inet.h>
 #include <linux/igmp.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 062a67ca9a21..4cd2ee8857d2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -26,7 +26,7 @@
  */
 
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/capability.h>
 #include <linux/module.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index dbad5a1c161a..3ff8938893ec 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/capability.h>
 #include <linux/types.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c1bc1e92de0e..7a5b4c7d9a87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -13,7 +13,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1b0e7d1f5217..2919d1a10cfd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
 
 #define VERSION "0.409"
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f79d7a8ab1c6..0777ea949223 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -91,7 +91,7 @@
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
 #include <net/inet_common.h>
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15db786d50ed..68d622133f53 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -72,7 +72,7 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 78fd62048335..c9c1cb635d9a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -17,7 +17,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/in.h>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d158ff1def1..93157f2f4758 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -15,7 +15,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 618ab5079816..fac275c48108 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -42,7 +42,7 @@
  *		Hirokazu Takahashi:	sendfile() on UDP works now.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 976073417b03..57e1405e8282 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -44,7 +44,7 @@
 #include <net/ip_fib.h>
 
 #include <linux/errqueue.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  *	SOL_IP control messages.
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 071a785c65eb..fd9f34bbd740 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -61,7 +61,7 @@
 #include <net/ipconfig.h>
 #include <net/route.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/checksum.h>
 #include <asm/processor.h>
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 79489f017854..00d4229b6954 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -96,7 +96,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/in.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 665505d86b12..efc1e76d4977 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -26,7 +26,7 @@
  *
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 1258a9ab62ef..a467e1236c43 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -24,7 +24,7 @@
 #include <linux/err.h>
 #include <net/compat.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 308b456723f0..91656a1d8fbd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -20,7 +20,7 @@
 #include <linux/icmp.h>
 #include <net/ip.h>
 #include <net/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mutex.h>
 #include <linux/proc_fs.h>
 #include <linux/err.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2300fae11b22..4e49e5cb001c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -41,7 +41,7 @@
 #include <linux/atomic.h>
 #include <asm/byteorder.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9eabf490133a..a82a11747b3f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -65,7 +65,7 @@
 #define pr_fmt(fmt) "IPv4: " fmt
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ef3165114ba..4a044964da66 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,7 +277,7 @@
 #include <net/ip.h>
 #include <net/sock.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ca279b130d5..1307a7c2e544 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -79,7 +79,7 @@
 
 #define pr_fmt(fmt) "UDP: " fmt
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 237e654ba717..aa42123bc301 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -63,7 +63,7 @@
 #include <net/calipso.h>
 #include <net/seg6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mroute6.h>
 
 #include "ip6_offload.h"
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1407426bc862..a3eaafd87100 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,7 +33,7 @@
 #include <net/dsfield.h>
 
 #include <linux/errqueue.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static bool ipv6_mapped_addr_any(const struct in6_addr *a)
 {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 17fa28f7a0ff..3036f665e6c8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -70,7 +70,7 @@
 #include <net/dsfield.h>
 #include <net/l3mdev.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b912f0dbaf72..8081bafe441b 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -29,7 +29,7 @@
 #include <net/rawv6.h>
 #include <net/transp_v6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
 				   in old IPv6 RFC. Well, it was reasonable value.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8b186b56183a..36d292180942 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -42,7 +42,7 @@
 #include <linux/hash.h>
 #include <linux/etherdevice.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include <net/icmp.h>
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 52101b37ad6e..604d8953c775 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -16,7 +16,7 @@
  *
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3ba530373560..ee97c44e2aa0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -54,7 +54,7 @@
 #include <net/compat.h>
 #include <net/seg6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d56d8ac09a94..25a022d41a70 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -24,7 +24,7 @@
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
 #include <net/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/mutex.h>
 #include <linux/proc_fs.h>
 #include <linux/err.h>
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 890acace01d0..8417c41d8ec8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -64,7 +64,7 @@
 #include <net/l3mdev.h>
 #include <trace/events/fib6.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0355231162b8..fad992ad4bc8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -31,7 +31,7 @@
 #include <linux/if_arp.h>
 #include <linux/icmp.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/if_ether.h>
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 649efc26a252..4d5c4eee4b3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -35,7 +35,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/addrconf.h>
 #include <net/ndisc.h>
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 48d0dc89b58d..e07f22b0c58a 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -56,7 +56,7 @@
 #include <net/tcp_states.h>
 #include <net/net_namespace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Configuration Variables */
 static unsigned char ipxcfg_max_hops = 16;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 391c3cbd2eed..ab254041dab7 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -52,7 +52,7 @@
 #include <linux/poll.h>
 
 #include <asm/ioctls.h>		/* TIOCOUTQ, TIOCINQ */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/sock.h>
 #include <net/tcp_states.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 873c4b707d6a..817b1b186aff 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -40,7 +40,7 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>		/* for MODULE_ALIAS_CHARDEV_MAJOR */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/irmod.h>
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 8f5678cb6263..f18070118d05 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -32,7 +32,7 @@
 #include <linux/tty.h>
 #include <linux/serial.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/irmod.h>
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 856736656a30..890b90d055d5 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -43,7 +43,7 @@
 #include <linux/export.h>
 
 #include <asm/ioctls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index c69f0f38f566..9d451f8ed47a 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -249,7 +249,7 @@
 #include <linux/capability.h>
 #include <linux/ctype.h>	/* isspace() */
 #include <linux/string.h>	/* skip_spaces() */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/init.h>
 
 #include <linux/ppp_defs.h>
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index fc60d9d738b5..b50b64ac8815 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -33,7 +33,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
index 182470847fcf..d5d2110eb717 100644
--- a/net/lapb/lapb_in.c
+++ b/net/lapb/lapb_in.c
@@ -31,7 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index 482c94d9d958..eda726e22f64 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -29,7 +29,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 3c1914df641f..75efde3e616c 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -28,7 +28,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 355cc3b6fa4d..1a5535bc3b8d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -29,7 +29,7 @@
 #include <linux/inet.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3d02b0c13547..55e0169caa4c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -48,7 +48,7 @@
 #include <net/sock.h>
 #include <net/genetlink.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2278d9ab723b..a09fa9fd8f3d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -22,7 +22,7 @@
 #include <linux/sockios.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/sock.h>
 #include <linux/init.h>
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 801d474de75b..161b628ab2b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -40,7 +40,7 @@
 #include <linux/net.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 49cd0c70a13a..b9e1a13b4ba3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -73,7 +73,7 @@
 #include <net/sock.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 129d357d2722..9ad301c46b88 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -34,7 +34,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>
 #include <linux/mm.h>
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 0fc76d845103..452bbb38d943 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -25,7 +25,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/mm.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 176af3080a2b..5ed8e79bf102 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -71,7 +71,7 @@
 #include <net/inet_ecn.h>
 #include <net/sctp/sctp.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 					 union sctp_addr *s2);
diff --git a/net/socket.c b/net/socket.c
index dc01d7be2fda..5ff26c44db33 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,7 +90,7 @@
 #include <linux/slab.h>
 #include <linux/xattr.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unistd.h>
 
 #include <net/compat.h>
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 16cea00c959b..cdeb1d814833 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -50,7 +50,7 @@
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/gss_api.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/hashtable.h>
 
 #include "../netns.h"
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8aabe12201f8..8147e8d56eb2 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/string_helpers.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 135ec2c11b3b..a3e85ee28b5a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -42,7 +42,7 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <trace/events/skb.h>
 
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index c88d9bc06f5c..8c3936403fea 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -14,7 +14,7 @@
 #include <linux/sysctl.h>
 #include <linux/module.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/stats.h>
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 310882fb698e..127656ebe7be 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -100,7 +100,7 @@
 #include <linux/in.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f83b74d3e2ac..079c883aa96e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -51,7 +51,7 @@
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/notifier.h>
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index fd5ffb25873f..bcaa180d6a3f 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/init.h>
 #include <net/x25.h>
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 45cb7c699b65..d4ab9a7f3d94 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/cache.h>
 #include <linux/audit.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/ktime.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 671a1d0333f0..9705c279494b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -27,7 +27,7 @@
 #include <net/xfrm.h>
 #include <net/netlink.h>
 #include <net/ah.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #endif
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index f89f1900e58d..04a764f71ec8 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -23,7 +23,7 @@
 #include <linux/vmalloc.h>
 #include <linux/security.h>
 #include <linux/uio.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 #define KEY_MAX_DESC_SIZE 4096
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 40a885239782..918cddcd4516 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -18,7 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/security.h>
 #include <linux/user_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 /* Session keyring create vs join semaphore */
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 9db8b4a82787..6bbe2f535f08 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -16,7 +16,7 @@
 #include <linux/err.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 #include <keys/user-type.h>
 
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 66b1840b4110..e187c8909d9d 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -15,7 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <keys/user-type.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
 static int logon_vet_description(const char *desc);
diff --git a/sound/oss/dmasound/dmasound_atari.c b/sound/oss/dmasound/dmasound_atari.c
index 1c56bf58eff9..a1a2979c0bb1 100644
--- a/sound/oss/dmasound/dmasound_atari.c
+++ b/sound/oss/dmasound/dmasound_atari.c
@@ -22,7 +22,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/atariints.h>
 #include <asm/atari_stram.h>
 
diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
index f4ee85a4c42f..5f248fb41bea 100644
--- a/sound/oss/dmasound/dmasound_core.c
+++ b/sound/oss/dmasound/dmasound_core.c
@@ -183,7 +183,7 @@
 #include <linux/poll.h>
 #include <linux/mutex.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "dmasound.h"
 
diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c
index 3f653618614d..81eb82c4675a 100644
--- a/sound/oss/dmasound/dmasound_paula.c
+++ b/sound/oss/dmasound/dmasound_paula.c
@@ -23,7 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/setup.h>
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
diff --git a/sound/oss/dmasound/dmasound_q40.c b/sound/oss/dmasound/dmasound_q40.c
index 99bcb21c2281..be4fe15cfd6b 100644
--- a/sound/oss/dmasound/dmasound_q40.c
+++ b/sound/oss/dmasound/dmasound_q40.c
@@ -20,7 +20,7 @@
 #include <linux/soundcard.h>
 #include <linux/interrupt.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/q40ints.h>
 #include <asm/q40_master.h>
 
diff --git a/sound/oss/msnd.c b/sound/oss/msnd.c
index c0cc951ba97d..b63010ad22f1 100644
--- a/sound/oss/msnd.c
+++ b/sound/oss/msnd.c
@@ -32,7 +32,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/spinlock.h>
 #include <asm/irq.h>
 #include "msnd.h"
diff --git a/sound/oss/os.h b/sound/oss/os.h
index 75ad0cd0c0ab..0bf89e1d679c 100644
--- a/sound/oss/os.h
+++ b/sound/oss/os.h
@@ -17,7 +17,7 @@
 #include <linux/ioport.h>
 #include <asm/page.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/pci.h>
 #endif
diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c
index 213a416b6e0b..f3af63e58b36 100644
--- a/sound/oss/swarm_cs4297a.c
+++ b/sound/oss/swarm_cs4297a.c
@@ -80,7 +80,7 @@
 #include <asm/byteorder.h>
 #include <asm/dma.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/sibyte/sb1250_regs.h>
 #include <asm/sibyte/sb1250_int.h>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de102cae7125..994f81f8eecb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -53,7 +53,7 @@
 #include <asm/processor.h>
 #include <asm/io.h>
 #include <asm/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "coalesced_mmio.h"
-- 
cgit v1.2.3


From c53b005dd64bdcf5acac00bd55ecf94dda22dc4f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:50 +0100
Subject: scsi/bnx2fc: Convert to hotplug state machine

Install the callbacks via the state machine. No functional change.

This is the minimal fixup so we can remove the hotplug notifier mess
completely.

The real rework of this driver to use work queues is still stuck in
review/testing on the SCSI mailing list.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: linux-scsi@vger.kernel.org
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Chad Dupuis <chad.dupuis@qlogic.com>
Cc: QLogic-Storage-Upstream@qlogic.com
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20161221192111.757309869@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 79 ++++++++++++++++-----------------------
 include/linux/cpuhotplug.h        |  1 +
 2 files changed, 34 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 0990130821fa..c639d5a02656 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -127,13 +127,6 @@ module_param_named(log_fka, bnx2fc_log_fka, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(log_fka, " Print message to kernel log when fcoe is "
 	"initiating a FIP keep alive when debug logging is enabled.");
 
-static int bnx2fc_cpu_callback(struct notifier_block *nfb,
-			     unsigned long action, void *hcpu);
-/* notification function for CPU hotplug events */
-static struct notifier_block bnx2fc_cpu_notifier = {
-	.notifier_call = bnx2fc_cpu_callback,
-};
-
 static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport)
 {
 	return ((struct bnx2fc_interface *)
@@ -2622,37 +2615,19 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 		kthread_stop(thread);
 }
 
-/**
- * bnx2fc_cpu_callback - Handler for CPU hotplug events
- *
- * @nfb:    The callback data block
- * @action: The event triggering the callback
- * @hcpu:   The index of the CPU that the event is for
- *
- * This creates or destroys per-CPU data for fcoe
- *
- * Returns NOTIFY_OK always.
- */
-static int bnx2fc_cpu_callback(struct notifier_block *nfb,
-			     unsigned long action, void *hcpu)
+
+static int bnx2fc_cpu_online(unsigned int cpu)
 {
-	unsigned cpu = (unsigned long)hcpu;
+	printk(PFX "CPU %x online: Create Rx thread\n", cpu);
+	bnx2fc_percpu_thread_create(cpu);
+	return 0;
+}
 
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		printk(PFX "CPU %x online: Create Rx thread\n", cpu);
-		bnx2fc_percpu_thread_create(cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
-		bnx2fc_percpu_thread_destroy(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+static int bnx2fc_cpu_dead(unsigned int cpu)
+{
+	printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
+	bnx2fc_percpu_thread_destroy(cpu);
+	return 0;
 }
 
 static int bnx2fc_slave_configure(struct scsi_device *sdev)
@@ -2664,6 +2639,8 @@ static int bnx2fc_slave_configure(struct scsi_device *sdev)
 	return 0;
 }
 
+static enum cpuhp_state bnx2fc_online_state;
+
 /**
  * bnx2fc_mod_init - module init entry point
  *
@@ -2724,21 +2701,31 @@ static int __init bnx2fc_mod_init(void)
 		spin_lock_init(&p->fp_work_lock);
 	}
 
-	cpu_notifier_register_begin();
+	get_online_cpus();
 
-	for_each_online_cpu(cpu) {
+	for_each_online_cpu(cpu)
 		bnx2fc_percpu_thread_create(cpu);
-	}
 
-	/* Initialize per CPU interrupt thread */
-	__register_hotcpu_notifier(&bnx2fc_cpu_notifier);
+	rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+				       "scsi/bnx2fc:online",
+				       bnx2fc_cpu_online, NULL);
+	if (rc < 0)
+		goto stop_threads;
+	bnx2fc_online_state = rc;
 
-	cpu_notifier_register_done();
+	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead",
+				  NULL, bnx2fc_cpu_dead);
+	put_online_cpus();
 
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
 
 	return 0;
 
+stop_threads:
+	for_each_online_cpu(cpu)
+		bnx2fc_percpu_thread_destroy(cpu);
+	put_online_cpus();
+	kthread_stop(l2_thread);
 free_wq:
 	destroy_workqueue(bnx2fc_wq);
 release_bt:
@@ -2797,16 +2784,16 @@ static void __exit bnx2fc_mod_exit(void)
 	if (l2_thread)
 		kthread_stop(l2_thread);
 
-	cpu_notifier_register_begin();
-
+	get_online_cpus();
 	/* Destroy per cpu threads */
 	for_each_online_cpu(cpu) {
 		bnx2fc_percpu_thread_destroy(cpu);
 	}
 
-	__unregister_hotcpu_notifier(&bnx2fc_cpu_notifier);
+	cpuhp_remove_state_nocalls(bnx2fc_online_state);
+	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD);
 
-	cpu_notifier_register_done();
+	put_online_cpus();
 
 	destroy_workqueue(bnx2fc_wq);
 	/*
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2ab7bf53d529..ab0e54d25647 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -41,6 +41,7 @@ enum cpuhp_state {
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
+	CPUHP_SCSI_BNX2FC_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From e210faa2359f92eb2e417cd8462eb980a4dbb172 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:51 +0100
Subject: scsi/bnx2i: Convert to hotplug state machine

Install the callbacks via the state machine. No functional change.

This is the minimal fixup so we can remove the hotplug notifier mess
completely.

The real rework of this driver to use work queues is still stuck in
review/testing on the SCSI mailing list.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: linux-scsi@vger.kernel.org
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Chad Dupuis <chad.dupuis@qlogic.com>
Cc: QLogic-Storage-Upstream@qlogic.com
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20161221192111.836895753@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 78 ++++++++++++++++-------------------------
 include/linux/cpuhotplug.h      |  1 +
 2 files changed, 31 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index c8b410c24cf0..86afc002814c 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -70,14 +70,6 @@ u64 iscsi_error_mask = 0x00;
 
 DEFINE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
 
-static int bnx2i_cpu_callback(struct notifier_block *nfb,
-			      unsigned long action, void *hcpu);
-/* notification function for CPU hotplug events */
-static struct notifier_block bnx2i_cpu_notifier = {
-	.notifier_call = bnx2i_cpu_callback,
-};
-
-
 /**
  * bnx2i_identify_device - identifies NetXtreme II device type
  * @hba: 		Adapter structure pointer
@@ -461,41 +453,21 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu)
 		kthread_stop(thread);
 }
 
-
-/**
- * bnx2i_cpu_callback - Handler for CPU hotplug events
- *
- * @nfb:	The callback data block
- * @action:	The event triggering the callback
- * @hcpu:	The index of the CPU that the event is for
- *
- * This creates or destroys per-CPU data for iSCSI
- *
- * Returns NOTIFY_OK always.
- */
-static int bnx2i_cpu_callback(struct notifier_block *nfb,
-			      unsigned long action, void *hcpu)
+static int bnx2i_cpu_online(unsigned int cpu)
 {
-	unsigned cpu = (unsigned long)hcpu;
+	pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu);
+	bnx2i_percpu_thread_create(cpu);
+	return 0;
+}
 
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		printk(KERN_INFO "bnx2i: CPU %x online: Create Rx thread\n",
-			cpu);
-		bnx2i_percpu_thread_create(cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		printk(KERN_INFO "CPU %x offline: Remove Rx thread\n", cpu);
-		bnx2i_percpu_thread_destroy(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+static int bnx2i_cpu_dead(unsigned int cpu)
+{
+	pr_info("CPU %x offline: Remove Rx thread\n", cpu);
+	bnx2i_percpu_thread_destroy(cpu);
+	return 0;
 }
 
+static enum cpuhp_state bnx2i_online_state;
 
 /**
  * bnx2i_mod_init - module init entry point
@@ -539,18 +511,28 @@ static int __init bnx2i_mod_init(void)
 		p->iothread = NULL;
 	}
 
-	cpu_notifier_register_begin();
+	get_online_cpus();
 
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_create(cpu);
 
-	/* Initialize per CPU interrupt thread */
-	__register_hotcpu_notifier(&bnx2i_cpu_notifier);
-
-	cpu_notifier_register_done();
+	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+				       "scsi/bnx2i:online",
+				       bnx2i_cpu_online, NULL);
+	if (err < 0)
+		goto remove_threads;
+	bnx2i_online_state = err;
 
+	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead",
+				  NULL, bnx2i_cpu_dead);
+	put_online_cpus();
 	return 0;
 
+remove_threads:
+	for_each_online_cpu(cpu)
+		bnx2i_percpu_thread_destroy(cpu);
+	put_online_cpus();
+	cnic_unregister_driver(CNIC_ULP_ISCSI);
 unreg_xport:
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
 out:
@@ -587,14 +569,14 @@ static void __exit bnx2i_mod_exit(void)
 	}
 	mutex_unlock(&bnx2i_dev_lock);
 
-	cpu_notifier_register_begin();
+	get_online_cpus();
 
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_destroy(cpu);
 
-	__unregister_hotcpu_notifier(&bnx2i_cpu_notifier);
-
-	cpu_notifier_register_done();
+	cpuhp_remove_state_nocalls(bnx2i_online_state);
+	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD);
+	put_online_cpus();
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ab0e54d25647..fc4587c05dd3 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -42,6 +42,7 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_SCSI_BNX2FC_DEAD,
+	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From 7b737965b33188bd3dbb44e938535c4006d97fbb Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:52 +0100
Subject: staging/lustre/libcfs: Convert to hotplug state machine

Install the callbacks via the state machine. No functional change.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: devel@driverdev.osuosl.org
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Oleg Drokin <oleg.drokin@intel.com>
Cc: rt@linutronix.de
Cc: lustre-devel@lists.lustre.org
Link: http://lkml.kernel.org/r/20161202110027.htzzeervzkoc4muv@linutronix.de
Link: http://lkml.kernel.org/r/20161221192111.922872524@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 85 ++++++++++++----------
 include/linux/cpuhotplug.h                         |  1 +
 2 files changed, 46 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 6b9cf06e8df2..427e2198bb9e 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -967,48 +967,38 @@ cfs_cpt_table_create_pattern(char *pattern)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int
-cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	bool warn;
-
-	switch (action) {
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		spin_lock(&cpt_data.cpt_lock);
-		cpt_data.cpt_version++;
-		spin_unlock(&cpt_data.cpt_lock);
-		/* Fall through */
-	default:
-		if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
-			CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
-			       cpu, action);
-			break;
-		}
+static enum cpuhp_state lustre_cpu_online;
 
-		mutex_lock(&cpt_data.cpt_mutex);
-		/* if all HTs in a core are offline, it may break affinity */
-		cpumask_copy(cpt_data.cpt_cpumask,
-			     topology_sibling_cpumask(cpu));
-		warn = cpumask_any_and(cpt_data.cpt_cpumask,
-				       cpu_online_mask) >= nr_cpu_ids;
-		mutex_unlock(&cpt_data.cpt_mutex);
-		CDEBUG(warn ? D_WARNING : D_INFO,
-		       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
-		       cpu, action);
-	}
+static void cfs_cpu_incr_cpt_version(void)
+{
+	spin_lock(&cpt_data.cpt_lock);
+	cpt_data.cpt_version++;
+	spin_unlock(&cpt_data.cpt_lock);
+}
 
-	return NOTIFY_OK;
+static int cfs_cpu_online(unsigned int cpu)
+{
+	cfs_cpu_incr_cpt_version();
+	return 0;
 }
 
-static struct notifier_block cfs_cpu_notifier = {
-	.notifier_call	= cfs_cpu_notify,
-	.priority	= 0
-};
+static int cfs_cpu_dead(unsigned int cpu)
+{
+	bool warn;
+
+	cfs_cpu_incr_cpt_version();
 
+	mutex_lock(&cpt_data.cpt_mutex);
+	/* if all HTs in a core are offline, it may break affinity */
+	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
+	warn = cpumask_any_and(cpt_data.cpt_cpumask,
+			       cpu_online_mask) >= nr_cpu_ids;
+	mutex_unlock(&cpt_data.cpt_mutex);
+	CDEBUG(warn ? D_WARNING : D_INFO,
+	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
+	       cpu);
+	return 0;
+}
 #endif
 
 void
@@ -1018,7 +1008,9 @@ cfs_cpu_fini(void)
 		cfs_cpt_table_free(cfs_cpt_table);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	unregister_hotcpu_notifier(&cfs_cpu_notifier);
+	if (lustre_cpu_online > 0)
+		cpuhp_remove_state_nocalls(lustre_cpu_online);
+	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
 #endif
 	if (cpt_data.cpt_cpumask)
 		LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
@@ -1027,6 +1019,8 @@ cfs_cpu_fini(void)
 int
 cfs_cpu_init(void)
 {
+	int ret = 0;
+
 	LASSERT(!cfs_cpt_table);
 
 	memset(&cpt_data, 0, sizeof(cpt_data));
@@ -1041,8 +1035,19 @@ cfs_cpu_init(void)
 	mutex_init(&cpt_data.cpt_mutex);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	register_hotcpu_notifier(&cfs_cpu_notifier);
+	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
+					"staging/lustre/cfe:dead", NULL,
+					cfs_cpu_dead);
+	if (ret < 0)
+		goto failed;
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"staging/lustre/cfe:online",
+					cfs_cpu_online, NULL);
+	if (ret < 0)
+		goto failed;
+	lustre_cpu_online = ret;
 #endif
+	ret = -EINVAL;
 
 	if (*cpu_pattern) {
 		cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
@@ -1075,7 +1080,7 @@ cfs_cpu_init(void)
 
  failed:
 	cfs_cpu_fini();
-	return -1;
+	return ret;
 }
 
 #endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index fc4587c05dd3..175d276ac335 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -41,6 +41,7 @@ enum cpuhp_state {
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
+	CPUHP_LUSTRE_CFS_DEAD,
 	CPUHP_SCSI_BNX2FC_DEAD,
 	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
-- 
cgit v1.2.3


From 530e9b76ae8f863dfdef4a6ad0b38613d32e8c3f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:53 +0100
Subject: cpu/hotplug: Remove obsolete cpu hotplug register/unregister
 functions

hotcpu_notifier(), cpu_notifier(), __hotcpu_notifier(), __cpu_notifier(),
register_hotcpu_notifier(), register_cpu_notifier(),
__register_hotcpu_notifier(), __register_cpu_notifier(),
unregister_hotcpu_notifier(), unregister_cpu_notifier(),
__unregister_hotcpu_notifier(), __unregister_cpu_notifier()

are unused now. Remove them and all related code.

Remove also the now pointless cpu notifier error injection mechanism. The
states can be executed step by step and error rollback is the same as cpu
down, so any state transition can be tested w/o requiring the notifier
error injection.

Some CPU hotplug states are kept as they are (ab)used for hotplug state
tracking.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161221192112.005642358@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h             |  90 --------------------------
 include/linux/cpuhotplug.h      |   3 -
 kernel/cpu.c                    | 139 +---------------------------------------
 lib/Kconfig.debug               |  24 -------
 lib/Makefile                    |   1 -
 lib/cpu-notifier-error-inject.c |  84 ------------------------
 6 files changed, 1 insertion(+), 340 deletions(-)
 delete mode 100644 lib/cpu-notifier-error-inject.c

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 09807c2ce328..21f9c74496e7 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -57,9 +57,6 @@ struct notifier_block;
 
 #define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
 #define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
-#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
-#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
-#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
 					* lock is dropped */
@@ -80,80 +77,14 @@ struct notifier_block;
 
 #ifdef CONFIG_SMP
 extern bool cpuhp_tasks_frozen;
-/* Need to know about CPUs going up/down? */
-#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
-#define cpu_notifier(fn, pri) {					\
-	static struct notifier_block fn##_nb =			\
-		{ .notifier_call = fn, .priority = pri };	\
-	register_cpu_notifier(&fn##_nb);			\
-}
-
-#define __cpu_notifier(fn, pri) {				\
-	static struct notifier_block fn##_nb =			\
-		{ .notifier_call = fn, .priority = pri };	\
-	__register_cpu_notifier(&fn##_nb);			\
-}
-
-extern int register_cpu_notifier(struct notifier_block *nb);
-extern int __register_cpu_notifier(struct notifier_block *nb);
-extern void unregister_cpu_notifier(struct notifier_block *nb);
-extern void __unregister_cpu_notifier(struct notifier_block *nb);
-
-#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
-#define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-#define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-
-static inline int register_cpu_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
-static inline int __register_cpu_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
-static inline void unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
-static inline void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-#endif
-
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
 extern void cpu_maps_update_done(void);
 
-#define cpu_notifier_register_begin	cpu_maps_update_begin
-#define cpu_notifier_register_done	cpu_maps_update_done
-
 #else	/* CONFIG_SMP */
 #define cpuhp_tasks_frozen	0
 
-#define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-#define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-
-static inline int register_cpu_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
-static inline int __register_cpu_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
-static inline void unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
-static inline void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
 static inline void cpu_maps_update_begin(void)
 {
 }
@@ -162,14 +93,6 @@ static inline void cpu_maps_update_done(void)
 {
 }
 
-static inline void cpu_notifier_register_begin(void)
-{
-}
-
-static inline void cpu_notifier_register_done(void)
-{
-}
-
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -182,12 +105,6 @@ extern void get_online_cpus(void);
 extern void put_online_cpus(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
-#define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
-#define __hotcpu_notifier(fn, pri)	__cpu_notifier(fn, pri)
-#define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
-#define __register_hotcpu_notifier(nb)	__register_cpu_notifier(nb)
-#define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
-#define __unregister_hotcpu_notifier(nb)	__unregister_cpu_notifier(nb)
 void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
@@ -199,13 +116,6 @@ static inline void cpu_hotplug_done(void) {}
 #define put_online_cpus()	do { } while (0)
 #define cpu_hotplug_disable()	do { } while (0)
 #define cpu_hotplug_enable()	do { } while (0)
-#define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-#define __hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-/* These aren't inline functions due to a GCC bug. */
-#define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
-#define __register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
-#define unregister_hotcpu_notifier(nb)	({ (void)(nb); })
-#define __unregister_hotcpu_notifier(nb)	({ (void)(nb); })
 #endif		/* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_PM_SLEEP_SMP
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 175d276ac335..0d5ef8563113 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -59,7 +59,6 @@ enum cpuhp_state {
 	CPUHP_POWERPC_MMU_CTX_PREPARE,
 	CPUHP_XEN_PREPARE,
 	CPUHP_XEN_EVTCHN_PREPARE,
-	CPUHP_NOTIFY_PREPARE,
 	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
 	CPUHP_SH_SH3X_PREPARE,
 	CPUHP_BLK_MQ_PREPARE,
@@ -74,7 +73,6 @@ enum cpuhp_state {
 	CPUHP_KVM_PPC_BOOK3S_PREPARE,
 	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_DEAD,
-	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
@@ -145,7 +143,6 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
-	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
 	CPUHP_AP_X86_HPET_ONLINE,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3ff0ea5db371..042fd7e8e030 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -183,23 +183,16 @@ EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 /*
  * The following two APIs (cpu_maps_update_begin/done) must be used when
  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
- * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
- * hotplug callback (un)registration performed using __register_cpu_notifier()
- * or __unregister_cpu_notifier().
  */
 void cpu_maps_update_begin(void)
 {
 	mutex_lock(&cpu_add_remove_lock);
 }
-EXPORT_SYMBOL(cpu_notifier_register_begin);
 
 void cpu_maps_update_done(void)
 {
 	mutex_unlock(&cpu_add_remove_lock);
 }
-EXPORT_SYMBOL(cpu_notifier_register_done);
-
-static RAW_NOTIFIER_HEAD(cpu_chain);
 
 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
@@ -349,66 +342,7 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-/* Need to know about CPUs going up/down? */
-int register_cpu_notifier(struct notifier_block *nb)
-{
-	int ret;
-	cpu_maps_update_begin();
-	ret = raw_notifier_chain_register(&cpu_chain, nb);
-	cpu_maps_update_done();
-	return ret;
-}
-
-int __register_cpu_notifier(struct notifier_block *nb)
-{
-	return raw_notifier_chain_register(&cpu_chain, nb);
-}
-
-static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
-			int *nr_calls)
-{
-	unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
-	void *hcpu = (void *)(long)cpu;
-
-	int ret;
-
-	ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
-					nr_calls);
-
-	return notifier_to_errno(ret);
-}
-
-static int cpu_notify(unsigned long val, unsigned int cpu)
-{
-	return __cpu_notify(val, cpu, -1, NULL);
-}
-
-static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
-{
-	BUG_ON(cpu_notify(val, cpu));
-}
-
 /* Notifier wrappers for transitioning to state machine */
-static int notify_prepare(unsigned int cpu)
-{
-	int nr_calls = 0;
-	int ret;
-
-	ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
-	if (ret) {
-		nr_calls--;
-		printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
-				__func__, cpu);
-		__cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
-	}
-	return ret;
-}
-
-static int notify_online(unsigned int cpu)
-{
-	cpu_notify(CPU_ONLINE, cpu);
-	return 0;
-}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
@@ -433,10 +367,8 @@ static int bringup_cpu(unsigned int cpu)
 	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
 	irq_unlock_sparse();
-	if (ret) {
-		cpu_notify(CPU_UP_CANCELED, cpu);
+	if (ret)
 		return ret;
-	}
 	ret = bringup_wait_for_ap(cpu);
 	BUG_ON(!cpu_online(cpu));
 	return ret;
@@ -565,11 +497,6 @@ static void cpuhp_thread_fun(unsigned int cpu)
 		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
 
 		undo_cpu_down(cpu, st);
-		/*
-		 * This is a momentary workaround to keep the notifier users
-		 * happy. Will go away once we got rid of the notifiers.
-		 */
-		cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
 		st->rollback = false;
 	} else {
 		/* Cannot happen .... */
@@ -659,22 +586,6 @@ void __init cpuhp_threads_init(void)
 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
-EXPORT_SYMBOL(register_cpu_notifier);
-EXPORT_SYMBOL(__register_cpu_notifier);
-void unregister_cpu_notifier(struct notifier_block *nb)
-{
-	cpu_maps_update_begin();
-	raw_notifier_chain_unregister(&cpu_chain, nb);
-	cpu_maps_update_done();
-}
-EXPORT_SYMBOL(unregister_cpu_notifier);
-
-void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-	raw_notifier_chain_unregister(&cpu_chain, nb);
-}
-EXPORT_SYMBOL(__unregister_cpu_notifier);
-
 #ifdef CONFIG_HOTPLUG_CPU
 /**
  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
@@ -741,20 +652,6 @@ static inline void check_for_tasks(int dead_cpu)
 	read_unlock(&tasklist_lock);
 }
 
-static int notify_down_prepare(unsigned int cpu)
-{
-	int err, nr_calls = 0;
-
-	err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
-	if (err) {
-		nr_calls--;
-		__cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
-		pr_warn("%s: attempt to take down CPU %u failed\n",
-				__func__, cpu);
-	}
-	return err;
-}
-
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
@@ -833,13 +730,6 @@ static int takedown_cpu(unsigned int cpu)
 	return 0;
 }
 
-static int notify_dead(unsigned int cpu)
-{
-	cpu_notify_nofail(CPU_DEAD, cpu);
-	check_for_tasks(cpu);
-	return 0;
-}
-
 static void cpuhp_complete_idle_dead(void *arg)
 {
 	struct cpuhp_cpu_state *st = arg;
@@ -863,9 +753,7 @@ void cpuhp_report_idle_dead(void)
 }
 
 #else
-#define notify_down_prepare	NULL
 #define takedown_cpu		NULL
-#define notify_dead		NULL
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -924,9 +812,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
 out:
 	cpu_hotplug_done();
-	/* This post dead nonsense must die */
-	if (!ret && hasdied)
-		cpu_notify_nofail(CPU_POST_DEAD, cpu);
 	return ret;
 }
 
@@ -1291,17 +1176,6 @@ static struct cpuhp_step cpuhp_bp_states[] = {
 		.startup.single		= rcutree_prepare_cpu,
 		.teardown.single	= rcutree_dead_cpu,
 	},
-	/*
-	 * Preparatory and dead notifiers. Will be replaced once the notifiers
-	 * are converted to states.
-	 */
-	[CPUHP_NOTIFY_PREPARE] = {
-		.name			= "notify:prepare",
-		.startup.single		= notify_prepare,
-		.teardown.single	= notify_dead,
-		.skip_onerr		= true,
-		.cant_stop		= true,
-	},
 	/*
 	 * On the tear-down path, timers_dead_cpu() must be invoked
 	 * before blk_mq_queue_reinit_notify() from notify_dead(),
@@ -1391,17 +1265,6 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.startup.single		= rcutree_online_cpu,
 		.teardown.single	= rcutree_offline_cpu,
 	},
-
-	/*
-	 * Online/down_prepare notifiers. Will be removed once the notifiers
-	 * are converted to states.
-	 */
-	[CPUHP_AP_NOTIFY_ONLINE] = {
-		.name			= "notify:online",
-		.startup.single		= notify_online,
-		.teardown.single	= notify_down_prepare,
-		.skip_onerr		= true,
-	},
 #endif
 	/*
 	 * The dynamically registered state space is here
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cb66a4648840..b06848a104e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1538,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION
 
 	  Say N if unsure.
 
-config CPU_NOTIFIER_ERROR_INJECT
-	tristate "CPU notifier error injection module"
-	depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION
-	help
-	  This option provides a kernel module that can be used to test
-	  the error handling of the cpu notifiers by injecting artificial
-	  errors to CPU notifier chain callbacks.  It is controlled through
-	  debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu
-
-	  If the notifier call chain should be failed with some events
-	  notified, write the error code to "actions/<notifier event>/error".
-
-	  Example: Inject CPU offline error (-1 == -EPERM)
-
-	  # cd /sys/kernel/debug/notifier-error-inject/cpu
-	  # echo -1 > actions/CPU_DOWN_PREPARE/error
-	  # echo 0 > /sys/devices/system/cpu/cpu1/online
-	  bash: echo: write error: Operation not permitted
-
-	  To compile this code as a module, choose M here: the module will
-	  be called cpu-notifier-error-inject.
-
-	  If unsure, say N.
-
 config PM_NOTIFIER_ERROR_INJECT
 	tristate "PM notifier error injection module"
 	depends on PM && NOTIFIER_ERROR_INJECTION
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..bc4073a8cd08 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
-obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644
index 0e2c9a1e958a..000000000000
--- a/lib/cpu-notifier-error-inject.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-
-#include "notifier-error-inject.h"
-
-static int priority;
-module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify cpu notifier priority");
-
-#define UP_PREPARE 0
-#define UP_PREPARE_FROZEN 0
-#define DOWN_PREPARE 0
-#define DOWN_PREPARE_FROZEN 0
-
-static struct notifier_err_inject cpu_notifier_err_inject = {
-	.actions = {
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
-		{}
-	}
-};
-
-static int notf_err_handle(struct notifier_err_inject_action *action)
-{
-	int ret;
-
-	ret = action->error;
-	if (ret)
-		pr_info("Injecting error (%d) to %s\n", ret, action->name);
-	return ret;
-}
-
-static int notf_err_inj_up_prepare(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
-}
-
-static int notf_err_inj_dead(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
-}
-
-static struct dentry *dir;
-
-static int err_inject_init(void)
-{
-	int err;
-
-	dir = notifier_err_inject_init("cpu", notifier_err_inject_dir,
-					&cpu_notifier_err_inject, priority);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
-
-	err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
-					"cpu-err-notif:prepare",
-					notf_err_inj_up_prepare,
-					notf_err_inj_dead);
-	if (err)
-		debugfs_remove_recursive(dir);
-
-	return err;
-}
-
-static void err_inject_exit(void)
-{
-	cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
-	debugfs_remove_recursive(dir);
-}
-
-module_init(err_inject_init);
-module_exit(err_inject_exit);
-
-MODULE_DESCRIPTION("CPU notifier error injection module");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
-- 
cgit v1.2.3


From 36e5b0e39194b09a10f19697fb9ea4ccc44eb166 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:55 +0100
Subject: coresight/etm3/4x: Consolidate hotplug state space

Even if both drivers are compiled in only one instance can run on a given
system depending on the available tracer cell.

So having seperate hotplug states for them is pointless.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: http://lkml.kernel.org/r/20161221192112.162765484@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++--
 include/linux/cpuhotplug.h                    | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 5edc63fbb06f..031480f2c34d 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -986,7 +986,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 		dev_err(dev, "ETM arch init failed\n");
 
 	if (!etm4_count++) {
-		cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING,
+		cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
 					  "arm/coresight4:starting",
 					  etm4_starting_cpu, etm4_dying_cpu);
 		ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
@@ -1037,7 +1037,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
 err_arch_supported:
 	if (--etm4_count == 0) {
-		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING);
+		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
 		if (hp_online)
 			cpuhp_remove_state_nocalls(hp_online);
 	}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0d5ef8563113..4a938bee5858 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -119,7 +119,6 @@ enum cpuhp_state {
 	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_STARTING,
-	CPUHP_AP_ARM_CORESIGHT4_STARTING,
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
 	CPUHP_AP_X86_TBOOT_DYING,
-- 
cgit v1.2.3


From 6896bcd198df04777820cab4acc70142e87d5ce0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:56 +0100
Subject: irqchip/gic: Consolidate hotplug state space

Even if both drivers are compiled in only one instance can run on a given
system depending on the available GIC version.

So having seperate hotplug states for them is pointless.


Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/20161221192112.252416267@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-gic-v3.c | 2 +-
 include/linux/cpuhotplug.h   | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d696de1a29fb..c132f29322cc 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -632,7 +632,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 static void gic_smp_init(void)
 {
 	set_smp_cross_call(gic_raise_softirq);
-	cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GICV3_STARTING,
+	cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
 				  "irqchip/arm/gicv3:starting",
 				  gic_starting_cpu, NULL);
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a938bee5858..45c786cbb324 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -80,7 +80,6 @@ enum cpuhp_state {
 	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_RCUTREE_DYING,
 	CPUHP_AP_IRQ_GIC_STARTING,
-	CPUHP_AP_IRQ_GICV3_STARTING,
 	CPUHP_AP_IRQ_HIP04_STARTING,
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
-- 
cgit v1.2.3


From 008b69e4d52f2cbee3ed0d0502edd78155000b1a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Dec 2016 20:19:57 +0100
Subject: irqchip/armada-xp: Consolidate hotplug state space

The mpic is either the main interrupt controller or is cascaded behind a
GIC. The mpic is single instance and the modes are mutually exclusive, so
there is no reason to have seperate cpu hotplug states.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/20161221192112.333161745@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-armada-370-xp.c | 2 +-
 include/linux/cpuhotplug.h          | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 9d9c2c45916a..eb0d4d41b156 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -583,7 +583,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 #endif
 	} else {
 #ifdef CONFIG_SMP
-		cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
+		cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 					  "irqchip/armada/cascade:starting",
 					  mpic_cascaded_starting_cpu, NULL);
 #endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 45c786cbb324..20bfefbe7594 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -82,7 +82,6 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_GIC_STARTING,
 	CPUHP_AP_IRQ_HIP04_STARTING,
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
-	CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
 	CPUHP_AP_PERF_X86_UNCORE_STARTING,
-- 
cgit v1.2.3


From a5a1d1c2914b5316924c7893eb683a5420ebd3be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Dec 2016 20:32:01 +0100
Subject: clocksource: Use a plain u64 instead of cycle_t

There is no point in having an extra type for extra confusion. u64 is
unambiguous.

Conversion was done with the following coccinelle script:

@rem@
@@
-typedef u64 cycle_t;

@fix@
typedef cycle_t;
@@
-cycle_t
+u64

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
---
 arch/alpha/kernel/time.c                           |  4 +-
 arch/arm/mach-davinci/time.c                       |  2 +-
 arch/arm/mach-ep93xx/timer-ep93xx.c                |  4 +-
 arch/arm/mach-footbridge/dc21285-timer.c           |  2 +-
 arch/arm/mach-ixp4xx/common.c                      |  2 +-
 arch/arm/mach-mmp/time.c                           |  2 +-
 arch/arm/mach-omap2/timer.c                        |  4 +-
 arch/arm/plat-iop/time.c                           |  2 +-
 arch/avr32/kernel/time.c                           |  4 +-
 arch/blackfin/kernel/time-ts.c                     |  4 +-
 arch/c6x/kernel/time.c                             |  2 +-
 arch/hexagon/kernel/time.c                         |  4 +-
 arch/ia64/kernel/cyclone.c                         |  4 +-
 arch/ia64/kernel/fsyscall_gtod_data.h              |  6 +--
 arch/ia64/kernel/time.c                            |  6 +--
 arch/ia64/sn/kernel/sn2/timer.c                    |  4 +-
 arch/m68k/68000/timers.c                           |  2 +-
 arch/m68k/coldfire/dma_timer.c                     |  2 +-
 arch/m68k/coldfire/pit.c                           |  2 +-
 arch/m68k/coldfire/sltimers.c                      |  2 +-
 arch/m68k/coldfire/timers.c                        |  2 +-
 arch/microblaze/kernel/timer.c                     |  6 +--
 arch/mips/alchemy/common/time.c                    |  2 +-
 arch/mips/cavium-octeon/csrc-octeon.c              |  2 +-
 arch/mips/jz4740/time.c                            |  2 +-
 arch/mips/kernel/cevt-txx9.c                       |  2 +-
 arch/mips/kernel/csrc-bcm1480.c                    |  4 +-
 arch/mips/kernel/csrc-ioasic.c                     |  2 +-
 arch/mips/kernel/csrc-r4k.c                        |  2 +-
 arch/mips/kernel/csrc-sb1250.c                     |  4 +-
 arch/mips/loongson32/common/time.c                 |  4 +-
 arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c  |  4 +-
 arch/mips/loongson64/loongson-3/hpet.c             |  4 +-
 arch/mips/mti-malta/malta-time.c                   |  2 +-
 arch/mips/netlogic/common/time.c                   |  4 +-
 arch/mips/sgi-ip27/ip27-timer.c                    |  2 +-
 arch/mn10300/kernel/csrc-mn10300.c                 |  2 +-
 arch/nios2/kernel/time.c                           |  2 +-
 arch/openrisc/kernel/time.c                        |  4 +-
 arch/parisc/kernel/time.c                          |  2 +-
 arch/powerpc/kernel/time.c                         | 14 +++---
 arch/s390/kernel/time.c                            |  2 +-
 arch/sparc/kernel/time_32.c                        |  2 +-
 arch/sparc/kernel/time_64.c                        |  2 +-
 arch/um/kernel/time.c                              |  2 +-
 arch/unicore32/kernel/time.c                       |  2 +-
 arch/x86/entry/vdso/vclock_gettime.c               |  8 +--
 arch/x86/include/asm/kvm_host.h                    |  2 +-
 arch/x86/include/asm/pvclock.h                     |  7 ++-
 arch/x86/include/asm/tsc.h                         |  2 +-
 arch/x86/include/asm/vgtod.h                       |  4 +-
 arch/x86/kernel/apb_timer.c                        |  4 +-
 arch/x86/kernel/cpu/mshyperv.c                     |  4 +-
 arch/x86/kernel/hpet.c                             | 14 +++---
 arch/x86/kernel/kvmclock.c                         | 10 ++--
 arch/x86/kernel/pvclock.c                          |  4 +-
 arch/x86/kernel/tsc.c                              |  6 +--
 arch/x86/kvm/x86.c                                 | 14 +++---
 arch/x86/lguest/boot.c                             |  2 +-
 arch/x86/platform/uv/uv_time.c                     |  8 +--
 arch/x86/xen/time.c                                |  6 +--
 arch/x86/xen/xen-ops.h                             |  2 +-
 arch/xtensa/kernel/time.c                          |  4 +-
 drivers/char/hpet.c                                |  4 +-
 drivers/clocksource/acpi_pm.c                      | 14 +++---
 drivers/clocksource/arc_timer.c                    | 12 ++---
 drivers/clocksource/arm_arch_timer.c               |  4 +-
 drivers/clocksource/arm_global_timer.c             |  2 +-
 drivers/clocksource/cadence_ttc_timer.c            |  4 +-
 drivers/clocksource/clksrc-dbx500-prcmu.c          |  2 +-
 drivers/clocksource/dw_apb_timer.c                 |  8 +--
 drivers/clocksource/em_sti.c                       | 12 ++---
 drivers/clocksource/exynos_mct.c                   |  6 +--
 drivers/clocksource/h8300_timer16.c                |  2 +-
 drivers/clocksource/h8300_tpu.c                    |  2 +-
 drivers/clocksource/i8253.c                        |  4 +-
 drivers/clocksource/jcore-pit.c                    |  2 +-
 drivers/clocksource/metag_generic.c                |  2 +-
 drivers/clocksource/mips-gic-timer.c               |  2 +-
 drivers/clocksource/mmio.c                         | 18 +++----
 drivers/clocksource/mxs_timer.c                    |  2 +-
 drivers/clocksource/qcom-timer.c                   |  2 +-
 drivers/clocksource/samsung_pwm_timer.c            |  2 +-
 drivers/clocksource/scx200_hrt.c                   |  4 +-
 drivers/clocksource/sh_cmt.c                       |  2 +-
 drivers/clocksource/sh_tmu.c                       |  2 +-
 drivers/clocksource/tcb_clksrc.c                   |  4 +-
 drivers/clocksource/time-pistachio.c               |  4 +-
 drivers/clocksource/timer-atlas7.c                 |  2 +-
 drivers/clocksource/timer-atmel-pit.c              |  2 +-
 drivers/clocksource/timer-atmel-st.c               |  2 +-
 drivers/clocksource/timer-nps.c                    |  4 +-
 drivers/clocksource/timer-prima2.c                 |  2 +-
 drivers/clocksource/timer-sun5i.c                  |  2 +-
 drivers/clocksource/timer-ti-32k.c                 |  4 +-
 drivers/clocksource/vt8500_timer.c                 |  4 +-
 drivers/hv/hv.c                                    |  8 +--
 drivers/irqchip/irq-mips-gic.c                     | 16 +++---
 drivers/net/ethernet/amd/xgbe/xgbe-ptp.c           |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  2 +-
 drivers/net/ethernet/freescale/fec_ptp.c           |  2 +-
 drivers/net/ethernet/intel/e1000e/netdev.c         | 18 +++----
 drivers/net/ethernet/intel/e1000e/ptp.c            |  4 +-
 drivers/net/ethernet/intel/igb/igb_ptp.c           |  4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c       |  4 +-
 drivers/net/ethernet/mellanox/mlx4/en_clock.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  4 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  2 +-
 drivers/net/ethernet/ti/cpts.c                     |  2 +-
 include/kvm/arm_arch_timer.h                       |  4 +-
 include/linux/clocksource.h                        | 22 ++++-----
 include/linux/dw_apb_timer.h                       |  2 +-
 include/linux/irqchip/mips-gic.h                   |  8 +--
 include/linux/mlx4/device.h                        |  2 +-
 include/linux/timecounter.h                        | 12 ++---
 include/linux/timekeeper_internal.h                | 10 ++--
 include/linux/timekeeping.h                        |  4 +-
 include/linux/types.h                              |  3 --
 kernel/time/clocksource.c                          |  2 +-
 kernel/time/jiffies.c                              |  4 +-
 kernel/time/timecounter.c                          |  6 +--
 kernel/time/timekeeping.c                          | 57 ++++++++++------------
 kernel/time/timekeeping_internal.h                 |  6 +--
 kernel/trace/ftrace.c                              |  4 +-
 kernel/trace/trace.c                               |  6 +--
 kernel/trace/trace.h                               |  8 +--
 kernel/trace/trace_irqsoff.c                       |  4 +-
 kernel/trace/trace_sched_wakeup.c                  |  4 +-
 sound/hda/hdac_stream.c                            |  6 +--
 virt/kvm/arm/arch_timer.c                          |  6 +--
 132 files changed, 320 insertions(+), 327 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 992000e3d9e4..3bfe058d75d9 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -133,7 +133,7 @@ init_rtc_clockevent(void)
  * The QEMU clock as a clocksource primitive.
  */
 
-static cycle_t
+static u64
 qemu_cs_read(struct clocksource *cs)
 {
 	return qemu_get_vmtime();
@@ -260,7 +260,7 @@ common_init_rtc(void)
  * use this method when WTINT is in use.
  */
 
-static cycle_t read_rpcc(struct clocksource *cs)
+static u64 read_rpcc(struct clocksource *cs)
 {
 	return rpcc();
 }
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 6c18445a4639..034f865fe78e 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -268,7 +268,7 @@ static void __init timer_init(void)
 /*
  * clocksource
  */
-static cycle_t read_cycles(struct clocksource *cs)
+static u64 read_cycles(struct clocksource *cs)
 {
 	struct timer_s *t = &timers[TID_CLOCKSOURCE];
 
diff --git a/arch/arm/mach-ep93xx/timer-ep93xx.c b/arch/arm/mach-ep93xx/timer-ep93xx.c
index e5f791145bd0..874cbc91b669 100644
--- a/arch/arm/mach-ep93xx/timer-ep93xx.c
+++ b/arch/arm/mach-ep93xx/timer-ep93xx.c
@@ -59,13 +59,13 @@ static u64 notrace ep93xx_read_sched_clock(void)
 	return ret;
 }
 
-cycle_t ep93xx_clocksource_read(struct clocksource *c)
+u64 ep93xx_clocksource_read(struct clocksource *c)
 {
 	u64 ret;
 
 	ret = readl(EP93XX_TIMER4_VALUE_LOW);
 	ret |= ((u64) (readl(EP93XX_TIMER4_VALUE_HIGH) & 0xff) << 32);
-	return (cycle_t) ret;
+	return (u64) ret;
 }
 
 static int ep93xx_clkevt_set_next_event(unsigned long next,
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 810edc78c817..75395a720e63 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -19,7 +19,7 @@
 
 #include "common.h"
 
-static cycle_t cksrc_dc21285_read(struct clocksource *cs)
+static u64 cksrc_dc21285_read(struct clocksource *cs)
 {
 	return cs->mask - *CSR_TIMER2_VALUE;
 }
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 0f08f611c1a6..846e033c56fa 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -493,7 +493,7 @@ static u64 notrace ixp4xx_read_sched_clock(void)
  * clocksource
  */
 
-static cycle_t ixp4xx_clocksource_read(struct clocksource *c)
+static u64 ixp4xx_clocksource_read(struct clocksource *c)
 {
 	return *IXP4XX_OSTS;
 }
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 3c2c92aaa0ae..96ad1db0b04b 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -144,7 +144,7 @@ static struct clock_event_device ckevt = {
 	.set_state_oneshot	= timer_set_shutdown,
 };
 
-static cycle_t clksrc_read(struct clocksource *cs)
+static u64 clksrc_read(struct clocksource *cs)
 {
 	return timer_read();
 }
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 5e2e2218a402..56128da23c3a 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -369,9 +369,9 @@ static bool use_gptimer_clksrc __initdata;
 /*
  * clocksource
  */
-static cycle_t clocksource_read_cycles(struct clocksource *cs)
+static u64 clocksource_read_cycles(struct clocksource *cs)
 {
-	return (cycle_t)__omap_dm_timer_read_counter(&clksrc,
+	return (u64)__omap_dm_timer_read_counter(&clksrc,
 						     OMAP_TIMER_NONPOSTED);
 }
 
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index ed8d129d4bea..2cff0010f677 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -38,7 +38,7 @@
 /*
  * IOP clocksource (free-running timer 1).
  */
-static cycle_t notrace iop_clocksource_read(struct clocksource *unused)
+static u64 notrace iop_clocksource_read(struct clocksource *unused)
 {
 	return 0xffffffffu - read_tcr1();
 }
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index a124c55733db..4d9b69615979 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -20,9 +20,9 @@
 
 static bool disable_cpu_idle_poll;
 
-static cycle_t read_cycle_count(struct clocksource *cs)
+static u64 read_cycle_count(struct clocksource *cs)
 {
-	return (cycle_t)sysreg_read(COUNT);
+	return (u64)sysreg_read(COUNT);
 }
 
 /*
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index fb9e95f1b719..0e9fcf841d67 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -26,7 +26,7 @@
 
 #if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
-static notrace cycle_t bfin_read_cycles(struct clocksource *cs)
+static notrace u64 bfin_read_cycles(struct clocksource *cs)
 {
 #ifdef CONFIG_CPU_FREQ
 	return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod);
@@ -80,7 +80,7 @@ void __init setup_gptimer0(void)
 	enable_gptimers(TIMER0bit);
 }
 
-static cycle_t bfin_read_gptimer0(struct clocksource *cs)
+static u64 bfin_read_gptimer0(struct clocksource *cs)
 {
 	return bfin_read_TIMER0_COUNTER();
 }
diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c
index 04845aaf5985..6a8e00a1f6d5 100644
--- a/arch/c6x/kernel/time.c
+++ b/arch/c6x/kernel/time.c
@@ -26,7 +26,7 @@
 static u32 sched_clock_multiplier;
 #define SCHED_CLOCK_SHIFT 16
 
-static cycle_t tsc_read(struct clocksource *cs)
+static u64 tsc_read(struct clocksource *cs)
 {
 	return get_cycles();
 }
diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
index a6a1d1f8309a..ff4e9bf995e9 100644
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -72,9 +72,9 @@ struct adsp_hw_timer_struct {
 /*  Look for "TCX0" for related constants.  */
 static __iomem struct adsp_hw_timer_struct *rtos_timer;
 
-static cycle_t timer_get_cycles(struct clocksource *cs)
+static u64 timer_get_cycles(struct clocksource *cs)
 {
-	return (cycle_t) __vmgettime();
+	return (u64) __vmgettime();
 }
 
 static struct clocksource hexagon_clocksource = {
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 5fa3848ba224..ee1a4afbf9da 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -21,9 +21,9 @@ void __init cyclone_setup(void)
 
 static void __iomem *cyclone_mc;
 
-static cycle_t read_cyclone(struct clocksource *cs)
+static u64 read_cyclone(struct clocksource *cs)
 {
-	return (cycle_t)readq((void __iomem *)cyclone_mc);
+	return (u64)readq((void __iomem *)cyclone_mc);
 }
 
 static struct clocksource clocksource_cyclone = {
diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h
index 146b15b5fec3..dcc514917731 100644
--- a/arch/ia64/kernel/fsyscall_gtod_data.h
+++ b/arch/ia64/kernel/fsyscall_gtod_data.h
@@ -9,15 +9,15 @@ struct fsyscall_gtod_data_t {
 	seqcount_t	seq;
 	struct timespec	wall_time;
 	struct timespec monotonic_time;
-	cycle_t		clk_mask;
+	u64		clk_mask;
 	u32		clk_mult;
 	u32		clk_shift;
 	void		*clk_fsys_mmio;
-	cycle_t		clk_cycle_last;
+	u64		clk_cycle_last;
 } ____cacheline_aligned;
 
 struct itc_jitter_data_t {
 	int		itc_jitter;
-	cycle_t		itc_lastcycle;
+	u64		itc_lastcycle;
 } ____cacheline_aligned;
 
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 021f44ab4bfb..71775b95d6cc 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -31,7 +31,7 @@
 
 #include "fsyscall_gtod_data.h"
 
-static cycle_t itc_get_cycles(struct clocksource *cs);
+static u64 itc_get_cycles(struct clocksource *cs);
 
 struct fsyscall_gtod_data_t fsyscall_gtod_data;
 
@@ -323,7 +323,7 @@ void ia64_init_itm(void)
 	}
 }
 
-static cycle_t itc_get_cycles(struct clocksource *cs)
+static u64 itc_get_cycles(struct clocksource *cs)
 {
 	unsigned long lcycle, now, ret;
 
@@ -397,7 +397,7 @@ void update_vsyscall_tz(void)
 }
 
 void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
-			 struct clocksource *c, u32 mult, cycle_t cycle_last)
+			 struct clocksource *c, u32 mult, u64 cycle_last)
 {
 	write_seqcount_begin(&fsyscall_gtod_data.seq);
 
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index abab8f99e913..66edc36426ed 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -22,9 +22,9 @@
 
 extern unsigned long sn_rtc_cycles_per_second;
 
-static cycle_t read_sn2(struct clocksource *cs)
+static u64 read_sn2(struct clocksource *cs)
 {
-	return (cycle_t)readq(RTC_COUNTER_ADDR);
+	return (u64)readq(RTC_COUNTER_ADDR);
 }
 
 static struct clocksource clocksource_sn2 = {
diff --git a/arch/m68k/68000/timers.c b/arch/m68k/68000/timers.c
index 99a98698bc95..252455bce144 100644
--- a/arch/m68k/68000/timers.c
+++ b/arch/m68k/68000/timers.c
@@ -76,7 +76,7 @@ static struct irqaction m68328_timer_irq = {
 
 /***************************************************************************/
 
-static cycle_t m68328_read_clk(struct clocksource *cs)
+static u64 m68328_read_clk(struct clocksource *cs)
 {
 	unsigned long flags;
 	u32 cycles;
diff --git a/arch/m68k/coldfire/dma_timer.c b/arch/m68k/coldfire/dma_timer.c
index 235ad57c4707..8273eea57874 100644
--- a/arch/m68k/coldfire/dma_timer.c
+++ b/arch/m68k/coldfire/dma_timer.c
@@ -34,7 +34,7 @@
 #define DMA_DTMR_CLK_DIV_16	(2 << 1)
 #define DMA_DTMR_ENABLE		(1 << 0)
 
-static cycle_t cf_dt_get_cycles(struct clocksource *cs)
+static u64 cf_dt_get_cycles(struct clocksource *cs)
 {
 	return __raw_readl(DTCN0);
 }
diff --git a/arch/m68k/coldfire/pit.c b/arch/m68k/coldfire/pit.c
index d86a9ffb3f13..175553d5b8ed 100644
--- a/arch/m68k/coldfire/pit.c
+++ b/arch/m68k/coldfire/pit.c
@@ -118,7 +118,7 @@ static struct irqaction pit_irq = {
 
 /***************************************************************************/
 
-static cycle_t pit_read_clk(struct clocksource *cs)
+static u64 pit_read_clk(struct clocksource *cs)
 {
 	unsigned long flags;
 	u32 cycles;
diff --git a/arch/m68k/coldfire/sltimers.c b/arch/m68k/coldfire/sltimers.c
index 831a08cf6f40..3292c0d68b18 100644
--- a/arch/m68k/coldfire/sltimers.c
+++ b/arch/m68k/coldfire/sltimers.c
@@ -97,7 +97,7 @@ static struct irqaction mcfslt_timer_irq = {
 	.handler = mcfslt_tick,
 };
 
-static cycle_t mcfslt_read_clk(struct clocksource *cs)
+static u64 mcfslt_read_clk(struct clocksource *cs)
 {
 	unsigned long flags;
 	u32 cycles, scnt;
diff --git a/arch/m68k/coldfire/timers.c b/arch/m68k/coldfire/timers.c
index cd496a20fcc7..2dc7a58204f6 100644
--- a/arch/m68k/coldfire/timers.c
+++ b/arch/m68k/coldfire/timers.c
@@ -89,7 +89,7 @@ static struct irqaction mcftmr_timer_irq = {
 
 /***************************************************************************/
 
-static cycle_t mcftmr_read_clk(struct clocksource *cs)
+static u64 mcftmr_read_clk(struct clocksource *cs)
 {
 	unsigned long flags;
 	u32 cycles;
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index 9e954959f605..1d6fad50fa76 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -190,17 +190,17 @@ static u64 xilinx_clock_read(void)
 	return read_fn(timer_baseaddr + TCR1);
 }
 
-static cycle_t xilinx_read(struct clocksource *cs)
+static u64 xilinx_read(struct clocksource *cs)
 {
 	/* reading actual value of timer 1 */
-	return (cycle_t)xilinx_clock_read();
+	return (u64)xilinx_clock_read();
 }
 
 static struct timecounter xilinx_tc = {
 	.cc = NULL,
 };
 
-static cycle_t xilinx_cc_read(const struct cyclecounter *cc)
+static u64 xilinx_cc_read(const struct cyclecounter *cc)
 {
 	return xilinx_read(NULL);
 }
diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c
index f99d3ec17a45..e1bec5a77c39 100644
--- a/arch/mips/alchemy/common/time.c
+++ b/arch/mips/alchemy/common/time.c
@@ -44,7 +44,7 @@
 /* 32kHz clock enabled and detected */
 #define CNTR_OK (SYS_CNTRL_E0 | SYS_CNTRL_32S)
 
-static cycle_t au1x_counter1_read(struct clocksource *cs)
+static u64 au1x_counter1_read(struct clocksource *cs)
 {
 	return alchemy_rdsys(AU1000_SYS_RTCREAD);
 }
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 23c2344a3552..39f153fe0022 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -98,7 +98,7 @@ void octeon_init_cvmcount(void)
 	local_irq_restore(flags);
 }
 
-static cycle_t octeon_cvmcount_read(struct clocksource *cs)
+static u64 octeon_cvmcount_read(struct clocksource *cs)
 {
 	return read_c0_cvmcount();
 }
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index 1f7ca2c9f262..bcf8f8c62737 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -34,7 +34,7 @@
 
 static uint16_t jz4740_jiffies_per_tick;
 
-static cycle_t jz4740_clocksource_read(struct clocksource *cs)
+static u64 jz4740_clocksource_read(struct clocksource *cs)
 {
 	return jz4740_timer_get_count(TIMER_CLOCKSOURCE);
 }
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index 537eefdf838f..aaca60d6ffc3 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -27,7 +27,7 @@ struct txx9_clocksource {
 	struct txx9_tmr_reg __iomem *tmrptr;
 };
 
-static cycle_t txx9_cs_read(struct clocksource *cs)
+static u64 txx9_cs_read(struct clocksource *cs)
 {
 	struct txx9_clocksource *txx9_cs =
 		container_of(cs, struct txx9_clocksource, cs);
diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c
index 7f65b53d1b24..f011261e9506 100644
--- a/arch/mips/kernel/csrc-bcm1480.c
+++ b/arch/mips/kernel/csrc-bcm1480.c
@@ -25,9 +25,9 @@
 
 #include <asm/sibyte/sb1250.h>
 
-static cycle_t bcm1480_hpt_read(struct clocksource *cs)
+static u64 bcm1480_hpt_read(struct clocksource *cs)
 {
-	return (cycle_t) __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT));
+	return (u64) __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT));
 }
 
 struct clocksource bcm1480_clocksource = {
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
index 722f5589cd1d..f6acd1e58c26 100644
--- a/arch/mips/kernel/csrc-ioasic.c
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -22,7 +22,7 @@
 #include <asm/dec/ioasic.h>
 #include <asm/dec/ioasic_addrs.h>
 
-static cycle_t dec_ioasic_hpt_read(struct clocksource *cs)
+static u64 dec_ioasic_hpt_read(struct clocksource *cs)
 {
 	return ioasic_read(IO_REG_FCTR);
 }
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index d76275da54cb..eed099f35bf1 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -11,7 +11,7 @@
 
 #include <asm/time.h>
 
-static cycle_t c0_hpt_read(struct clocksource *cs)
+static u64 c0_hpt_read(struct clocksource *cs)
 {
 	return read_c0_count();
 }
diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c
index d915652b4d56..b07b7310d3f4 100644
--- a/arch/mips/kernel/csrc-sb1250.c
+++ b/arch/mips/kernel/csrc-sb1250.c
@@ -30,7 +30,7 @@
  * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over
  * again.
  */
-static inline cycle_t sb1250_hpt_get_cycles(void)
+static inline u64 sb1250_hpt_get_cycles(void)
 {
 	unsigned int count;
 	void __iomem *addr;
@@ -41,7 +41,7 @@ static inline cycle_t sb1250_hpt_get_cycles(void)
 	return SB1250_HPT_VALUE - count;
 }
 
-static cycle_t sb1250_hpt_read(struct clocksource *cs)
+static u64 sb1250_hpt_read(struct clocksource *cs)
 {
 	return sb1250_hpt_get_cycles();
 }
diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c
index ff224f0020e5..e6f972d35252 100644
--- a/arch/mips/loongson32/common/time.c
+++ b/arch/mips/loongson32/common/time.c
@@ -63,7 +63,7 @@ void __init ls1x_pwmtimer_init(void)
 	ls1x_pwmtimer_restart();
 }
 
-static cycle_t ls1x_clocksource_read(struct clocksource *cs)
+static u64 ls1x_clocksource_read(struct clocksource *cs)
 {
 	unsigned long flags;
 	int count;
@@ -107,7 +107,7 @@ static cycle_t ls1x_clocksource_read(struct clocksource *cs)
 
 	raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags);
 
-	return (cycle_t) (jifs * ls1x_jiffies_per_tick) + count;
+	return (u64) (jifs * ls1x_jiffies_per_tick) + count;
 }
 
 static struct clocksource ls1x_clocksource = {
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
index da77d412514c..9edfa55a0e78 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
@@ -144,7 +144,7 @@ void __init setup_mfgpt0_timer(void)
  * to just read by itself. So use jiffies to emulate a free
  * running counter:
  */
-static cycle_t mfgpt_read(struct clocksource *cs)
+static u64 mfgpt_read(struct clocksource *cs)
 {
 	unsigned long flags;
 	int count;
@@ -188,7 +188,7 @@ static cycle_t mfgpt_read(struct clocksource *cs)
 
 	raw_spin_unlock_irqrestore(&mfgpt_lock, flags);
 
-	return (cycle_t) (jifs * COMPARE) + count;
+	return (u64) (jifs * COMPARE) + count;
 }
 
 static struct clocksource clocksource_mfgpt = {
diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c
index 4788bea62a6a..24afe364637b 100644
--- a/arch/mips/loongson64/loongson-3/hpet.c
+++ b/arch/mips/loongson64/loongson-3/hpet.c
@@ -248,9 +248,9 @@ void __init setup_hpet_timer(void)
 	pr_info("hpet clock event device register\n");
 }
 
-static cycle_t hpet_read_counter(struct clocksource *cs)
+static u64 hpet_read_counter(struct clocksource *cs)
 {
-	return (cycle_t)hpet_read(HPET_COUNTER);
+	return (u64)hpet_read(HPET_COUNTER);
 }
 
 static void hpet_suspend(struct clocksource *cs)
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 7407da04f8d6..1829a9031eec 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -75,7 +75,7 @@ static void __init estimate_frequencies(void)
 	unsigned int count, start;
 	unsigned char secs1, secs2, ctrl;
 	int secs;
-	cycle_t giccount = 0, gicstart = 0;
+	u64 giccount = 0, gicstart = 0;
 
 #if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ
 	mips_hpt_frequency = CONFIG_KVM_GUEST_TIMER_FREQ * 1000000;
diff --git a/arch/mips/netlogic/common/time.c b/arch/mips/netlogic/common/time.c
index 5873c83e65be..cbbf0d48216b 100644
--- a/arch/mips/netlogic/common/time.c
+++ b/arch/mips/netlogic/common/time.c
@@ -59,14 +59,14 @@ unsigned int get_c0_compare_int(void)
 	return IRQ_TIMER;
 }
 
-static cycle_t nlm_get_pic_timer(struct clocksource *cs)
+static u64 nlm_get_pic_timer(struct clocksource *cs)
 {
 	uint64_t picbase = nlm_get_node(0)->picbase;
 
 	return ~nlm_pic_read_timer(picbase, PIC_CLOCK_TIMER);
 }
 
-static cycle_t nlm_get_pic_timer32(struct clocksource *cs)
+static u64 nlm_get_pic_timer32(struct clocksource *cs)
 {
 	uint64_t picbase = nlm_get_node(0)->picbase;
 
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 42d6cb9f956e..695c51bdd7dc 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -140,7 +140,7 @@ static void __init hub_rt_clock_event_global_init(void)
 	setup_irq(irq, &hub_rt_irqaction);
 }
 
-static cycle_t hub_rt_read(struct clocksource *cs)
+static u64 hub_rt_read(struct clocksource *cs)
 {
 	return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT);
 }
diff --git a/arch/mn10300/kernel/csrc-mn10300.c b/arch/mn10300/kernel/csrc-mn10300.c
index 45644cf18c41..6b74df3661f2 100644
--- a/arch/mn10300/kernel/csrc-mn10300.c
+++ b/arch/mn10300/kernel/csrc-mn10300.c
@@ -13,7 +13,7 @@
 #include <asm/timex.h>
 #include "internal.h"
 
-static cycle_t mn10300_read(struct clocksource *cs)
+static u64 mn10300_read(struct clocksource *cs)
 {
 	return read_timestamp_counter();
 }
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 746bf5caaffc..6e2bdc9b8530 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -81,7 +81,7 @@ static inline unsigned long read_timersnapshot(struct nios2_timer *timer)
 	return count;
 }
 
-static cycle_t nios2_timer_read(struct clocksource *cs)
+static u64 nios2_timer_read(struct clocksource *cs)
 {
 	struct nios2_clocksource *nios2_cs = to_nios2_clksource(cs);
 	unsigned long flags;
diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c
index 50e970183dcd..687c11d048d7 100644
--- a/arch/openrisc/kernel/time.c
+++ b/arch/openrisc/kernel/time.c
@@ -117,9 +117,9 @@ static __init void openrisc_clockevent_init(void)
  * is 32 bits wide and runs at the CPU clock frequency.
  */
 
-static cycle_t openrisc_timer_read(struct clocksource *cs)
+static u64 openrisc_timer_read(struct clocksource *cs)
 {
-	return (cycle_t) mfspr(SPR_TTCR);
+	return (u64) mfspr(SPR_TTCR);
 }
 
 static struct clocksource openrisc_timer = {
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 037d81f00520..da0d9cb63403 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -137,7 +137,7 @@ EXPORT_SYMBOL(profile_pc);
 
 /* clock source code */
 
-static cycle_t notrace read_cr16(struct clocksource *cs)
+static u64 notrace read_cr16(struct clocksource *cs)
 {
 	return get_cycles();
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 19397e2a8bf5..bc2e08d415fa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -80,7 +80,7 @@
 #include <linux/clockchips.h>
 #include <linux/timekeeper_internal.h>
 
-static cycle_t rtc_read(struct clocksource *);
+static u64 rtc_read(struct clocksource *);
 static struct clocksource clocksource_rtc = {
 	.name         = "rtc",
 	.rating       = 400,
@@ -89,7 +89,7 @@ static struct clocksource clocksource_rtc = {
 	.read         = rtc_read,
 };
 
-static cycle_t timebase_read(struct clocksource *);
+static u64 timebase_read(struct clocksource *);
 static struct clocksource clocksource_timebase = {
 	.name         = "timebase",
 	.rating       = 400,
@@ -802,18 +802,18 @@ void read_persistent_clock(struct timespec *ts)
 }
 
 /* clocksource code */
-static cycle_t rtc_read(struct clocksource *cs)
+static u64 rtc_read(struct clocksource *cs)
 {
-	return (cycle_t)get_rtc();
+	return (u64)get_rtc();
 }
 
-static cycle_t timebase_read(struct clocksource *cs)
+static u64 timebase_read(struct clocksource *cs)
 {
-	return (cycle_t)get_tb();
+	return (u64)get_tb();
 }
 
 void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
-			 struct clocksource *clock, u32 mult, cycle_t cycle_last)
+			 struct clocksource *clock, u32 mult, u64 cycle_last)
 {
 	u64 new_tb_to_xs, new_stamp_xsec;
 	u32 frac_sec;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ec76315c9ee5..52949df88529 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -209,7 +209,7 @@ void read_boot_clock64(struct timespec64 *ts)
 	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
-static cycle_t read_tod_clock(struct clocksource *cs)
+static u64 read_tod_clock(struct clocksource *cs)
 {
 	unsigned long long now, adj;
 
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 1affabc96b08..244062bdaa56 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -148,7 +148,7 @@ static unsigned int sbus_cycles_offset(void)
 	return offset;
 }
 
-static cycle_t timer_cs_read(struct clocksource *cs)
+static u64 timer_cs_read(struct clocksource *cs)
 {
 	unsigned int seq, offset;
 	u64 cycles;
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 807f7e2ce014..12a6d3555cb8 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -770,7 +770,7 @@ void udelay(unsigned long usecs)
 }
 EXPORT_SYMBOL(udelay);
 
-static cycle_t clocksource_tick_read(struct clocksource *cs)
+static u64 clocksource_tick_read(struct clocksource *cs)
 {
 	return tick_ops->get_tick();
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 25c23666d592..ba87a27d6715 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -83,7 +83,7 @@ static irqreturn_t um_timer(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
-static cycle_t timer_read(struct clocksource *cs)
+static u64 timer_read(struct clocksource *cs)
 {
 	return os_nsecs() / TIMER_MULTIPLIER;
 }
diff --git a/arch/unicore32/kernel/time.c b/arch/unicore32/kernel/time.c
index ac4c5449bb88..fceaa673f861 100644
--- a/arch/unicore32/kernel/time.c
+++ b/arch/unicore32/kernel/time.c
@@ -62,7 +62,7 @@ static struct clock_event_device ckevt_puv3_osmr0 = {
 	.set_state_oneshot	= puv3_osmr0_shutdown,
 };
 
-static cycle_t puv3_read_oscr(struct clocksource *cs)
+static u64 puv3_read_oscr(struct clocksource *cs)
 {
 	return readl(OST_OSCR);
 }
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 02223cb4bcfd..9d4d6e138311 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -92,10 +92,10 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
 	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
 }
 
-static notrace cycle_t vread_pvclock(int *mode)
+static notrace u64 vread_pvclock(int *mode)
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
-	cycle_t ret;
+	u64 ret;
 	u64 last;
 	u32 version;
 
@@ -142,9 +142,9 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
-notrace static cycle_t vread_tsc(void)
+notrace static u64 vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)rdtsc_ordered();
+	u64 ret = (u64)rdtsc_ordered();
 	u64 last = gtod->cycle_last;
 
 	if (likely(ret >= last))
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2e25038dbd93..a7066dc1a7e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -768,7 +768,7 @@ struct kvm_arch {
 	spinlock_t pvclock_gtod_sync_lock;
 	bool use_master_clock;
 	u64 master_kernel_ns;
-	cycle_t master_cycle_now;
+	u64 master_cycle_now;
 	struct delayed_work kvmclock_update_work;
 	struct delayed_work kvmclock_sync_work;
 
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 3ad741b84072..448cfe1b48cf 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,7 +14,7 @@ static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
 #endif
 
 /* some helper functions for xen and kvm pv clock sources */
-cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
 void pvclock_set_flags(u8 flags);
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -87,11 +87,10 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
-			      u64 tsc)
+u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc)
 {
 	u64 delta = tsc - src->tsc_timestamp;
-	cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+	u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
 					     src->tsc_shift);
 	return src->system_time + offset;
 }
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index abb1fdcc545a..f5e6f1c417df 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void)
 	return rdtsc();
 }
 
-extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+extern struct system_counterval_t convert_art_to_tsc(u64 art);
 
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3a01996db58f..022e59714562 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {
 	unsigned seq;
 
 	int vclock_mode;
-	cycle_t	cycle_last;
-	cycle_t	mask;
+	u64	cycle_last;
+	u64	mask;
 	u32	mult;
 	u32	shift;
 
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 456316f6c868..092ea664d2c6 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -247,7 +247,7 @@ void apbt_setup_secondary_clock(void) {}
 static int apbt_clocksource_register(void)
 {
 	u64 start, now;
-	cycle_t t1;
+	u64 t1;
 
 	/* Start the counter, use timer 2 as source, timer 0/1 for event */
 	dw_apb_clocksource_start(clocksource_apbt);
@@ -355,7 +355,7 @@ unsigned long apbt_quick_calibrate(void)
 {
 	int i, scale;
 	u64 old, new;
-	cycle_t t1, t2;
+	u64 t1, t2;
 	unsigned long khz = 0;
 	u32 loop, shift;
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f37e02e41a77..65e20c97e04b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -133,9 +133,9 @@ static uint32_t  __init ms_hyperv_platform(void)
 	return 0;
 }
 
-static cycle_t read_hv_clock(struct clocksource *arg)
+static u64 read_hv_clock(struct clocksource *arg)
 {
-	cycle_t current_tick;
+	u64 current_tick;
 	/*
 	 * Read the partition counter to get the current tick count. This count
 	 * is set to 0 when the partition is created and is incremented in
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 274fab99169d..367756d55980 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -791,7 +791,7 @@ static union hpet_lock hpet __cacheline_aligned = {
 	{ .lock = __ARCH_SPIN_LOCK_UNLOCKED, },
 };
 
-static cycle_t read_hpet(struct clocksource *cs)
+static u64 read_hpet(struct clocksource *cs)
 {
 	unsigned long flags;
 	union hpet_lock old, new;
@@ -802,7 +802,7 @@ static cycle_t read_hpet(struct clocksource *cs)
 	 * Read HPET directly if in NMI.
 	 */
 	if (in_nmi())
-		return (cycle_t)hpet_readl(HPET_COUNTER);
+		return (u64)hpet_readl(HPET_COUNTER);
 
 	/*
 	 * Read the current state of the lock and HPET value atomically.
@@ -821,7 +821,7 @@ static cycle_t read_hpet(struct clocksource *cs)
 		WRITE_ONCE(hpet.value, new.value);
 		arch_spin_unlock(&hpet.lock);
 		local_irq_restore(flags);
-		return (cycle_t)new.value;
+		return (u64)new.value;
 	}
 	local_irq_restore(flags);
 
@@ -843,15 +843,15 @@ contended:
 		new.lockval = READ_ONCE(hpet.lockval);
 	} while ((new.value == old.value) && arch_spin_is_locked(&new.lock));
 
-	return (cycle_t)new.value;
+	return (u64)new.value;
 }
 #else
 /*
  * For UP or 32-bit.
  */
-static cycle_t read_hpet(struct clocksource *cs)
+static u64 read_hpet(struct clocksource *cs)
 {
-	return (cycle_t)hpet_readl(HPET_COUNTER);
+	return (u64)hpet_readl(HPET_COUNTER);
 }
 #endif
 
@@ -867,7 +867,7 @@ static struct clocksource clocksource_hpet = {
 static int hpet_clocksource_register(void)
 {
 	u64 start, now;
-	cycle_t t1;
+	u64 t1;
 
 	/* Start the counter */
 	hpet_restart_counter();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 60b9949f1e65..2a5cafdf8808 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -32,7 +32,7 @@
 static int kvmclock __ro_after_init = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
-static cycle_t kvm_sched_clock_offset;
+static u64 kvm_sched_clock_offset;
 
 static int parse_no_kvmclock(char *arg)
 {
@@ -79,10 +79,10 @@ static int kvm_set_wallclock(const struct timespec *now)
 	return -1;
 }
 
-static cycle_t kvm_clock_read(void)
+static u64 kvm_clock_read(void)
 {
 	struct pvclock_vcpu_time_info *src;
-	cycle_t ret;
+	u64 ret;
 	int cpu;
 
 	preempt_disable_notrace();
@@ -93,12 +93,12 @@ static cycle_t kvm_clock_read(void)
 	return ret;
 }
 
-static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
+static u64 kvm_clock_get_cycles(struct clocksource *cs)
 {
 	return kvm_clock_read();
 }
 
-static cycle_t kvm_sched_clock_read(void)
+static u64 kvm_sched_clock_read(void)
 {
 	return kvm_clock_read() - kvm_sched_clock_offset;
 }
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 5b2cc889ce34..9e93fe5803b4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -71,10 +71,10 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 	return flags & valid_flags;
 }
 
-cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
 	unsigned version;
-	cycle_t ret;
+	u64 ret;
 	u64 last;
 	u8 flags;
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0aed75a1e31b..be3a49ee0356 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1101,9 +1101,9 @@ static void tsc_resume(struct clocksource *cs)
  * checking the result of read_tsc() - cycle_last for being negative.
  * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
  */
-static cycle_t read_tsc(struct clocksource *cs)
+static u64 read_tsc(struct clocksource *cs)
 {
-	return (cycle_t)rdtsc_ordered();
+	return (u64)rdtsc_ordered();
 }
 
 /*
@@ -1192,7 +1192,7 @@ int unsynchronized_tsc(void)
 /*
  * Convert ART to TSC given numerator/denominator found in detect_art()
  */
-struct system_counterval_t convert_art_to_tsc(cycle_t art)
+struct system_counterval_t convert_art_to_tsc(u64 art)
 {
 	u64 tmp, res, rem;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 445c51b6cf6d..ed04398f52c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1131,8 +1131,8 @@ struct pvclock_gtod_data {
 
 	struct { /* extract of a clocksource struct */
 		int vclock_mode;
-		cycle_t	cycle_last;
-		cycle_t	mask;
+		u64	cycle_last;
+		u64	mask;
 		u32	mult;
 		u32	shift;
 	} clock;
@@ -1572,9 +1572,9 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
 
 #ifdef CONFIG_X86_64
 
-static cycle_t read_tsc(void)
+static u64 read_tsc(void)
 {
-	cycle_t ret = (cycle_t)rdtsc_ordered();
+	u64 ret = (u64)rdtsc_ordered();
 	u64 last = pvclock_gtod_data.clock.cycle_last;
 
 	if (likely(ret >= last))
@@ -1592,7 +1592,7 @@ static cycle_t read_tsc(void)
 	return last;
 }
 
-static inline u64 vgettsc(cycle_t *cycle_now)
+static inline u64 vgettsc(u64 *cycle_now)
 {
 	long v;
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
@@ -1603,7 +1603,7 @@ static inline u64 vgettsc(cycle_t *cycle_now)
 	return v * gtod->clock.mult;
 }
 
-static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *cycle_now)
 {
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	unsigned long seq;
@@ -1624,7 +1624,7 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
 }
 
 /* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
 {
 	/* checked again under seqlock below */
 	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4ca0d78adcf0..d3289d7e78fa 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -916,7 +916,7 @@ static unsigned long lguest_tsc_khz(void)
  * If we can't use the TSC, the kernel falls back to our lower-priority
  * "lguest_clock", where we read the time value given to us by the Host.
  */
-static cycle_t lguest_clock_read(struct clocksource *cs)
+static u64 lguest_clock_read(struct clocksource *cs)
 {
 	unsigned long sec, nsec;
 
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index b333fc45f9ec..2ee7632d4916 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -30,7 +30,7 @@
 
 #define RTC_NAME		"sgi_rtc"
 
-static cycle_t uv_read_rtc(struct clocksource *cs);
+static u64 uv_read_rtc(struct clocksource *cs);
 static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
 static int uv_rtc_shutdown(struct clock_event_device *evt);
 
@@ -38,7 +38,7 @@ static struct clocksource clocksource_uv = {
 	.name		= RTC_NAME,
 	.rating		= 299,
 	.read		= uv_read_rtc,
-	.mask		= (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+	.mask		= (u64)UVH_RTC_REAL_TIME_CLOCK_MASK,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -296,7 +296,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  * cachelines of it's own page.  This allows faster simultaneous reads
  * from a given socket.
  */
-static cycle_t uv_read_rtc(struct clocksource *cs)
+static u64 uv_read_rtc(struct clocksource *cs)
 {
 	unsigned long offset;
 
@@ -305,7 +305,7 @@ static cycle_t uv_read_rtc(struct clocksource *cs)
 	else
 		offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
 
-	return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+	return (u64)uv_read_local_mmr(UVH_RTC | offset);
 }
 
 /*
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 33d8f6a7829d..1e69956d7852 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -39,10 +39,10 @@ static unsigned long xen_tsc_khz(void)
 	return pvclock_tsc_khz(info);
 }
 
-cycle_t xen_clocksource_read(void)
+u64 xen_clocksource_read(void)
 {
         struct pvclock_vcpu_time_info *src;
-	cycle_t ret;
+	u64 ret;
 
 	preempt_disable_notrace();
 	src = &__this_cpu_read(xen_vcpu)->time;
@@ -51,7 +51,7 @@ cycle_t xen_clocksource_read(void)
 	return ret;
 }
 
-static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
+static u64 xen_clocksource_get_cycles(struct clocksource *cs)
 {
 	return xen_clocksource_read();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3cbce3b085e7..ac0a2b0f9e62 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -67,7 +67,7 @@ void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
 void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
-cycle_t xen_clocksource_read(void);
+u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void __init xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index be81e69b25bc..668c1056f9e4 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -34,9 +34,9 @@
 unsigned long ccount_freq;		/* ccount Hz */
 EXPORT_SYMBOL(ccount_freq);
 
-static cycle_t ccount_read(struct clocksource *cs)
+static u64 ccount_read(struct clocksource *cs)
 {
-	return (cycle_t)get_ccount();
+	return (u64)get_ccount();
 }
 
 static u64 notrace ccount_sched_clock_read(void)
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index be54e5331a45..20b32bb8c2af 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -69,9 +69,9 @@ static u32 hpet_nhpet, hpet_max_freq = HPET_USER_FREQ;
 #ifdef CONFIG_IA64
 static void __iomem *hpet_mctr;
 
-static cycle_t read_hpet(struct clocksource *cs)
+static u64 read_hpet(struct clocksource *cs)
 {
-	return (cycle_t)read_counter((void __iomem *)hpet_mctr);
+	return (u64)read_counter((void __iomem *)hpet_mctr);
 }
 
 static struct clocksource clocksource_hpet = {
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 28037d0b8dcd..1961e3539b57 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -58,16 +58,16 @@ u32 acpi_pm_read_verified(void)
 	return v2;
 }
 
-static cycle_t acpi_pm_read(struct clocksource *cs)
+static u64 acpi_pm_read(struct clocksource *cs)
 {
-	return (cycle_t)read_pmtmr();
+	return (u64)read_pmtmr();
 }
 
 static struct clocksource clocksource_acpi_pm = {
 	.name		= "acpi_pm",
 	.rating		= 200,
 	.read		= acpi_pm_read,
-	.mask		= (cycle_t)ACPI_PM_MASK,
+	.mask		= (u64)ACPI_PM_MASK,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -81,9 +81,9 @@ static int __init acpi_pm_good_setup(char *__str)
 }
 __setup("acpi_pm_good", acpi_pm_good_setup);
 
-static cycle_t acpi_pm_read_slow(struct clocksource *cs)
+static u64 acpi_pm_read_slow(struct clocksource *cs)
 {
-	return (cycle_t)acpi_pm_read_verified();
+	return (u64)acpi_pm_read_verified();
 }
 
 static inline void acpi_pm_need_workaround(void)
@@ -145,7 +145,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE,
  */
 static int verify_pmtmr_rate(void)
 {
-	cycle_t value1, value2;
+	u64 value1, value2;
 	unsigned long count, delta;
 
 	mach_prepare_counter();
@@ -175,7 +175,7 @@ static int verify_pmtmr_rate(void)
 
 static int __init init_acpi_pm_clocksource(void)
 {
-	cycle_t value1, value2;
+	u64 value1, value2;
 	unsigned int i, j = 0;
 
 	if (!pmtmr_ioport)
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index a49748d826c0..3ea46343024f 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -56,7 +56,7 @@ static int noinline arc_get_timer_clk(struct device_node *node)
 
 #ifdef CONFIG_ARC_TIMERS_64BIT
 
-static cycle_t arc_read_gfrc(struct clocksource *cs)
+static u64 arc_read_gfrc(struct clocksource *cs)
 {
 	unsigned long flags;
 	u32 l, h;
@@ -71,7 +71,7 @@ static cycle_t arc_read_gfrc(struct clocksource *cs)
 
 	local_irq_restore(flags);
 
-	return (((cycle_t)h) << 32) | l;
+	return (((u64)h) << 32) | l;
 }
 
 static struct clocksource arc_counter_gfrc = {
@@ -105,7 +105,7 @@ CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc);
 #define AUX_RTC_LOW	0x104
 #define AUX_RTC_HIGH	0x105
 
-static cycle_t arc_read_rtc(struct clocksource *cs)
+static u64 arc_read_rtc(struct clocksource *cs)
 {
 	unsigned long status;
 	u32 l, h;
@@ -122,7 +122,7 @@ static cycle_t arc_read_rtc(struct clocksource *cs)
 		status = read_aux_reg(AUX_RTC_CTRL);
 	} while (!(status & _BITUL(31)));
 
-	return (((cycle_t)h) << 32) | l;
+	return (((u64)h) << 32) | l;
 }
 
 static struct clocksource arc_counter_rtc = {
@@ -166,9 +166,9 @@ CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc);
  * 32bit TIMER1 to keep counting monotonically and wraparound
  */
 
-static cycle_t arc_read_timer1(struct clocksource *cs)
+static u64 arc_read_timer1(struct clocksource *cs)
 {
-	return (cycle_t) read_aux_reg(ARC_REG_TIMER1_CNT);
+	return (u64) read_aux_reg(ARC_REG_TIMER1_CNT);
 }
 
 static struct clocksource arc_counter_timer1 = {
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 02fef6830e72..394e417414d3 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -562,12 +562,12 @@ static u64 arch_counter_get_cntvct_mem(void)
  */
 u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct;
 
-static cycle_t arch_counter_read(struct clocksource *cs)
+static u64 arch_counter_read(struct clocksource *cs)
 {
 	return arch_timer_read_counter();
 }
 
-static cycle_t arch_counter_read_cc(const struct cyclecounter *cc)
+static u64 arch_counter_read_cc(const struct cyclecounter *cc)
 {
 	return arch_timer_read_counter();
 }
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 8da03298f844..570cc58baec4 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -195,7 +195,7 @@ static int gt_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-static cycle_t gt_clocksource_read(struct clocksource *cs)
+static u64 gt_clocksource_read(struct clocksource *cs)
 {
 	return gt_counter_read();
 }
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index fbfbdec13b08..44e5e951583b 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -158,11 +158,11 @@ static irqreturn_t ttc_clock_event_interrupt(int irq, void *dev_id)
  *
  * returns: Current timer counter register value
  **/
-static cycle_t __ttc_clocksource_read(struct clocksource *cs)
+static u64 __ttc_clocksource_read(struct clocksource *cs)
 {
 	struct ttc_timer *timer = &to_ttc_timer_clksrc(cs)->ttc;
 
-	return (cycle_t)readl_relaxed(timer->base_addr +
+	return (u64)readl_relaxed(timer->base_addr +
 				TTC_COUNT_VAL_OFFSET);
 }
 
diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c
index 77a365f573d7..c69e2772658d 100644
--- a/drivers/clocksource/clksrc-dbx500-prcmu.c
+++ b/drivers/clocksource/clksrc-dbx500-prcmu.c
@@ -30,7 +30,7 @@
 
 static void __iomem *clksrc_dbx500_timer_base;
 
-static cycle_t notrace clksrc_dbx500_prcmu_read(struct clocksource *cs)
+static u64 notrace clksrc_dbx500_prcmu_read(struct clocksource *cs)
 {
 	void __iomem *base = clksrc_dbx500_timer_base;
 	u32 count, count2;
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index 797505aa2ba4..63e4f5519577 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -348,7 +348,7 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs)
 	dw_apb_clocksource_read(dw_cs);
 }
 
-static cycle_t __apbt_read_clocksource(struct clocksource *cs)
+static u64 __apbt_read_clocksource(struct clocksource *cs)
 {
 	u32 current_count;
 	struct dw_apb_clocksource *dw_cs =
@@ -357,7 +357,7 @@ static cycle_t __apbt_read_clocksource(struct clocksource *cs)
 	current_count = apbt_readl_relaxed(&dw_cs->timer,
 					APBTMR_N_CURRENT_VALUE);
 
-	return (cycle_t)~current_count;
+	return (u64)~current_count;
 }
 
 static void apbt_restart_clocksource(struct clocksource *cs)
@@ -416,7 +416,7 @@ void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs)
  *
  * @dw_cs:	The clocksource to read.
  */
-cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs)
+u64 dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs)
 {
-	return (cycle_t)~apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE);
+	return (u64)~apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE);
 }
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index 19bb1792d647..aff87df07449 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -110,9 +110,9 @@ static void em_sti_disable(struct em_sti_priv *p)
 	clk_disable_unprepare(p->clk);
 }
 
-static cycle_t em_sti_count(struct em_sti_priv *p)
+static u64 em_sti_count(struct em_sti_priv *p)
 {
-	cycle_t ticks;
+	u64 ticks;
 	unsigned long flags;
 
 	/* the STI hardware buffers the 48-bit count, but to
@@ -121,14 +121,14 @@ static cycle_t em_sti_count(struct em_sti_priv *p)
 	 * Always read STI_COUNT_H before STI_COUNT_L.
 	 */
 	raw_spin_lock_irqsave(&p->lock, flags);
-	ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32;
+	ticks = (u64)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32;
 	ticks |= em_sti_read(p, STI_COUNT_L);
 	raw_spin_unlock_irqrestore(&p->lock, flags);
 
 	return ticks;
 }
 
-static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next)
+static u64 em_sti_set_next(struct em_sti_priv *p, u64 next)
 {
 	unsigned long flags;
 
@@ -198,7 +198,7 @@ static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs)
 	return container_of(cs, struct em_sti_priv, cs);
 }
 
-static cycle_t em_sti_clocksource_read(struct clocksource *cs)
+static u64 em_sti_clocksource_read(struct clocksource *cs)
 {
 	return em_sti_count(cs_to_em_sti(cs));
 }
@@ -271,7 +271,7 @@ static int em_sti_clock_event_next(unsigned long delta,
 				   struct clock_event_device *ced)
 {
 	struct em_sti_priv *p = ced_to_em_sti(ced);
-	cycle_t next;
+	u64 next;
 	int safe;
 
 	next = em_sti_set_next(p, em_sti_count(p) + delta);
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 8f3488b80896..c8b9f834f4de 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -183,7 +183,7 @@ static u64 exynos4_read_count_64(void)
 		hi2 = readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_U);
 	} while (hi != hi2);
 
-	return ((cycle_t)hi << 32) | lo;
+	return ((u64)hi << 32) | lo;
 }
 
 /**
@@ -199,7 +199,7 @@ static u32 notrace exynos4_read_count_32(void)
 	return readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_L);
 }
 
-static cycle_t exynos4_frc_read(struct clocksource *cs)
+static u64 exynos4_frc_read(struct clocksource *cs)
 {
 	return exynos4_read_count_32();
 }
@@ -266,7 +266,7 @@ static void exynos4_mct_comp0_stop(void)
 static void exynos4_mct_comp0_start(bool periodic, unsigned long cycles)
 {
 	unsigned int tcon;
-	cycle_t comp_cycle;
+	u64 comp_cycle;
 
 	tcon = readl_relaxed(reg_base + EXYNOS4_MCT_G_TCON);
 
diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c
index 07d9d5be9054..5b27fb9997c2 100644
--- a/drivers/clocksource/h8300_timer16.c
+++ b/drivers/clocksource/h8300_timer16.c
@@ -72,7 +72,7 @@ static inline struct timer16_priv *cs_to_priv(struct clocksource *cs)
 	return container_of(cs, struct timer16_priv, cs);
 }
 
-static cycle_t timer16_clocksource_read(struct clocksource *cs)
+static u64 timer16_clocksource_read(struct clocksource *cs)
 {
 	struct timer16_priv *p = cs_to_priv(cs);
 	unsigned long raw, value;
diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c
index 7bdf1991c847..72e1cf2b3096 100644
--- a/drivers/clocksource/h8300_tpu.c
+++ b/drivers/clocksource/h8300_tpu.c
@@ -64,7 +64,7 @@ static inline struct tpu_priv *cs_to_priv(struct clocksource *cs)
 	return container_of(cs, struct tpu_priv, cs);
 }
 
-static cycle_t tpu_clocksource_read(struct clocksource *cs)
+static u64 tpu_clocksource_read(struct clocksource *cs)
 {
 	struct tpu_priv *p = cs_to_priv(cs);
 	unsigned long flags;
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 0efd36e483ab..64f6490740d7 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -25,7 +25,7 @@ EXPORT_SYMBOL(i8253_lock);
  * to just read by itself. So use jiffies to emulate a free
  * running counter:
  */
-static cycle_t i8253_read(struct clocksource *cs)
+static u64 i8253_read(struct clocksource *cs)
 {
 	static int old_count;
 	static u32 old_jifs;
@@ -83,7 +83,7 @@ static cycle_t i8253_read(struct clocksource *cs)
 
 	count = (PIT_LATCH - 1) - count;
 
-	return (cycle_t)(jifs * PIT_LATCH) + count;
+	return (u64)(jifs * PIT_LATCH) + count;
 }
 
 static struct clocksource i8253_cs = {
diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c
index 54e1665aa03c..e90a6cfcb061 100644
--- a/drivers/clocksource/jcore-pit.c
+++ b/drivers/clocksource/jcore-pit.c
@@ -57,7 +57,7 @@ static notrace u64 jcore_sched_clock_read(void)
 	return seclo * NSEC_PER_SEC + nsec;
 }
 
-static cycle_t jcore_clocksource_read(struct clocksource *cs)
+static u64 jcore_clocksource_read(struct clocksource *cs)
 {
 	return jcore_sched_clock_read();
 }
diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c
index a80ab3e446b7..8d06a0f7ff26 100644
--- a/drivers/clocksource/metag_generic.c
+++ b/drivers/clocksource/metag_generic.c
@@ -56,7 +56,7 @@ static int metag_timer_set_next_event(unsigned long delta,
 	return 0;
 }
 
-static cycle_t metag_clocksource_read(struct clocksource *cs)
+static u64 metag_clocksource_read(struct clocksource *cs)
 {
 	return __core_reg_get(TXTIMER);
 }
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index 7a960cd01104..7b86d07c99b4 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -125,7 +125,7 @@ static int gic_clockevent_init(void)
 	return 0;
 }
 
-static cycle_t gic_hpt_read(struct clocksource *cs)
+static u64 gic_hpt_read(struct clocksource *cs)
 {
 	return gic_read_count();
 }
diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c
index c4f7d7a9b689..4c4df981d8cc 100644
--- a/drivers/clocksource/mmio.c
+++ b/drivers/clocksource/mmio.c
@@ -20,24 +20,24 @@ static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c)
 	return container_of(c, struct clocksource_mmio, clksrc);
 }
 
-cycle_t clocksource_mmio_readl_up(struct clocksource *c)
+u64 clocksource_mmio_readl_up(struct clocksource *c)
 {
-	return (cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg);
+	return (u64)readl_relaxed(to_mmio_clksrc(c)->reg);
 }
 
-cycle_t clocksource_mmio_readl_down(struct clocksource *c)
+u64 clocksource_mmio_readl_down(struct clocksource *c)
 {
-	return ~(cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
+	return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
 }
 
-cycle_t clocksource_mmio_readw_up(struct clocksource *c)
+u64 clocksource_mmio_readw_up(struct clocksource *c)
 {
-	return (cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg);
+	return (u64)readw_relaxed(to_mmio_clksrc(c)->reg);
 }
 
-cycle_t clocksource_mmio_readw_down(struct clocksource *c)
+u64 clocksource_mmio_readw_down(struct clocksource *c)
 {
-	return ~(cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
+	return ~(u64)readw_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
 }
 
 /**
@@ -51,7 +51,7 @@ cycle_t clocksource_mmio_readw_down(struct clocksource *c)
  */
 int __init clocksource_mmio_init(void __iomem *base, const char *name,
 	unsigned long hz, int rating, unsigned bits,
-	cycle_t (*read)(struct clocksource *))
+	u64 (*read)(struct clocksource *))
 {
 	struct clocksource_mmio *cs;
 
diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c
index 0ba0a913b41d..99b77aff0839 100644
--- a/drivers/clocksource/mxs_timer.c
+++ b/drivers/clocksource/mxs_timer.c
@@ -97,7 +97,7 @@ static void timrot_irq_acknowledge(void)
 		     HW_TIMROT_TIMCTRLn(0) + STMP_OFFSET_REG_CLR);
 }
 
-static cycle_t timrotv1_get_cycles(struct clocksource *cs)
+static u64 timrotv1_get_cycles(struct clocksource *cs)
 {
 	return ~((__raw_readl(mxs_timrot_base + HW_TIMROT_TIMCOUNTn(1))
 			& 0xffff0000) >> 16);
diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c
index 3283cfa2aa52..d5d048d890d4 100644
--- a/drivers/clocksource/qcom-timer.c
+++ b/drivers/clocksource/qcom-timer.c
@@ -89,7 +89,7 @@ static struct clock_event_device __percpu *msm_evt;
 
 static void __iomem *source_base;
 
-static notrace cycle_t msm_read_timer_count(struct clocksource *cs)
+static notrace u64 msm_read_timer_count(struct clocksource *cs)
 {
 	return readl_relaxed(source_base + TIMER_COUNT_VAL);
 }
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 54565bd0093b..0093ece661fe 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -307,7 +307,7 @@ static void samsung_clocksource_resume(struct clocksource *cs)
 	samsung_time_start(pwm.source_id, true);
 }
 
-static cycle_t notrace samsung_clocksource_read(struct clocksource *c)
+static u64 notrace samsung_clocksource_read(struct clocksource *c)
 {
 	return ~readl_relaxed(pwm.source_reg);
 }
diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c
index 64f9e8294434..a46660bf6588 100644
--- a/drivers/clocksource/scx200_hrt.c
+++ b/drivers/clocksource/scx200_hrt.c
@@ -43,10 +43,10 @@ MODULE_PARM_DESC(ppm, "+-adjust to actual XO freq (ppm)");
 /* The base timer frequency, * 27 if selected */
 #define HRT_FREQ   1000000
 
-static cycle_t read_hrt(struct clocksource *cs)
+static u64 read_hrt(struct clocksource *cs)
 {
 	/* Read the timer value */
-	return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET);
+	return (u64) inl(scx200_cb_base + SCx200_TIMER_OFFSET);
 }
 
 static struct clocksource cs_hrt = {
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 103c49362c68..28757edf6aca 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -612,7 +612,7 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs)
 	return container_of(cs, struct sh_cmt_channel, cs);
 }
 
-static cycle_t sh_cmt_clocksource_read(struct clocksource *cs)
+static u64 sh_cmt_clocksource_read(struct clocksource *cs)
 {
 	struct sh_cmt_channel *ch = cs_to_sh_cmt(cs);
 	unsigned long flags, raw;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 469e776ec17a..1fbf2aadcfd4 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -255,7 +255,7 @@ static struct sh_tmu_channel *cs_to_sh_tmu(struct clocksource *cs)
 	return container_of(cs, struct sh_tmu_channel, cs);
 }
 
-static cycle_t sh_tmu_clocksource_read(struct clocksource *cs)
+static u64 sh_tmu_clocksource_read(struct clocksource *cs)
 {
 	struct sh_tmu_channel *ch = cs_to_sh_tmu(cs);
 
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 4da2af9694a2..d4ca9962a759 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -41,7 +41,7 @@
 
 static void __iomem *tcaddr;
 
-static cycle_t tc_get_cycles(struct clocksource *cs)
+static u64 tc_get_cycles(struct clocksource *cs)
 {
 	unsigned long	flags;
 	u32		lower, upper;
@@ -56,7 +56,7 @@ static cycle_t tc_get_cycles(struct clocksource *cs)
 	return (upper << 16) | lower;
 }
 
-static cycle_t tc_get_cycles32(struct clocksource *cs)
+static u64 tc_get_cycles32(struct clocksource *cs)
 {
 	return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
 }
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index a8e6c7df853d..3710e4d9dcba 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -67,7 +67,7 @@ static inline void gpt_writel(void __iomem *base, u32 value, u32 offset,
 	writel(value, base + 0x20 * gpt_id + offset);
 }
 
-static cycle_t notrace
+static u64 notrace
 pistachio_clocksource_read_cycles(struct clocksource *cs)
 {
 	struct pistachio_clocksource *pcs = to_pistachio_clocksource(cs);
@@ -84,7 +84,7 @@ pistachio_clocksource_read_cycles(struct clocksource *cs)
 	counter = gpt_readl(pcs->base, TIMER_CURRENT_VALUE, 0);
 	raw_spin_unlock_irqrestore(&pcs->lock, flags);
 
-	return (cycle_t)~counter;
+	return (u64)~counter;
 }
 
 static u64 notrace pistachio_read_sched_clock(void)
diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c
index 4334e0330ada..db0f21e7d7d2 100644
--- a/drivers/clocksource/timer-atlas7.c
+++ b/drivers/clocksource/timer-atlas7.c
@@ -85,7 +85,7 @@ static irqreturn_t sirfsoc_timer_interrupt(int irq, void *dev_id)
 }
 
 /* read 64-bit timer counter */
-static cycle_t sirfsoc_timer_read(struct clocksource *cs)
+static u64 sirfsoc_timer_read(struct clocksource *cs)
 {
 	u64 cycles;
 
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index 6555821bbdae..c0b5df3167a0 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -73,7 +73,7 @@ static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsign
  * Clocksource:  just a monotonic counter of MCK/16 cycles.
  * We don't care whether or not PIT irqs are enabled.
  */
-static cycle_t read_pit_clk(struct clocksource *cs)
+static u64 read_pit_clk(struct clocksource *cs)
 {
 	struct pit_data *data = clksrc_to_pit_data(cs);
 	unsigned long flags;
diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
index e90ab5b63a90..be4ac7604136 100644
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -92,7 +92,7 @@ static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
-static cycle_t read_clk32k(struct clocksource *cs)
+static u64 read_clk32k(struct clocksource *cs)
 {
 	return read_CRTR();
 }
diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c
index 8da5e93b6810..da1f7986e477 100644
--- a/drivers/clocksource/timer-nps.c
+++ b/drivers/clocksource/timer-nps.c
@@ -77,11 +77,11 @@ static int __init nps_get_timer_clk(struct device_node *node,
 	return 0;
 }
 
-static cycle_t nps_clksrc_read(struct clocksource *clksrc)
+static u64 nps_clksrc_read(struct clocksource *clksrc)
 {
 	int cluster = raw_smp_processor_id() >> NPS_CLUSTER_OFFSET;
 
-	return (cycle_t)ioread32be(nps_msu_reg_low_addr[cluster]);
+	return (u64)ioread32be(nps_msu_reg_low_addr[cluster]);
 }
 
 static int __init nps_setup_clocksource(struct device_node *node)
diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c
index c32148ec7a38..bfa981ac1eaf 100644
--- a/drivers/clocksource/timer-prima2.c
+++ b/drivers/clocksource/timer-prima2.c
@@ -72,7 +72,7 @@ static irqreturn_t sirfsoc_timer_interrupt(int irq, void *dev_id)
 }
 
 /* read 64-bit timer counter */
-static cycle_t notrace sirfsoc_timer_read(struct clocksource *cs)
+static u64 notrace sirfsoc_timer_read(struct clocksource *cs)
 {
 	u64 cycles;
 
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 4f87f3e76d83..a3e662b15964 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -152,7 +152,7 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
+static u64 sun5i_clksrc_read(struct clocksource *clksrc)
 {
 	struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
 
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index cf5b14e442e4..624067712ef0 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -65,11 +65,11 @@ static inline struct ti_32k *to_ti_32k(struct clocksource *cs)
 	return container_of(cs, struct ti_32k, cs);
 }
 
-static cycle_t notrace ti_32k_read_cycles(struct clocksource *cs)
+static u64 notrace ti_32k_read_cycles(struct clocksource *cs)
 {
 	struct ti_32k *ti = to_ti_32k(cs);
 
-	return (cycle_t)readl_relaxed(ti->counter);
+	return (u64)readl_relaxed(ti->counter);
 }
 
 static struct ti_32k ti_32k_timer = {
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index b15069483fbd..d02b51075ad1 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -53,7 +53,7 @@
 
 static void __iomem *regbase;
 
-static cycle_t vt8500_timer_read(struct clocksource *cs)
+static u64 vt8500_timer_read(struct clocksource *cs)
 {
 	int loops = msecs_to_loops(10);
 	writel(3, regbase + TIMER_CTRL_VAL);
@@ -75,7 +75,7 @@ static int vt8500_timer_set_next_event(unsigned long cycles,
 				    struct clock_event_device *evt)
 {
 	int loops = msecs_to_loops(10);
-	cycle_t alarm = clocksource.read(&clocksource) + cycles;
+	u64 alarm = clocksource.read(&clocksource) + cycles;
 	while ((readl(regbase + TIMER_AS_VAL) & TIMER_MATCH_W_ACTIVE)
 						&& --loops)
 		cpu_relax();
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 446802ae8f1b..b44b32f21e61 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -135,9 +135,9 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
 EXPORT_SYMBOL_GPL(hv_do_hypercall);
 
 #ifdef CONFIG_X86_64
-static cycle_t read_hv_clock_tsc(struct clocksource *arg)
+static u64 read_hv_clock_tsc(struct clocksource *arg)
 {
-	cycle_t current_tick;
+	u64 current_tick;
 	struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;
 
 	if (tsc_pg->tsc_sequence != 0) {
@@ -146,7 +146,7 @@ static cycle_t read_hv_clock_tsc(struct clocksource *arg)
 		 */
 
 		while (1) {
-			cycle_t tmp;
+			u64 tmp;
 			u32 sequence = tsc_pg->tsc_sequence;
 			u64 cur_tsc;
 			u64 scale = tsc_pg->tsc_scale;
@@ -350,7 +350,7 @@ int hv_post_message(union hv_connection_id connection_id,
 static int hv_ce_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
 {
-	cycle_t current_tick;
+	u64 current_tick;
 
 	WARN_ON(!clockevent_state_oneshot(evt));
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index c0178a122940..c01c09e9916d 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -152,12 +152,12 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
-cycle_t gic_read_count(void)
+u64 gic_read_count(void)
 {
 	unsigned int hi, hi2, lo;
 
 	if (mips_cm_is64)
-		return (cycle_t)gic_read(GIC_REG(SHARED, GIC_SH_COUNTER));
+		return (u64)gic_read(GIC_REG(SHARED, GIC_SH_COUNTER));
 
 	do {
 		hi = gic_read32(GIC_REG(SHARED, GIC_SH_COUNTER_63_32));
@@ -165,7 +165,7 @@ cycle_t gic_read_count(void)
 		hi2 = gic_read32(GIC_REG(SHARED, GIC_SH_COUNTER_63_32));
 	} while (hi2 != hi);
 
-	return (((cycle_t) hi) << 32) + lo;
+	return (((u64) hi) << 32) + lo;
 }
 
 unsigned int gic_get_count_width(void)
@@ -179,7 +179,7 @@ unsigned int gic_get_count_width(void)
 	return bits;
 }
 
-void gic_write_compare(cycle_t cnt)
+void gic_write_compare(u64 cnt)
 {
 	if (mips_cm_is64) {
 		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -191,7 +191,7 @@ void gic_write_compare(cycle_t cnt)
 	}
 }
 
-void gic_write_cpu_compare(cycle_t cnt, int cpu)
+void gic_write_cpu_compare(u64 cnt, int cpu)
 {
 	unsigned long flags;
 
@@ -211,17 +211,17 @@ void gic_write_cpu_compare(cycle_t cnt, int cpu)
 	local_irq_restore(flags);
 }
 
-cycle_t gic_read_compare(void)
+u64 gic_read_compare(void)
 {
 	unsigned int hi, lo;
 
 	if (mips_cm_is64)
-		return (cycle_t)gic_read(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE));
+		return (u64)gic_read(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE));
 
 	hi = gic_read32(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE_HI));
 	lo = gic_read32(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE_LO));
 
-	return (((cycle_t) hi) << 32) + lo;
+	return (((u64) hi) << 32) + lo;
 }
 
 void gic_start_count(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
index b03e4f58d02e..a533a6cc2d53 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
@@ -122,7 +122,7 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static cycle_t xgbe_cc_read(const struct cyclecounter *cc)
+static u64 xgbe_cc_read(const struct cyclecounter *cc)
 {
 	struct xgbe_prv_data *pdata = container_of(cc,
 						   struct xgbe_prv_data,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 688617ac8c29..d8d06fdfc42b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15223,7 +15223,7 @@ void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb)
 }
 
 /* Read the PHC */
-static cycle_t bnx2x_cyclecounter_read(const struct cyclecounter *cc)
+static u64 bnx2x_cyclecounter_read(const struct cyclecounter *cc)
 {
 	struct bnx2x *bp = container_of(cc, struct bnx2x, cyclecounter);
 	int port = BP_PORT(bp);
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index f9e74461bdc0..6ebad3fac81d 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -230,7 +230,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
  * cyclecounter structure used to construct a ns counter from the
  * arbitrary fixed point registers
  */
-static cycle_t fec_ptp_read(const struct cyclecounter *cc)
+static u64 fec_ptp_read(const struct cyclecounter *cc)
 {
 	struct fec_enet_private *fep =
 		container_of(cc, struct fec_enet_private, cc);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index ffcf35af4881..eccf1da9356b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4305,24 +4305,24 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
 /**
  * e1000e_sanitize_systim - sanitize raw cycle counter reads
  * @hw: pointer to the HW structure
- * @systim: cycle_t value read, sanitized and returned
+ * @systim: time value read, sanitized and returned
  *
  * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
  * check to see that the time is incrementing at a reasonable
  * rate and is a multiple of incvalue.
  **/
-static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim)
 {
 	u64 time_delta, rem, temp;
-	cycle_t systim_next;
+	u64 systim_next;
 	u32 incvalue;
 	int i;
 
 	incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
 	for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
 		/* latch SYSTIMH on read of SYSTIML */
-		systim_next = (cycle_t)er32(SYSTIML);
-		systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+		systim_next = (u64)er32(SYSTIML);
+		systim_next |= (u64)er32(SYSTIMH) << 32;
 
 		time_delta = systim_next - systim;
 		temp = time_delta;
@@ -4342,13 +4342,13 @@ static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
  * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
  * @cc: cyclecounter structure
  **/
-static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
+static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
 {
 	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
 						     cc);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 systimel, systimeh;
-	cycle_t systim;
+	u64 systim;
 	/* SYSTIMH latching upon SYSTIML read does not work well.
 	 * This means that if SYSTIML overflows after we read it but before
 	 * we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4368,8 +4368,8 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
 			systimel = systimel_2;
 		}
 	}
-	systim = (cycle_t)systimel;
-	systim |= (cycle_t)systimeh << 32;
+	systim = (u64)systimel;
+	systim |= (u64)systimeh << 32;
 
 	if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
 		systim = e1000e_sanitize_systim(hw, systim);
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index ad03763e009a..34cc3be0df8e 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -127,8 +127,8 @@ static int e1000e_phc_get_syncdevicetime(ktime_t *device,
 	unsigned long flags;
 	int i;
 	u32 tsync_ctrl;
-	cycle_t dev_cycles;
-	cycle_t sys_cycles;
+	u64 dev_cycles;
+	u64 sys_cycles;
 
 	tsync_ctrl = er32(TSYNCTXCTL);
 	tsync_ctrl |= E1000_TSYNCTXCTL_START_SYNC |
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index c30eea8399a7..c4477552ce9e 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -77,7 +77,7 @@
 static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
 
 /* SYSTIM read access for the 82576 */
-static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
+static u64 igb_ptp_read_82576(const struct cyclecounter *cc)
 {
 	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
 	struct e1000_hw *hw = &igb->hw;
@@ -94,7 +94,7 @@ static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
 }
 
 /* SYSTIM read access for the 82580 */
-static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
+static u64 igb_ptp_read_82580(const struct cyclecounter *cc)
 {
 	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
 	struct e1000_hw *hw = &igb->hw;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index a92277683a64..1efb404431e9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -245,7 +245,7 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter)
  * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of
  * "cycles", rather than seconds and nanoseconds.
  */
-static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
+static u64 ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
 {
 	struct ixgbe_adapter *adapter =
 			container_of(hw_cc, struct ixgbe_adapter, hw_cc);
@@ -282,7 +282,7 @@ static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
  * cyclecounter structure used to construct a ns counter from the
  * arbitrary fixed point registers
  */
-static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc)
+static u64 ixgbe_ptp_read_82599(const struct cyclecounter *cc)
 {
 	struct ixgbe_adapter *adapter =
 		container_of(cc, struct ixgbe_adapter, hw_cc);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index a5fc46bbcbe2..015198c14fa8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -38,7 +38,7 @@
 
 /* mlx4_en_read_clock - read raw cycle counter (to be used by time counter)
  */
-static cycle_t mlx4_en_read_clock(const struct cyclecounter *tc)
+static u64 mlx4_en_read_clock(const struct cyclecounter *tc)
 {
 	struct mlx4_en_dev *mdev =
 		container_of(tc, struct mlx4_en_dev, cycles);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index b2ca8a635b2e..5e7840a7a33b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1823,10 +1823,10 @@ static void unmap_bf_area(struct mlx4_dev *dev)
 		io_mapping_free(mlx4_priv(dev)->bf_mapping);
 }
 
-cycle_t mlx4_read_clock(struct mlx4_dev *dev)
+u64 mlx4_read_clock(struct mlx4_dev *dev)
 {
 	u32 clockhi, clocklo, clockhi1;
-	cycle_t cycles;
+	u64 cycles;
 	int i;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 2cd8e56a573b..746a92c13644 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -49,7 +49,7 @@ void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
 	hwts->hwtstamp = ns_to_ktime(nsec);
 }
 
-static cycle_t mlx5e_read_internal_timer(const struct cyclecounter *cc)
+static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc)
 {
 	struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp,
 						   cycles);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7b4c339a8a9a..54e5a786f191 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -557,7 +557,7 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
 {
 	u32 timer_h, timer_h1, timer_l;
 
@@ -567,7 +567,7 @@ cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
 	if (timer_h != timer_h1) /* wrap around */
 		timer_l = ioread32be(&dev->iseg->internal_timer_l);
 
-	return (cycle_t)timer_l | (cycle_t)timer_h1 << 32;
+	return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
 static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index e0a8fbdd1446..d4a99c9757cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -106,7 +106,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
-cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 void mlx5_cq_tasklet_cb(unsigned long data);
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 0c0d48e5bea4..32279d21c836 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -121,7 +121,7 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 	return type == match ? 0 : -1;
 }
 
-static cycle_t cpts_systim_read(const struct cyclecounter *cc)
+static u64 cpts_systim_read(const struct cyclecounter *cc)
 {
 	u64 val = 0;
 	struct cpts_event *event;
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index dda39d8fa189..b717ed9d2b75 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -25,13 +25,13 @@
 
 struct arch_timer_kvm {
 	/* Virtual offset */
-	cycle_t			cntvoff;
+	u64			cntvoff;
 };
 
 struct arch_timer_cpu {
 	/* Registers: control register, timer value */
 	u32				cntv_ctl;	/* Saved/restored */
-	cycle_t				cntv_cval;	/* Saved/restored */
+	u64				cntv_cval;	/* Saved/restored */
 
 	/*
 	 * Anything that is not used directly from assembly code goes
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 65602d395a52..e315d04a2fd9 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -75,8 +75,8 @@ struct module;
  * structure.
  */
 struct clocksource {
-	cycle_t (*read)(struct clocksource *cs);
-	cycle_t mask;
+	u64 (*read)(struct clocksource *cs);
+	u64 mask;
 	u32 mult;
 	u32 shift;
 	u64 max_idle_ns;
@@ -98,8 +98,8 @@ struct clocksource {
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
 	struct list_head wd_list;
-	cycle_t cs_last;
-	cycle_t wd_last;
+	u64 cs_last;
+	u64 wd_last;
 #endif
 	struct module *owner;
 };
@@ -117,7 +117,7 @@ struct clocksource {
 #define CLOCK_SOURCE_RESELECT			0x100
 
 /* simplify initialization of mask field */
-#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
+#define CLOCKSOURCE_MASK(bits) (u64)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
 
 static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from)
 {
@@ -176,7 +176,7 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
  *
  * XXX - This could use some mult_lxl_ll() asm optimization
  */
-static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
+static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift)
 {
 	return ((u64) cycles * mult) >> shift;
 }
@@ -236,13 +236,13 @@ static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz
 
 extern int timekeeping_notify(struct clocksource *clock);
 
-extern cycle_t clocksource_mmio_readl_up(struct clocksource *);
-extern cycle_t clocksource_mmio_readl_down(struct clocksource *);
-extern cycle_t clocksource_mmio_readw_up(struct clocksource *);
-extern cycle_t clocksource_mmio_readw_down(struct clocksource *);
+extern u64 clocksource_mmio_readl_up(struct clocksource *);
+extern u64 clocksource_mmio_readl_down(struct clocksource *);
+extern u64 clocksource_mmio_readw_up(struct clocksource *);
+extern u64 clocksource_mmio_readw_down(struct clocksource *);
 
 extern int clocksource_mmio_init(void __iomem *, const char *,
-	unsigned long, int, unsigned, cycle_t (*)(struct clocksource *));
+	unsigned long, int, unsigned, u64 (*)(struct clocksource *));
 
 extern int clocksource_i8253_init(void);
 
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index 1f79b20918b1..4334106f44c3 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -50,6 +50,6 @@ dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base,
 			unsigned long freq);
 void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs);
 void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs);
-cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs);
+u64 dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs);
 
 #endif /* __DW_APB_TIMER_H__ */
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index 81f930b0bca9..7b49c71c968b 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -259,11 +259,11 @@ extern void gic_init(unsigned long gic_base_addr,
 	unsigned long gic_addrspace_size, unsigned int cpu_vec,
 	unsigned int irqbase);
 extern void gic_clocksource_init(unsigned int);
-extern cycle_t gic_read_count(void);
+extern u64 gic_read_count(void);
 extern unsigned int gic_get_count_width(void);
-extern cycle_t gic_read_compare(void);
-extern void gic_write_compare(cycle_t cnt);
-extern void gic_write_cpu_compare(cycle_t cnt, int cpu);
+extern u64 gic_read_compare(void);
+extern void gic_write_compare(u64 cnt);
+extern void gic_write_cpu_compare(u64 cnt, int cpu);
 extern void gic_start_count(void);
 extern void gic_stop_count(void);
 extern int gic_get_c0_compare_int(void);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c9f379689dd0..93bdb3485192 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1460,7 +1460,7 @@ int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id,
 int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
 				      u32 max_range_qpn);
 
-cycle_t mlx4_read_clock(struct mlx4_dev *dev);
+u64 mlx4_read_clock(struct mlx4_dev *dev);
 
 struct mlx4_active_ports {
 	DECLARE_BITMAP(ports, MLX4_MAX_PORTS);
diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h
index 4382035a75bb..2496ad4cfc99 100644
--- a/include/linux/timecounter.h
+++ b/include/linux/timecounter.h
@@ -20,7 +20,7 @@
 #include <linux/types.h>
 
 /* simplify initialization of mask field */
-#define CYCLECOUNTER_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
+#define CYCLECOUNTER_MASK(bits) (u64)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
 
 /**
  * struct cyclecounter - hardware abstraction for a free running counter
@@ -37,8 +37,8 @@
  * @shift:		cycle to nanosecond divisor (power of two)
  */
 struct cyclecounter {
-	cycle_t (*read)(const struct cyclecounter *cc);
-	cycle_t mask;
+	u64 (*read)(const struct cyclecounter *cc);
+	u64 mask;
 	u32 mult;
 	u32 shift;
 };
@@ -63,7 +63,7 @@ struct cyclecounter {
  */
 struct timecounter {
 	const struct cyclecounter *cc;
-	cycle_t cycle_last;
+	u64 cycle_last;
 	u64 nsec;
 	u64 mask;
 	u64 frac;
@@ -77,7 +77,7 @@ struct timecounter {
  * @frac:	pointer to storage for the fractional nanoseconds.
  */
 static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
-				      cycle_t cycles, u64 mask, u64 *frac)
+				      u64 cycles, u64 mask, u64 *frac)
 {
 	u64 ns = (u64) cycles;
 
@@ -134,6 +134,6 @@ extern u64 timecounter_read(struct timecounter *tc);
  * in the past.
  */
 extern u64 timecounter_cyc2time(struct timecounter *tc,
-				cycle_t cycle_tstamp);
+				u64 cycle_tstamp);
 
 #endif
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e88005459035..110f4532188c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,9 +29,9 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	cycle_t			(*read)(struct clocksource *cs);
-	cycle_t			mask;
-	cycle_t			cycle_last;
+	u64			(*read)(struct clocksource *cs);
+	u64			mask;
+	u64			cycle_last;
 	u32			mult;
 	u32			shift;
 	u64			xtime_nsec;
@@ -97,7 +97,7 @@ struct timekeeper {
 	struct timespec64	raw_time;
 
 	/* The following members are for timekeeping internal use */
-	cycle_t			cycle_interval;
+	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
 	u32			raw_interval;
@@ -136,7 +136,7 @@ extern void update_vsyscall_tz(void);
 
 extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
 				struct clocksource *c, u32 mult,
-				cycle_t cycle_last);
+				u64 cycle_last);
 extern void update_vsyscall_tz(void);
 
 #else
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 361f8bf1429d..d2e804e15c3e 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -293,7 +293,7 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw,
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  */
 struct system_time_snapshot {
-	cycle_t		cycles;
+	u64		cycles;
 	ktime_t		real;
 	ktime_t		raw;
 	unsigned int	clock_was_set_seq;
@@ -321,7 +321,7 @@ struct system_device_crosststamp {
  *	timekeeping code to verify comparibility of two cycle values
  */
 struct system_counterval_t {
-	cycle_t			cycles;
+	u64			cycles;
 	struct clocksource	*cs;
 };
 
diff --git a/include/linux/types.h b/include/linux/types.h
index d501ad3ba247..1e7bd24848fc 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -228,8 +228,5 @@ struct callback_head {
 typedef void (*rcu_callback_t)(struct rcu_head *head);
 typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
 
-/* clocksource cycle base type */
-typedef u64 cycle_t;
-
 #endif /*  __ASSEMBLY__ */
 #endif /* _LINUX_TYPES_H */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 150242ccfcd2..665985b0a89a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -170,7 +170,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, cslast, wdlast, delta;
+	u64 csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
 
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 555e21f7b966..a4a0e478e44d 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -59,9 +59,9 @@
 #define JIFFIES_SHIFT	8
 #endif
 
-static cycle_t jiffies_read(struct clocksource *cs)
+static u64 jiffies_read(struct clocksource *cs)
 {
-	return (cycle_t) jiffies;
+	return (u64) jiffies;
 }
 
 static struct clocksource clocksource_jiffies = {
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c
index 4687b3104bae..8afd78932bdf 100644
--- a/kernel/time/timecounter.c
+++ b/kernel/time/timecounter.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(timecounter_init);
  */
 static u64 timecounter_read_delta(struct timecounter *tc)
 {
-	cycle_t cycle_now, cycle_delta;
+	u64 cycle_now, cycle_delta;
 	u64 ns_offset;
 
 	/* read cycle counter: */
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(timecounter_read);
  * time previous to the time stored in the cycle counter.
  */
 static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc,
-			       cycle_t cycles, u64 mask, u64 frac)
+			       u64 cycles, u64 mask, u64 frac)
 {
 	u64 ns = (u64) cycles;
 
@@ -90,7 +90,7 @@ static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc,
 }
 
 u64 timecounter_cyc2time(struct timecounter *tc,
-			 cycle_t cycle_tstamp)
+			 u64 cycle_tstamp)
 {
 	u64 delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
 	u64 nsec = tc->nsec, frac = tc->frac;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index da233cdf89b0..f4152a69277f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -119,10 +119,10 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
-static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 {
 
-	cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+	u64 max_cycles = tk->tkr_mono.clock->max_cycles;
 	const char *name = tk->tkr_mono.clock->name;
 
 	if (offset > max_cycles) {
@@ -158,10 +158,10 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
 	}
 }
 
-static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	cycle_t now, last, mask, max, delta;
+	u64 now, last, mask, max, delta;
 	unsigned int seq;
 
 	/*
@@ -199,12 +199,12 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
 	return delta;
 }
 #else
-static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 {
 }
-static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 {
-	cycle_t cycle_now, delta;
+	u64 cycle_now, delta;
 
 	/* read clocksource */
 	cycle_now = tkr->read(tkr->clock);
@@ -229,7 +229,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
  */
 static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 {
-	cycle_t interval;
+	u64 interval;
 	u64 tmp, ntpinterval;
 	struct clocksource *old_clock;
 
@@ -254,7 +254,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	if (tmp == 0)
 		tmp = 1;
 
-	interval = (cycle_t) tmp;
+	interval = (u64) tmp;
 	tk->cycle_interval = interval;
 
 	/* Go back from cycles -> shifted ns */
@@ -298,8 +298,7 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
-					  cycle_t delta)
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
 {
 	u64 nsec;
 
@@ -312,16 +311,15 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
 
 static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-	cycle_t delta;
+	u64 delta;
 
 	delta = timekeeping_get_delta(tkr);
 	return timekeeping_delta_to_ns(tkr, delta);
 }
 
-static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
-					   cycle_t cycles)
+static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
 {
-	cycle_t delta;
+	u64 delta;
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
@@ -454,9 +452,9 @@ u64 notrace ktime_get_boot_fast_ns(void)
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
 /* Suspend-time cycles value for halted fast timekeeper. */
-static cycle_t cycles_at_suspend;
+static u64 cycles_at_suspend;
 
-static cycle_t dummy_clock_read(struct clocksource *cs)
+static u64 dummy_clock_read(struct clocksource *cs)
 {
 	return cycles_at_suspend;
 }
@@ -650,7 +648,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
 	struct clocksource *clock = tk->tkr_mono.clock;
-	cycle_t cycle_now, delta;
+	u64 cycle_now, delta;
 	u64 nsec;
 
 	cycle_now = tk->tkr_mono.read(clock);
@@ -923,7 +921,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 	ktime_t base_real;
 	u64 nsec_raw;
 	u64 nsec_real;
-	cycle_t now;
+	u64 now;
 
 	WARN_ON_ONCE(timekeeping_suspended);
 
@@ -982,8 +980,8 @@ static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
  * interval is partial_history_cycles.
  */
 static int adjust_historical_crosststamp(struct system_time_snapshot *history,
-					 cycle_t partial_history_cycles,
-					 cycle_t total_history_cycles,
+					 u64 partial_history_cycles,
+					 u64 total_history_cycles,
 					 bool discontinuity,
 					 struct system_device_crosststamp *ts)
 {
@@ -1047,7 +1045,7 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history,
 /*
  * cycle_between - true if test occurs chronologically between before and after
  */
-static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
+static bool cycle_between(u64 before, u64 test, u64 after)
 {
 	if (test > before && test < after)
 		return true;
@@ -1077,7 +1075,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 {
 	struct system_counterval_t system_counterval;
 	struct timekeeper *tk = &tk_core.timekeeper;
-	cycle_t cycles, now, interval_start;
+	u64 cycles, now, interval_start;
 	unsigned int clock_was_set_seq = 0;
 	ktime_t base_real, base_raw;
 	u64 nsec_real, nsec_raw;
@@ -1138,7 +1136,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 	 * current interval
 	 */
 	if (do_interp) {
-		cycle_t partial_history_cycles, total_history_cycles;
+		u64 partial_history_cycles, total_history_cycles;
 		bool discontinuity;
 
 		/*
@@ -1644,7 +1642,7 @@ void timekeeping_resume(void)
 	struct clocksource *clock = tk->tkr_mono.clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
-	cycle_t cycle_now;
+	u64 cycle_now;
 
 	sleeptime_injected = false;
 	read_persistent_clock64(&ts_new);
@@ -2010,11 +2008,10 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  *
  * Returns the unconsumed cycles.
  */
-static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
-						u32 shift,
-						unsigned int *clock_set)
+static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
+				    u32 shift, unsigned int *clock_set)
 {
-	cycle_t interval = tk->cycle_interval << shift;
+	u64 interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
@@ -2055,7 +2052,7 @@ void update_wall_time(void)
 {
 	struct timekeeper *real_tk = &tk_core.timekeeper;
 	struct timekeeper *tk = &shadow_timekeeper;
-	cycle_t offset;
+	u64 offset;
 	int shift = 0, maxshift;
 	unsigned int clock_set = 0;
 	unsigned long flags;
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 5be76270ec4a..9a18f121f399 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -13,9 +13,9 @@ extern void tk_debug_account_sleep_time(struct timespec64 *t);
 #endif
 
 #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
-static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
+static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
 {
-	cycle_t ret = (now - last) & mask;
+	u64 ret = (now - last) & mask;
 
 	/*
 	 * Prevent time going backwards by checking the MSB of mask in
@@ -24,7 +24,7 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
 	return ret & ~(mask >> 1) ? 0 : ret;
 }
 #else
-static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
+static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
 {
 	return (now - last) & mask;
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1f0f547c54da..eb230f06ba41 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2847,7 +2847,7 @@ static void ftrace_shutdown_sysctl(void)
 	}
 }
 
-static cycle_t		ftrace_update_time;
+static u64		ftrace_update_time;
 unsigned long		ftrace_update_tot_cnt;
 
 static inline int ops_traces_mod(struct ftrace_ops *ops)
@@ -2894,7 +2894,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 {
 	struct ftrace_page *pg;
 	struct dyn_ftrace *p;
-	cycle_t start, stop;
+	u64 start, stop;
 	unsigned long update_cnt = 0;
 	unsigned long rec_flags = 0;
 	int i;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 66f829c47bec..d7449783987a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -236,7 +236,7 @@ static int __init set_tracepoint_printk(char *str)
 }
 __setup("tp_printk", set_tracepoint_printk);
 
-unsigned long long ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(u64 nsec)
 {
 	nsec += 500;
 	do_div(nsec, 1000);
@@ -573,7 +573,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
 	return read;
 }
 
-static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
+static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
 
@@ -587,7 +587,7 @@ static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 	return ts;
 }
 
-cycle_t ftrace_now(int cpu)
+u64 ftrace_now(int cpu)
 {
 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c2234494f40c..1ea51ab53edf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -159,7 +159,7 @@ struct trace_array_cpu {
 	unsigned long		policy;
 	unsigned long		rt_priority;
 	unsigned long		skipped_entries;
-	cycle_t			preempt_timestamp;
+	u64			preempt_timestamp;
 	pid_t			pid;
 	kuid_t			uid;
 	char			comm[TASK_COMM_LEN];
@@ -177,7 +177,7 @@ struct trace_buffer {
 	struct trace_array		*tr;
 	struct ring_buffer		*buffer;
 	struct trace_array_cpu __percpu	*data;
-	cycle_t				time_start;
+	u64				time_start;
 	int				cpu;
 };
 
@@ -689,7 +689,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
 }
 #endif /* CONFIG_STACKTRACE */
 
-extern cycle_t ftrace_now(int cpu);
+extern u64 ftrace_now(int cpu);
 
 extern void trace_find_cmdline(int pid, char comm[]);
 extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
@@ -736,7 +736,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern unsigned long long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(u64 nsec);
 extern int
 trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
 extern int
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 86654d7e1afe..7758bc0617cb 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -298,7 +298,7 @@ static void irqsoff_print_header(struct seq_file *s)
 /*
  * Should this new latency be reported/recorded?
  */
-static bool report_latency(struct trace_array *tr, cycle_t delta)
+static bool report_latency(struct trace_array *tr, u64 delta)
 {
 	if (tracing_thresh) {
 		if (delta < tracing_thresh)
@@ -316,7 +316,7 @@ check_critical_timing(struct trace_array *tr,
 		      unsigned long parent_ip,
 		      int cpu)
 {
-	cycle_t T0, T1, delta;
+	u64 T0, T1, delta;
 	unsigned long flags;
 	int pc;
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5d0bb025bb21..ddec53b67646 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -358,7 +358,7 @@ static void wakeup_print_header(struct seq_file *s)
 /*
  * Should this new latency be reported/recorded?
  */
-static bool report_latency(struct trace_array *tr, cycle_t delta)
+static bool report_latency(struct trace_array *tr, u64 delta)
 {
 	if (tracing_thresh) {
 		if (delta < tracing_thresh)
@@ -440,7 +440,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
 			  struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array_cpu *data;
-	cycle_t T0, T1, delta;
+	u64 T0, T1, delta;
 	unsigned long flags;
 	long disabled;
 	int cpu;
diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
index 38990a77d7b7..c6994ebb4567 100644
--- a/sound/hda/hdac_stream.c
+++ b/sound/hda/hdac_stream.c
@@ -465,7 +465,7 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev,
 }
 EXPORT_SYMBOL_GPL(snd_hdac_stream_set_params);
 
-static cycle_t azx_cc_read(const struct cyclecounter *cc)
+static u64 azx_cc_read(const struct cyclecounter *cc)
 {
 	struct hdac_stream *azx_dev = container_of(cc, struct hdac_stream, cc);
 
@@ -473,7 +473,7 @@ static cycle_t azx_cc_read(const struct cyclecounter *cc)
 }
 
 static void azx_timecounter_init(struct hdac_stream *azx_dev,
-				 bool force, cycle_t last)
+				 bool force, u64 last)
 {
 	struct timecounter *tc = &azx_dev->tc;
 	struct cyclecounter *cc = &azx_dev->cc;
@@ -523,7 +523,7 @@ void snd_hdac_stream_timecounter_init(struct hdac_stream *azx_dev,
 	struct snd_pcm_runtime *runtime = azx_dev->substream->runtime;
 	struct hdac_stream *s;
 	bool inited = false;
-	cycle_t cycle_last = 0;
+	u64 cycle_last = 0;
 	int i = 0;
 
 	list_for_each_entry(s, &bus->stream_list, list) {
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index ae95fc0e3214..97b657adb3bd 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -39,7 +39,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->arch.timer_cpu.active_cleared_last = false;
 }
 
-static cycle_t kvm_phys_timer_read(void)
+static u64 kvm_phys_timer_read(void)
 {
 	return timecounter->cc->read(timecounter->cc);
 }
@@ -102,7 +102,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 
 static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
 {
-	cycle_t cval, now;
+	u64 cval, now;
 
 	cval = vcpu->arch.timer_cpu.cntv_cval;
 	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
@@ -155,7 +155,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	cycle_t cval, now;
+	u64 cval, now;
 
 	if (!kvm_timer_irq_can_fire(vcpu))
 		return false;
-- 
cgit v1.2.3


From 2456e855354415bfaeb7badaa14e11b3e02c8466 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Dec 2016 11:38:40 +0100
Subject: ktime: Get rid of the union

ktime is a union because the initial implementation stored the time in
scalar nanoseconds on 64 bit machine and in a endianess optimized timespec
variant for 32bit machines. The Y2038 cleanup removed the timespec variant
and switched everything to scalar nanoseconds. The union remained, but
become completely pointless.

Get rid of the union and just keep ktime_t as simple typedef of type s64.

The conversion was done with coccinelle and some manual mopping up.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 drivers/base/power/wakeup.c            |  2 +-
 drivers/media/rc/ir-rx51.c             |  2 +-
 drivers/rtc/interface.c                |  8 ++--
 drivers/usb/chipidea/otg_fsm.c         | 14 +++----
 drivers/usb/host/ehci-timer.c          |  2 +-
 drivers/usb/host/fotg210-hcd.c         |  2 +-
 fs/aio.c                               |  4 +-
 fs/nfs/flexfilelayout/flexfilelayout.c |  3 +-
 fs/ocfs2/cluster/heartbeat.c           |  2 +-
 fs/timerfd.c                           | 26 ++++++-------
 include/linux/futex.h                  |  4 +-
 include/linux/hrtimer.h                | 12 +++---
 include/linux/ktime.h                  | 68 ++++++++++++----------------------
 include/linux/tick.h                   |  4 +-
 include/linux/wait.h                   |  2 +-
 include/net/red.h                      |  4 +-
 include/net/sock.h                     |  4 +-
 include/trace/events/alarmtimer.h      |  6 +--
 include/trace/events/timer.h           | 16 ++++----
 kernel/futex.c                         |  4 +-
 kernel/signal.c                        |  6 +--
 kernel/time/alarmtimer.c               | 20 +++++-----
 kernel/time/clockevents.c              |  6 +--
 kernel/time/hrtimer.c                  | 52 +++++++++++++-------------
 kernel/time/itimer.c                   | 10 ++---
 kernel/time/ntp.c                      |  2 +-
 kernel/time/posix-timers.c             | 20 +++++-----
 kernel/time/tick-broadcast-hrtimer.c   |  2 +-
 kernel/time/tick-broadcast.c           | 24 ++++++------
 kernel/time/tick-oneshot.c             |  2 +-
 kernel/time/tick-sched.c               | 22 +++++------
 kernel/time/timekeeping.c              |  6 +--
 lib/timerqueue.c                       |  4 +-
 net/can/bcm.c                          | 28 +++++++-------
 net/can/gw.c                           |  2 +-
 net/core/dev.c                         |  4 +-
 net/core/skbuff.c                      |  2 +-
 net/ipv4/tcp_output.c                  |  4 +-
 net/ipv6/exthdrs.c                     |  2 +-
 net/ipx/af_ipx.c                       |  2 +-
 net/netfilter/nf_conntrack_core.c      |  2 +-
 net/netfilter/nfnetlink_log.c          |  2 +-
 net/netfilter/nfnetlink_queue.c        |  4 +-
 net/netfilter/xt_time.c                |  2 +-
 net/sched/sch_netem.c                  |  2 +-
 net/socket.c                           |  2 +-
 net/sunrpc/svcsock.c                   |  2 +-
 sound/core/hrtimer.c                   |  2 +-
 48 files changed, 200 insertions(+), 227 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index bf9ba26981a5..94332902a1cf 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -998,7 +998,7 @@ static int print_wakeup_source_stats(struct seq_file *m,
 
 		active_time = ktime_sub(now, ws->last_time);
 		total_time = ktime_add(total_time, active_time);
-		if (active_time.tv64 > max_time.tv64)
+		if (active_time > max_time)
 			max_time = active_time;
 
 		if (ws->autosleep_enabled)
diff --git a/drivers/media/rc/ir-rx51.c b/drivers/media/rc/ir-rx51.c
index 82fb6f2ca011..e6efa8c267a0 100644
--- a/drivers/media/rc/ir-rx51.c
+++ b/drivers/media/rc/ir-rx51.c
@@ -109,7 +109,7 @@ static enum hrtimer_restart lirc_rx51_timer_cb(struct hrtimer *timer)
 
 		now = timer->base->get_time();
 
-	} while (hrtimer_get_expires_tv64(timer) < now.tv64);
+	} while (hrtimer_get_expires_tv64(timer) < now);
 
 	return HRTIMER_RESTART;
 end:
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 84a52db9b05f..5cf196dfc193 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -394,8 +394,8 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	rtc->aie_timer.period = ktime_set(0, 0);
 
 	/* Alarm has to be enabled & in the future for us to enqueue it */
-	if (alarm->enabled && (rtc_tm_to_ktime(now).tv64 <
-			 rtc->aie_timer.node.expires.tv64)) {
+	if (alarm->enabled && (rtc_tm_to_ktime(now) <
+			 rtc->aie_timer.node.expires)) {
 
 		rtc->aie_timer.enabled = 1;
 		timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
@@ -766,7 +766,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 
 	/* Skip over expired timers */
 	while (next) {
-		if (next->expires.tv64 >= now.tv64)
+		if (next->expires >= now)
 			break;
 		next = timerqueue_iterate_next(next);
 	}
@@ -858,7 +858,7 @@ again:
 	__rtc_read_time(rtc, &tm);
 	now = rtc_tm_to_ktime(tm);
 	while ((next = timerqueue_getnext(&rtc->timerqueue))) {
-		if (next->expires.tv64 > now.tv64)
+		if (next->expires > now)
 			break;
 
 		/* expire timer */
diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c
index de8e22ec3902..93e24ce61a3a 100644
--- a/drivers/usb/chipidea/otg_fsm.c
+++ b/drivers/usb/chipidea/otg_fsm.c
@@ -234,8 +234,8 @@ static void ci_otg_add_timer(struct ci_hdrc *ci, enum otg_fsm_timer t)
 				ktime_set(timer_sec, timer_nsec));
 	ci->enabled_otg_timer_bits |= (1 << t);
 	if ((ci->next_otg_timer == NUM_OTG_FSM_TIMERS) ||
-			(ci->hr_timeouts[ci->next_otg_timer].tv64 >
-						ci->hr_timeouts[t].tv64)) {
+			(ci->hr_timeouts[ci->next_otg_timer] >
+						ci->hr_timeouts[t])) {
 			ci->next_otg_timer = t;
 			hrtimer_start_range_ns(&ci->otg_fsm_hrtimer,
 					ci->hr_timeouts[t], NSEC_PER_MSEC,
@@ -269,8 +269,8 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum otg_fsm_timer t)
 			for_each_set_bit(cur_timer, &enabled_timer_bits,
 							NUM_OTG_FSM_TIMERS) {
 				if ((next_timer == NUM_OTG_FSM_TIMERS) ||
-					(ci->hr_timeouts[next_timer].tv64 <
-					ci->hr_timeouts[cur_timer].tv64))
+					(ci->hr_timeouts[next_timer] <
+					 ci->hr_timeouts[cur_timer]))
 					next_timer = cur_timer;
 			}
 		}
@@ -397,14 +397,14 @@ static enum hrtimer_restart ci_otg_hrtimer_func(struct hrtimer *t)
 
 	now = ktime_get();
 	for_each_set_bit(cur_timer, &enabled_timer_bits, NUM_OTG_FSM_TIMERS) {
-		if (now.tv64 >= ci->hr_timeouts[cur_timer].tv64) {
+		if (now >= ci->hr_timeouts[cur_timer]) {
 			ci->enabled_otg_timer_bits &= ~(1 << cur_timer);
 			if (otg_timer_handlers[cur_timer])
 				ret = otg_timer_handlers[cur_timer](ci);
 		} else {
 			if ((next_timer == NUM_OTG_FSM_TIMERS) ||
-				(ci->hr_timeouts[cur_timer].tv64 <
-					ci->hr_timeouts[next_timer].tv64))
+				(ci->hr_timeouts[cur_timer] <
+					ci->hr_timeouts[next_timer]))
 				next_timer = cur_timer;
 		}
 	}
diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c
index 69f50e6533a6..262e10cacc8c 100644
--- a/drivers/usb/host/ehci-timer.c
+++ b/drivers/usb/host/ehci-timer.c
@@ -425,7 +425,7 @@ static enum hrtimer_restart ehci_hrtimer_func(struct hrtimer *t)
 	 */
 	now = ktime_get();
 	for_each_set_bit(e, &events, EHCI_HRTIMER_NUM_EVENTS) {
-		if (now.tv64 >= ehci->hr_timeouts[e].tv64)
+		if (now >= ehci->hr_timeouts[e])
 			event_handlers[e](ehci);
 		else
 			ehci_enable_event(ehci, e, false);
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 66efa9a67687..4dda56ef06cd 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -1381,7 +1381,7 @@ static enum hrtimer_restart fotg210_hrtimer_func(struct hrtimer *t)
 	 */
 	now = ktime_get();
 	for_each_set_bit(e, &events, FOTG210_HRTIMER_NUM_EVENTS) {
-		if (now.tv64 >= fotg210->hr_timeouts[e].tv64)
+		if (now >= fotg210->hr_timeouts[e])
 			event_handlers[e](fotg210);
 		else
 			fotg210_enable_event(fotg210, e, false);
diff --git a/fs/aio.c b/fs/aio.c
index 955c5241a8f2..4ab67e8cb776 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1285,7 +1285,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 			struct io_event __user *event,
 			struct timespec __user *timeout)
 {
-	ktime_t until = { .tv64 = KTIME_MAX };
+	ktime_t until = KTIME_MAX;
 	long ret = 0;
 
 	if (timeout) {
@@ -1311,7 +1311,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	 * the ringbuffer empty. So in practice we should be ok, but it's
 	 * something to be aware of when touching this code.
 	 */
-	if (until.tv64 == 0)
+	if (until == 0)
 		aio_read_events(ctx, min_nr, nr, event, &ret);
 	else
 		wait_event_interruptible_hrtimeout(ctx->wait,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 45962fe5098c..c98f6db9aa6b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -619,12 +619,11 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
 			    struct nfs4_ff_layoutstat *layoutstat,
 			    ktime_t now)
 {
-	static const ktime_t notime = {0};
 	s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
 	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
 
 	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
-	if (ktime_equal(mirror->start_time, notime))
+	if (ktime_equal(mirror->start_time, 0))
 		mirror->start_time = now;
 	if (mirror->report_interval != 0)
 		report_interval = (s64)mirror->report_interval * 1000LL;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 96a155ab5059..f6e871760f8d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1250,7 +1250,7 @@ static int o2hb_thread(void *data)
 
 		mlog(ML_HEARTBEAT,
 		     "start = %lld, end = %lld, msec = %u, ret = %d\n",
-		     before_hb.tv64, after_hb.tv64, elapsed_msec, ret);
+		     before_hb, after_hb, elapsed_msec, ret);
 
 		if (!kthread_should_stop() &&
 		    elapsed_msec < reg->hr_timeout_ms) {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 9ae4abb4110b..fb4407a7cf9e 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -55,7 +55,7 @@ static inline bool isalarm(struct timerfd_ctx *ctx)
 /*
  * This gets called when the timer event triggers. We set the "expired"
  * flag, but we do not re-arm the timer (in case it's necessary,
- * tintv.tv64 != 0) until the timer is accessed.
+ * tintv != 0) until the timer is accessed.
  */
 static void timerfd_triggered(struct timerfd_ctx *ctx)
 {
@@ -93,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
  */
 void timerfd_clock_was_set(void)
 {
-	ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
+	ktime_t moffs = ktime_mono_to_real(0);
 	struct timerfd_ctx *ctx;
 	unsigned long flags;
 
@@ -102,8 +102,8 @@ void timerfd_clock_was_set(void)
 		if (!ctx->might_cancel)
 			continue;
 		spin_lock_irqsave(&ctx->wqh.lock, flags);
-		if (ctx->moffs.tv64 != moffs.tv64) {
-			ctx->moffs.tv64 = KTIME_MAX;
+		if (ctx->moffs != moffs) {
+			ctx->moffs = KTIME_MAX;
 			ctx->ticks++;
 			wake_up_locked(&ctx->wqh);
 		}
@@ -124,9 +124,9 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
 
 static bool timerfd_canceled(struct timerfd_ctx *ctx)
 {
-	if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
+	if (!ctx->might_cancel || ctx->moffs != KTIME_MAX)
 		return false;
-	ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
+	ctx->moffs = ktime_mono_to_real(0);
 	return true;
 }
 
@@ -155,7 +155,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 	else
 		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
-	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
+	return remaining < 0 ? ktime_set(0, 0): remaining;
 }
 
 static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
@@ -184,7 +184,7 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
 		ctx->t.tmr.function = timerfd_tmrproc;
 	}
 
-	if (texp.tv64 != 0) {
+	if (texp != 0) {
 		if (isalarm(ctx)) {
 			if (flags & TFD_TIMER_ABSTIME)
 				alarm_start(&ctx->t.alarm, texp);
@@ -261,9 +261,9 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 	if (ctx->ticks) {
 		ticks = ctx->ticks;
 
-		if (ctx->expired && ctx->tintv.tv64) {
+		if (ctx->expired && ctx->tintv) {
 			/*
-			 * If tintv.tv64 != 0, this is a periodic timer that
+			 * If tintv != 0, this is a periodic timer that
 			 * needs to be re-armed. We avoid doing it in the timer
 			 * callback to avoid DoS attacks specifying a very
 			 * short timer period.
@@ -410,7 +410,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 	else
 		hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
 
-	ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
+	ctx->moffs = ktime_mono_to_real(0);
 
 	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
 			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -469,7 +469,7 @@ static int do_timerfd_settime(int ufd, int flags,
 	 * We do not update "ticks" and "expired" since the timer will be
 	 * re-programmed again in the following timerfd_setup() call.
 	 */
-	if (ctx->expired && ctx->tintv.tv64) {
+	if (ctx->expired && ctx->tintv) {
 		if (isalarm(ctx))
 			alarm_forward_now(&ctx->t.alarm, ctx->tintv);
 		else
@@ -499,7 +499,7 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t)
 	ctx = f.file->private_data;
 
 	spin_lock_irq(&ctx->wqh.lock);
-	if (ctx->expired && ctx->tintv.tv64) {
+	if (ctx->expired && ctx->tintv) {
 		ctx->expired = 0;
 
 		if (isalarm(ctx)) {
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 6435f46d6e13..7c5b694864cd 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
+#include <linux/ktime.h>
 #include <uapi/linux/futex.h>
 
 struct inode;
 struct mm_struct;
 struct task_struct;
-union ktime;
 
-long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 
 extern int
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5e00f80b1535..cdab81ba29f8 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -228,8 +228,8 @@ static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t t
 
 static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
 {
-	timer->node.expires.tv64 = tv64;
-	timer->_softexpires.tv64 = tv64;
+	timer->node.expires = tv64;
+	timer->_softexpires = tv64;
 }
 
 static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
@@ -256,11 +256,11 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
 {
-	return timer->node.expires.tv64;
+	return timer->node.expires;
 }
 static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 {
-	return timer->_softexpires.tv64;
+	return timer->_softexpires;
 }
 
 static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
@@ -297,7 +297,7 @@ extern void hrtimer_peek_ahead_timers(void);
  * this resolution values.
  */
 # define HIGH_RES_NSEC		1
-# define KTIME_HIGH_RES		(ktime_t) { .tv64 = HIGH_RES_NSEC }
+# define KTIME_HIGH_RES		(HIGH_RES_NSEC)
 # define MONOTONIC_RES_NSEC	HIGH_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_HIGH_RES
 
@@ -333,7 +333,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
 	 * hrtimer_start_range_ns() to prevent short timeouts.
 	 */
 	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
-		rem.tv64 -= hrtimer_resolution;
+		rem -= hrtimer_resolution;
 	return rem;
 }
 
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 0fb7ffb1775f..8e573deda55e 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -24,21 +24,8 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-/*
- * ktime_t:
- *
- * A single 64-bit variable is used to store the hrtimers
- * internal representation of time values in scalar nanoseconds. The
- * design plays out best on 64-bit CPUs, where most conversions are
- * NOPs and most arithmetic ktime_t operations are plain arithmetic
- * operations.
- *
- */
-union ktime {
-	s64	tv64;
-};
-
-typedef union ktime ktime_t;		/* Kill this */
+/* Nanosecond scalar representation for kernel time values */
+typedef s64	ktime_t;
 
 /**
  * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
@@ -50,39 +37,34 @@ typedef union ktime ktime_t;		/* Kill this */
 static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs)
 {
 	if (unlikely(secs >= KTIME_SEC_MAX))
-		return (ktime_t){ .tv64 = KTIME_MAX };
+		return KTIME_MAX;
 
-	return (ktime_t) { .tv64 = secs * NSEC_PER_SEC + (s64)nsecs };
+	return secs * NSEC_PER_SEC + (s64)nsecs;
 }
 
 /* Subtract two ktime_t variables. rem = lhs -rhs: */
-#define ktime_sub(lhs, rhs) \
-		({ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }; })
+#define ktime_sub(lhs, rhs)	((lhs) - (rhs))
 
 /* Add two ktime_t variables. res = lhs + rhs: */
-#define ktime_add(lhs, rhs) \
-		({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; })
+#define ktime_add(lhs, rhs)	((lhs) + (rhs))
 
 /*
  * Same as ktime_add(), but avoids undefined behaviour on overflow; however,
  * this means that you must check the result for overflow yourself.
  */
-#define ktime_add_unsafe(lhs, rhs) \
-		({ (ktime_t){ .tv64 = (u64) (lhs).tv64 + (rhs).tv64 }; })
+#define ktime_add_unsafe(lhs, rhs)	((u64) (lhs) + (rhs))
 
 /*
  * Add a ktime_t variable and a scalar nanosecond value.
  * res = kt + nsval:
  */
-#define ktime_add_ns(kt, nsval) \
-		({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
+#define ktime_add_ns(kt, nsval)		((kt) + (nsval))
 
 /*
  * Subtract a scalar nanosecod from a ktime_t variable
  * res = kt - nsval:
  */
-#define ktime_sub_ns(kt, nsval) \
-		({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; })
+#define ktime_sub_ns(kt, nsval)		((kt) - (nsval))
 
 /* convert a timespec to ktime_t format: */
 static inline ktime_t timespec_to_ktime(struct timespec ts)
@@ -103,16 +85,16 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
 }
 
 /* Map the ktime_t to timespec conversion to ns_to_timespec function */
-#define ktime_to_timespec(kt)		ns_to_timespec((kt).tv64)
+#define ktime_to_timespec(kt)		ns_to_timespec((kt))
 
 /* Map the ktime_t to timespec conversion to ns_to_timespec function */
-#define ktime_to_timespec64(kt)		ns_to_timespec64((kt).tv64)
+#define ktime_to_timespec64(kt)		ns_to_timespec64((kt))
 
 /* Map the ktime_t to timeval conversion to ns_to_timeval function */
-#define ktime_to_timeval(kt)		ns_to_timeval((kt).tv64)
+#define ktime_to_timeval(kt)		ns_to_timeval((kt))
 
 /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
-#define ktime_to_ns(kt)			((kt).tv64)
+#define ktime_to_ns(kt)			(kt)
 
 
 /**
@@ -126,7 +108,7 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
  */
 static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
 {
-	return cmp1.tv64 == cmp2.tv64;
+	return cmp1 == cmp2;
 }
 
 /**
@@ -141,9 +123,9 @@ static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
  */
 static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
 {
-	if (cmp1.tv64 < cmp2.tv64)
+	if (cmp1 < cmp2)
 		return -1;
-	if (cmp1.tv64 > cmp2.tv64)
+	if (cmp1 > cmp2)
 		return 1;
 	return 0;
 }
@@ -182,7 +164,7 @@ static inline s64 ktime_divns(const ktime_t kt, s64 div)
 	 */
 	BUG_ON(div < 0);
 	if (__builtin_constant_p(div) && !(div >> 32)) {
-		s64 ns = kt.tv64;
+		s64 ns = kt;
 		u64 tmp = ns < 0 ? -ns : ns;
 
 		do_div(tmp, div);
@@ -199,7 +181,7 @@ static inline s64 ktime_divns(const ktime_t kt, s64 div)
 	 * so catch them on 64bit as well.
 	 */
 	WARN_ON(div < 0);
-	return kt.tv64 / div;
+	return kt / div;
 }
 #endif
 
@@ -256,7 +238,7 @@ extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
 static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt,
 						       struct timespec *ts)
 {
-	if (kt.tv64) {
+	if (kt) {
 		*ts = ktime_to_timespec(kt);
 		return true;
 	} else {
@@ -275,7 +257,7 @@ static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt,
 static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt,
 						       struct timespec64 *ts)
 {
-	if (kt.tv64) {
+	if (kt) {
 		*ts = ktime_to_timespec64(kt);
 		return true;
 	} else {
@@ -290,20 +272,16 @@ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt,
  * this resolution values.
  */
 #define LOW_RES_NSEC		TICK_NSEC
-#define KTIME_LOW_RES		(ktime_t){ .tv64 = LOW_RES_NSEC }
+#define KTIME_LOW_RES		(LOW_RES_NSEC)
 
 static inline ktime_t ns_to_ktime(u64 ns)
 {
-	static const ktime_t ktime_zero = { .tv64 = 0 };
-
-	return ktime_add_ns(ktime_zero, ns);
+	return ns;
 }
 
 static inline ktime_t ms_to_ktime(u64 ms)
 {
-	static const ktime_t ktime_zero = { .tv64 = 0 };
-
-	return ktime_add_ms(ktime_zero, ms);
+	return ms * NSEC_PER_MSEC;
 }
 
 # include <linux/timekeeping.h>
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62be0786d6d0..a04fea19676f 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -127,9 +127,7 @@ static inline void tick_nohz_idle_exit(void) { }
 
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
-	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
-
-	return len;
+	return NSEC_PER_SEC / HZ;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2408e8d5c05c..1421132e9086 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -510,7 +510,7 @@ do {									\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
 			      HRTIMER_MODE_REL);			\
 	hrtimer_init_sleeper(&__t, current);				\
-	if ((timeout).tv64 != KTIME_MAX)				\
+	if ((timeout) != KTIME_MAX)				\
 		hrtimer_start_range_ns(&__t.timer, timeout,		\
 				       current->timer_slack_ns,		\
 				       HRTIMER_MODE_REL);		\
diff --git a/include/net/red.h b/include/net/red.h
index 76e0b5f922c6..208e718e16b9 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -207,7 +207,7 @@ static inline void red_set_parms(struct red_parms *p,
 
 static inline int red_is_idling(const struct red_vars *v)
 {
-	return v->qidlestart.tv64 != 0;
+	return v->qidlestart != 0;
 }
 
 static inline void red_start_of_idle_period(struct red_vars *v)
@@ -217,7 +217,7 @@ static inline void red_start_of_idle_period(struct red_vars *v)
 
 static inline void red_end_of_idle_period(struct red_vars *v)
 {
-	v->qidlestart.tv64 = 0;
+	v->qidlestart = 0;
 }
 
 static inline void red_restart(struct red_vars *v)
diff --git a/include/net/sock.h b/include/net/sock.h
index 282d065e286b..f0e867f58722 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2193,8 +2193,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 	 */
 	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
-	    (kt.tv64 && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
-	    (hwtstamps->hwtstamp.tv64 &&
+	    (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+	    (hwtstamps->hwtstamp &&
 	     (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
 		__sock_recv_timestamp(msg, sk, skb);
 	else
diff --git a/include/trace/events/alarmtimer.h b/include/trace/events/alarmtimer.h
index a1c108c16c9c..ae4f358dd8e9 100644
--- a/include/trace/events/alarmtimer.h
+++ b/include/trace/events/alarmtimer.h
@@ -31,7 +31,7 @@ TRACE_EVENT(alarmtimer_suspend,
 	),
 
 	TP_fast_assign(
-		__entry->expires = expires.tv64;
+		__entry->expires = expires;
 		__entry->alarm_type = flag;
 	),
 
@@ -57,8 +57,8 @@ DECLARE_EVENT_CLASS(alarm_class,
 	TP_fast_assign(
 		__entry->alarm = alarm;
 		__entry->alarm_type = alarm->type;
-		__entry->expires = alarm->node.expires.tv64;
-		__entry->now = now.tv64;
+		__entry->expires = alarm->node.expires;
+		__entry->now = now;
 	),
 
 	TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu",
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 28c5da6fdfac..1448637616d6 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -177,16 +177,14 @@ TRACE_EVENT(hrtimer_start,
 	TP_fast_assign(
 		__entry->hrtimer	= hrtimer;
 		__entry->function	= hrtimer->function;
-		__entry->expires	= hrtimer_get_expires(hrtimer).tv64;
-		__entry->softexpires	= hrtimer_get_softexpires(hrtimer).tv64;
+		__entry->expires	= hrtimer_get_expires(hrtimer);
+		__entry->softexpires	= hrtimer_get_softexpires(hrtimer);
 	),
 
 	TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
 		  __entry->hrtimer, __entry->function,
-		  (unsigned long long)ktime_to_ns((ktime_t) {
-				  .tv64 = __entry->expires }),
-		  (unsigned long long)ktime_to_ns((ktime_t) {
-				  .tv64 = __entry->softexpires }))
+		  (unsigned long long) __entry->expires,
+		  (unsigned long long) __entry->softexpires)
 );
 
 /**
@@ -211,13 +209,13 @@ TRACE_EVENT(hrtimer_expire_entry,
 
 	TP_fast_assign(
 		__entry->hrtimer	= hrtimer;
-		__entry->now		= now->tv64;
+		__entry->now		= *now;
 		__entry->function	= hrtimer->function;
 	),
 
 	TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function,
-		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
- );
+		  (unsigned long long) __entry->now)
+);
 
 DECLARE_EVENT_CLASS(hrtimer_class,
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 9246d9f593d1..0842c8ca534b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2459,7 +2459,7 @@ retry:
 	restart->fn = futex_wait_restart;
 	restart->futex.uaddr = uaddr;
 	restart->futex.val = val;
-	restart->futex.time = abs_time->tv64;
+	restart->futex.time = *abs_time;
 	restart->futex.bitset = bitset;
 	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
@@ -2480,7 +2480,7 @@ static long futex_wait_restart(struct restart_block *restart)
 	ktime_t t, *tp = NULL;
 
 	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-		t.tv64 = restart->futex.time;
+		t = restart->futex.time;
 		tp = &t;
 	}
 	restart->fn = do_no_restart_syscall;
diff --git a/kernel/signal.c b/kernel/signal.c
index f5d4e275345e..ff046b73ff2d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -587,7 +587,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 			struct hrtimer *tmr = &tsk->signal->real_timer;
 
 			if (!hrtimer_is_queued(tmr) &&
-			    tsk->signal->it_real_incr.tv64 != 0) {
+			    tsk->signal->it_real_incr != 0) {
 				hrtimer_forward(tmr, tmr->base->get_time(),
 						tsk->signal->it_real_incr);
 				hrtimer_restart(tmr);
@@ -2766,7 +2766,7 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
 		    const struct timespec *ts)
 {
-	ktime_t *to = NULL, timeout = { .tv64 = KTIME_MAX };
+	ktime_t *to = NULL, timeout = KTIME_MAX;
 	struct task_struct *tsk = current;
 	sigset_t mask = *which;
 	int sig, ret = 0;
@@ -2786,7 +2786,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	sig = dequeue_signal(tsk, &mask, info);
-	if (!sig && timeout.tv64) {
+	if (!sig && timeout) {
 		/*
 		 * None ready, temporarily unblock those we're interested
 		 * while we are sleeping in so that we'll be awakened when
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 3921cf7fea8e..ab6ac077bdb7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -254,13 +254,13 @@ static int alarmtimer_suspend(struct device *dev)
 		if (!next)
 			continue;
 		delta = ktime_sub(next->expires, base->gettime());
-		if (!min.tv64 || (delta.tv64 < min.tv64)) {
+		if (!min || (delta < min)) {
 			expires = next->expires;
 			min = delta;
 			type = i;
 		}
 	}
-	if (min.tv64 == 0)
+	if (min == 0)
 		return 0;
 
 	if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
@@ -328,7 +328,7 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
 	delta = ktime_sub(absexp, base->gettime());
 
 	spin_lock_irqsave(&freezer_delta_lock, flags);
-	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) {
+	if (!freezer_delta || (delta < freezer_delta)) {
 		freezer_delta = delta;
 		freezer_expires = absexp;
 		freezer_alarmtype = type;
@@ -453,10 +453,10 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 
 	delta = ktime_sub(now, alarm->node.expires);
 
-	if (delta.tv64 < 0)
+	if (delta < 0)
 		return 0;
 
-	if (unlikely(delta.tv64 >= interval.tv64)) {
+	if (unlikely(delta >= interval)) {
 		s64 incr = ktime_to_ns(interval);
 
 		overrun = ktime_divns(delta, incr);
@@ -464,7 +464,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 		alarm->node.expires = ktime_add_ns(alarm->node.expires,
 							incr*overrun);
 
-		if (alarm->node.expires.tv64 > now.tv64)
+		if (alarm->node.expires > now)
 			return overrun;
 		/*
 		 * This (and the ktime_add() below) is the
@@ -522,7 +522,7 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 	}
 
 	/* Re-add periodic timers */
-	if (ptr->it.alarm.interval.tv64) {
+	if (ptr->it.alarm.interval) {
 		ptr->it_overrun += alarm_forward(alarm, now,
 						ptr->it.alarm.interval);
 		result = ALARMTIMER_RESTART;
@@ -730,7 +730,7 @@ static int update_rmtp(ktime_t exp, enum  alarmtimer_type type,
 
 	rem = ktime_sub(exp, alarm_bases[type].gettime());
 
-	if (rem.tv64 <= 0)
+	if (rem <= 0)
 		return 0;
 	rmt = ktime_to_timespec(rem);
 
@@ -755,7 +755,7 @@ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
 	struct alarm alarm;
 	int ret = 0;
 
-	exp.tv64 = restart->nanosleep.expires;
+	exp = restart->nanosleep.expires;
 	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
 
 	if (alarmtimer_do_nsleep(&alarm, exp))
@@ -835,7 +835,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	restart = &current->restart_block;
 	restart->fn = alarm_timer_nsleep_restart;
 	restart->nanosleep.clockid = type;
-	restart->nanosleep.expires = exp.tv64;
+	restart->nanosleep.expires = exp;
 	restart->nanosleep.rmtp = rmtp;
 	ret = -ERESTART_RESTARTBLOCK;
 
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 2c5bc77c0bb0..97ac0951f164 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -179,7 +179,7 @@ void clockevents_switch_state(struct clock_event_device *dev,
 void clockevents_shutdown(struct clock_event_device *dev)
 {
 	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
-	dev->next_event.tv64 = KTIME_MAX;
+	dev->next_event = KTIME_MAX;
 }
 
 /**
@@ -213,7 +213,7 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
 	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
 		printk_deferred(KERN_WARNING
 				"CE: Reprogramming failure. Giving up\n");
-		dev->next_event.tv64 = KTIME_MAX;
+		dev->next_event = KTIME_MAX;
 		return -ETIME;
 	}
 
@@ -310,7 +310,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 	int64_t delta;
 	int rc;
 
-	if (unlikely(expires.tv64 < 0)) {
+	if (unlikely(expires < 0)) {
 		WARN_ON_ONCE(1);
 		return -ETIME;
 	}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 161e340395d5..c7f780113884 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -171,7 +171,7 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
 		return 0;
 
 	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
-	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+	return expires <= new_base->cpu_base->expires_next;
 #else
 	return 0;
 #endif
@@ -313,7 +313,7 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
 	 * return to user space in a timespec:
 	 */
-	if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+	if (res < 0 || res < lhs || res < rhs)
 		res = ktime_set(KTIME_SEC_MAX, 0);
 
 	return res;
@@ -465,8 +465,8 @@ static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
 static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
 {
 	struct hrtimer_clock_base *base = cpu_base->clock_base;
-	ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
 	unsigned int active = cpu_base->active_bases;
+	ktime_t expires, expires_next = KTIME_MAX;
 
 	hrtimer_update_next_timer(cpu_base, NULL);
 	for (; active; base++, active >>= 1) {
@@ -479,7 +479,7 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-		if (expires.tv64 < expires_next.tv64) {
+		if (expires < expires_next) {
 			expires_next = expires;
 			hrtimer_update_next_timer(cpu_base, timer);
 		}
@@ -489,8 +489,8 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
 	 * the clock bases so the result might be negative. Fix it up
 	 * to prevent a false positive in clockevents_program_event().
 	 */
-	if (expires_next.tv64 < 0)
-		expires_next.tv64 = 0;
+	if (expires_next < 0)
+		expires_next = 0;
 	return expires_next;
 }
 #endif
@@ -561,10 +561,10 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 
 	expires_next = __hrtimer_get_next_event(cpu_base);
 
-	if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
+	if (skip_equal && expires_next == cpu_base->expires_next)
 		return;
 
-	cpu_base->expires_next.tv64 = expires_next.tv64;
+	cpu_base->expires_next = expires_next;
 
 	/*
 	 * If a hang was detected in the last timer interrupt then we
@@ -622,10 +622,10 @@ static void hrtimer_reprogram(struct hrtimer *timer,
 	 * CLOCK_REALTIME timer might be requested with an absolute
 	 * expiry time which is less than base->offset. Set it to 0.
 	 */
-	if (expires.tv64 < 0)
-		expires.tv64 = 0;
+	if (expires < 0)
+		expires = 0;
 
-	if (expires.tv64 >= cpu_base->expires_next.tv64)
+	if (expires >= cpu_base->expires_next)
 		return;
 
 	/* Update the pointer to the next expiring timer */
@@ -653,7 +653,7 @@ static void hrtimer_reprogram(struct hrtimer *timer,
  */
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
 {
-	base->expires_next.tv64 = KTIME_MAX;
+	base->expires_next = KTIME_MAX;
 	base->hres_active = 0;
 }
 
@@ -827,21 +827,21 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 
 	delta = ktime_sub(now, hrtimer_get_expires(timer));
 
-	if (delta.tv64 < 0)
+	if (delta < 0)
 		return 0;
 
 	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
 		return 0;
 
-	if (interval.tv64 < hrtimer_resolution)
-		interval.tv64 = hrtimer_resolution;
+	if (interval < hrtimer_resolution)
+		interval = hrtimer_resolution;
 
-	if (unlikely(delta.tv64 >= interval.tv64)) {
+	if (unlikely(delta >= interval)) {
 		s64 incr = ktime_to_ns(interval);
 
 		orun = ktime_divns(delta, incr);
 		hrtimer_add_expires_ns(timer, incr * orun);
-		if (hrtimer_get_expires_tv64(timer) > now.tv64)
+		if (hrtimer_get_expires_tv64(timer) > now)
 			return orun;
 		/*
 		 * This (and the ktime_add() below) is the
@@ -1104,7 +1104,7 @@ u64 hrtimer_get_next_event(void)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!__hrtimer_hres_active(cpu_base))
-		expires = __hrtimer_get_next_event(cpu_base).tv64;
+		expires = __hrtimer_get_next_event(cpu_base);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1296,7 +1296,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 			 * are right-of a not yet expired timer, because that
 			 * timer will have to trigger a wakeup anyway.
 			 */
-			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
+			if (basenow < hrtimer_get_softexpires_tv64(timer))
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow);
@@ -1318,7 +1318,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
-	dev->next_event.tv64 = KTIME_MAX;
+	dev->next_event = KTIME_MAX;
 
 	raw_spin_lock(&cpu_base->lock);
 	entry_time = now = hrtimer_update_base(cpu_base);
@@ -1331,7 +1331,7 @@ retry:
 	 * timers which run their callback and need to be requeued on
 	 * this CPU.
 	 */
-	cpu_base->expires_next.tv64 = KTIME_MAX;
+	cpu_base->expires_next = KTIME_MAX;
 
 	__hrtimer_run_queues(cpu_base, now);
 
@@ -1379,13 +1379,13 @@ retry:
 	cpu_base->hang_detected = 1;
 	raw_spin_unlock(&cpu_base->lock);
 	delta = ktime_sub(now, entry_time);
-	if ((unsigned int)delta.tv64 > cpu_base->max_hang_time)
-		cpu_base->max_hang_time = (unsigned int) delta.tv64;
+	if ((unsigned int)delta > cpu_base->max_hang_time)
+		cpu_base->max_hang_time = (unsigned int) delta;
 	/*
 	 * Limit it to a sensible value as we enforce a longer
 	 * delay. Give the CPU at least 100ms to catch up.
 	 */
-	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+	if (delta > 100 * NSEC_PER_MSEC)
 		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
 	else
 		expires_next = ktime_add(now, delta);
@@ -1495,7 +1495,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
 	ktime_t rem;
 
 	rem = hrtimer_expires_remaining(timer);
-	if (rem.tv64 <= 0)
+	if (rem <= 0)
 		return 0;
 	rmt = ktime_to_timespec(rem);
 
@@ -1693,7 +1693,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 	 * Optimize when a zero timeout value is given. It does not
 	 * matter whether this is an absolute or a relative time.
 	 */
-	if (expires && !expires->tv64) {
+	if (expires && *expires == 0) {
 		__set_current_state(TASK_RUNNING);
 		return 0;
 	}
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index a45afb7277c2..8c89143f9ebf 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -34,10 +34,10 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer)
 	 * then we return 0 - which is correct.
 	 */
 	if (hrtimer_active(timer)) {
-		if (rem.tv64 <= 0)
-			rem.tv64 = NSEC_PER_USEC;
+		if (rem <= 0)
+			rem = NSEC_PER_USEC;
 	} else
-		rem.tv64 = 0;
+		rem = 0;
 
 	return ktime_to_timeval(rem);
 }
@@ -216,12 +216,12 @@ again:
 			goto again;
 		}
 		expires = timeval_to_ktime(value->it_value);
-		if (expires.tv64 != 0) {
+		if (expires != 0) {
 			tsk->signal->it_real_incr =
 				timeval_to_ktime(value->it_interval);
 			hrtimer_start(timer, expires, HRTIMER_MODE_REL);
 		} else
-			tsk->signal->it_real_incr.tv64 = 0;
+			tsk->signal->it_real_incr = 0;
 
 		trace_itimer_state(ITIMER_REAL, value, 0);
 		spin_unlock_irq(&tsk->sighand->siglock);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6df8927c58a5..edf19cc53140 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -381,7 +381,7 @@ ktime_t ntp_get_next_leap(void)
 
 	if ((time_state == TIME_INS) && (time_status & STA_INS))
 		return ktime_set(ntp_next_leap_sec, 0);
-	ret.tv64 = KTIME_MAX;
+	ret = KTIME_MAX;
 	return ret;
 }
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 42d7b9558741..9fe98b3777a2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -359,7 +359,7 @@ static void schedule_next_timer(struct k_itimer *timr)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
 
-	if (timr->it.real.interval.tv64 == 0)
+	if (timr->it.real.interval == 0)
 		return;
 
 	timr->it_overrun += (unsigned int) hrtimer_forward(timer,
@@ -449,7 +449,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
 
-	if (timr->it.real.interval.tv64 != 0)
+	if (timr->it.real.interval != 0)
 		si_private = ++timr->it_requeue_pending;
 
 	if (posix_timer_event(timr, si_private)) {
@@ -458,7 +458,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 		 * we will not get a call back to restart it AND
 		 * it should be restarted.
 		 */
-		if (timr->it.real.interval.tv64 != 0) {
+		if (timr->it.real.interval != 0) {
 			ktime_t now = hrtimer_cb_get_time(timer);
 
 			/*
@@ -487,7 +487,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 			{
 				ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ);
 
-				if (timr->it.real.interval.tv64 < kj.tv64)
+				if (timr->it.real.interval < kj)
 					now = ktime_add(now, kj);
 			}
 #endif
@@ -743,7 +743,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 	iv = timr->it.real.interval;
 
 	/* interval timer ? */
-	if (iv.tv64)
+	if (iv)
 		cur_setting->it_interval = ktime_to_timespec(iv);
 	else if (!hrtimer_active(timer) &&
 		 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
@@ -756,13 +756,13 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 	 * timer move the expiry time forward by intervals, so
 	 * expiry is > now.
 	 */
-	if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
-	    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING ||
+		   (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
 		timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
 	remaining = __hrtimer_expires_remaining_adjusted(timer, now);
 	/* Return 0 only, when the timer is expired and not pending */
-	if (remaining.tv64 <= 0) {
+	if (remaining <= 0) {
 		/*
 		 * A single shot SIGEV_NONE timer must return 0, when
 		 * it is expired !
@@ -839,7 +839,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		common_timer_get(timr, old_setting);
 
 	/* disable the timer */
-	timr->it.real.interval.tv64 = 0;
+	timr->it.real.interval = 0;
 	/*
 	 * careful here.  If smp we could be in the "fire" routine which will
 	 * be spinning as we hold the lock.  But this is ONLY an SMP issue.
@@ -924,7 +924,7 @@ retry:
 
 static int common_timer_del(struct k_itimer *timer)
 {
-	timer->it.real.interval.tv64 = 0;
+	timer->it.real.interval = 0;
 
 	if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
 		return TIMER_RETRY;
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 690b797f522e..a7bb8f33ae07 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -97,7 +97,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 	ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
 
 	if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
-		if (ce_broadcast_hrtimer.next_event.tv64 != KTIME_MAX)
+		if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
 			return HRTIMER_RESTART;
 
 	return HRTIMER_NORESTART;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index d2a20e83ebae..3109204c87cc 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -604,14 +604,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 	bool bc_local;
 
 	raw_spin_lock(&tick_broadcast_lock);
-	dev->next_event.tv64 = KTIME_MAX;
-	next_event.tv64 = KTIME_MAX;
+	dev->next_event = KTIME_MAX;
+	next_event = KTIME_MAX;
 	cpumask_clear(tmpmask);
 	now = ktime_get();
 	/* Find all expired events */
 	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
 		td = &per_cpu(tick_cpu_device, cpu);
-		if (td->evtdev->next_event.tv64 <= now.tv64) {
+		if (td->evtdev->next_event <= now) {
 			cpumask_set_cpu(cpu, tmpmask);
 			/*
 			 * Mark the remote cpu in the pending mask, so
@@ -619,8 +619,8 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 			 * timer in tick_broadcast_oneshot_control().
 			 */
 			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
-		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
-			next_event.tv64 = td->evtdev->next_event.tv64;
+		} else if (td->evtdev->next_event < next_event) {
+			next_event = td->evtdev->next_event;
 			next_cpu = cpu;
 		}
 	}
@@ -657,7 +657,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 	 * - There are pending events on sleeping CPUs which were not
 	 * in the event mask
 	 */
-	if (next_event.tv64 != KTIME_MAX)
+	if (next_event != KTIME_MAX)
 		tick_broadcast_set_event(dev, next_cpu, next_event);
 
 	raw_spin_unlock(&tick_broadcast_lock);
@@ -672,7 +672,7 @@ static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
 {
 	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
 		return 0;
-	if (bc->next_event.tv64 == KTIME_MAX)
+	if (bc->next_event == KTIME_MAX)
 		return 0;
 	return bc->bound_on == cpu ? -EBUSY : 0;
 }
@@ -688,7 +688,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
 	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
 		if (broadcast_needs_cpu(bc, smp_processor_id()))
 			return;
-		if (dev->next_event.tv64 < bc->next_event.tv64)
+		if (dev->next_event < bc->next_event)
 			return;
 	}
 	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
@@ -754,7 +754,7 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 			 */
 			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
 				ret = -EBUSY;
-			} else if (dev->next_event.tv64 < bc->next_event.tv64) {
+			} else if (dev->next_event < bc->next_event) {
 				tick_broadcast_set_event(bc, cpu, dev->next_event);
 				/*
 				 * In case of hrtimer broadcasts the
@@ -789,7 +789,7 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 			/*
 			 * Bail out if there is no next event.
 			 */
-			if (dev->next_event.tv64 == KTIME_MAX)
+			if (dev->next_event == KTIME_MAX)
 				goto out;
 			/*
 			 * If the pending bit is not set, then we are
@@ -824,7 +824,7 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 			 * nohz fixups.
 			 */
 			now = ktime_get();
-			if (dev->next_event.tv64 <= now.tv64) {
+			if (dev->next_event <= now) {
 				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
 				goto out;
 			}
@@ -897,7 +897,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 						       tick_next_period);
 			tick_broadcast_set_event(bc, cpu, tick_next_period);
 		} else
-			bc->next_event.tv64 = KTIME_MAX;
+			bc->next_event = KTIME_MAX;
 	} else {
 		/*
 		 * The first cpu which switches to oneshot mode sets
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index b51344652330..6b009c207671 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -28,7 +28,7 @@ int tick_program_event(ktime_t expires, int force)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
-	if (unlikely(expires.tv64 == KTIME_MAX)) {
+	if (unlikely(expires == KTIME_MAX)) {
 		/*
 		 * We don't need the clock event device any more, stop it.
 		 */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 71496a20e670..2c115fdab397 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -58,21 +58,21 @@ static void tick_do_update_jiffies64(ktime_t now)
 	 * Do a quick check without holding jiffies_lock:
 	 */
 	delta = ktime_sub(now, last_jiffies_update);
-	if (delta.tv64 < tick_period.tv64)
+	if (delta < tick_period)
 		return;
 
 	/* Reevaluate with jiffies_lock held */
 	write_seqlock(&jiffies_lock);
 
 	delta = ktime_sub(now, last_jiffies_update);
-	if (delta.tv64 >= tick_period.tv64) {
+	if (delta >= tick_period) {
 
 		delta = ktime_sub(delta, tick_period);
 		last_jiffies_update = ktime_add(last_jiffies_update,
 						tick_period);
 
 		/* Slow path for long timeouts */
-		if (unlikely(delta.tv64 >= tick_period.tv64)) {
+		if (unlikely(delta >= tick_period)) {
 			s64 incr = ktime_to_ns(tick_period);
 
 			ticks = ktime_divns(delta, incr);
@@ -101,7 +101,7 @@ static ktime_t tick_init_jiffy_update(void)
 
 	write_seqlock(&jiffies_lock);
 	/* Did we start the jiffies update yet ? */
-	if (last_jiffies_update.tv64 == 0)
+	if (last_jiffies_update == 0)
 		last_jiffies_update = tick_next_period;
 	period = last_jiffies_update;
 	write_sequnlock(&jiffies_lock);
@@ -669,7 +669,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
 		seq = read_seqbegin(&jiffies_lock);
-		basemono = last_jiffies_update.tv64;
+		basemono = last_jiffies_update;
 		basejiff = jiffies;
 	} while (read_seqretry(&jiffies_lock, seq));
 	ts->last_jiffies = basejiff;
@@ -697,7 +697,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 */
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
-		tick.tv64 = 0;
+		tick = 0;
 
 		/*
 		 * Tell the timer code that the base is not idle, i.e. undo
@@ -764,10 +764,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		expires = KTIME_MAX;
 
 	expires = min_t(u64, expires, next_tick);
-	tick.tv64 = expires;
+	tick = expires;
 
 	/* Skip reprogram of event if its not changed */
-	if (ts->tick_stopped && (expires == dev->next_event.tv64))
+	if (ts->tick_stopped && (expires == dev->next_event))
 		goto out;
 
 	/*
@@ -864,7 +864,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	}
 
 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
-		ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ };
+		ts->sleep_length = NSEC_PER_SEC / HZ;
 		return false;
 	}
 
@@ -914,7 +914,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 		ts->idle_calls++;
 
 		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
-		if (expires.tv64 > 0LL) {
+		if (expires > 0LL) {
 			ts->idle_sleeps++;
 			ts->idle_expires = expires;
 		}
@@ -1051,7 +1051,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	struct pt_regs *regs = get_irq_regs();
 	ktime_t now = ktime_get();
 
-	dev->next_event.tv64 = KTIME_MAX;
+	dev->next_event = KTIME_MAX;
 
 	tick_sched_do_timer(now);
 	tick_sched_handle(ts, regs);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f4152a69277f..db087d7e106d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -104,7 +104,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 	 */
 	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
 					-tk->wall_to_monotonic.tv_nsec);
-	WARN_ON_ONCE(tk->offs_real.tv64 != timespec64_to_ktime(tmp).tv64);
+	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
 	tk->wall_to_monotonic = wtm;
 	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 	tk->offs_real = timespec64_to_ktime(tmp);
@@ -571,7 +571,7 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 static inline void tk_update_leap_state(struct timekeeper *tk)
 {
 	tk->next_leap_ktime = ntp_get_next_leap();
-	if (tk->next_leap_ktime.tv64 != KTIME_MAX)
+	if (tk->next_leap_ktime != KTIME_MAX)
 		/* Convert to monotonic time */
 		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
 }
@@ -2250,7 +2250,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 		}
 
 		/* Handle leapsecond insertion adjustments */
-		if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64))
+		if (unlikely(base >= tk->next_leap_ktime))
 			*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 782ae8ca2c06..adc6ee0a5126 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -48,7 +48,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 	while (*p) {
 		parent = *p;
 		ptr = rb_entry(parent, struct timerqueue_node, node);
-		if (node->expires.tv64 < ptr->expires.tv64)
+		if (node->expires < ptr->expires)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -56,7 +56,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 	rb_link_node(&node->node, parent, p);
 	rb_insert_color(&node->node, &head->head);
 
-	if (!head->next || node->expires.tv64 < head->next->expires.tv64) {
+	if (!head->next || node->expires < head->next->expires) {
 		head->next = node;
 		return true;
 	}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 436a7537e6a9..ab8ba1e16473 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -199,11 +199,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 
 		seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' ');
 
-		if (op->kt_ival1.tv64)
+		if (op->kt_ival1)
 			seq_printf(m, "timeo=%lld ",
 				   (long long)ktime_to_us(op->kt_ival1));
 
-		if (op->kt_ival2.tv64)
+		if (op->kt_ival2)
 			seq_printf(m, "thr=%lld ",
 				   (long long)ktime_to_us(op->kt_ival2));
 
@@ -226,11 +226,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 		else
 			seq_printf(m, "[%u] ", op->nframes);
 
-		if (op->kt_ival1.tv64)
+		if (op->kt_ival1)
 			seq_printf(m, "t1=%lld ",
 				   (long long)ktime_to_us(op->kt_ival1));
 
-		if (op->kt_ival2.tv64)
+		if (op->kt_ival2)
 			seq_printf(m, "t2=%lld ",
 				   (long long)ktime_to_us(op->kt_ival2));
 
@@ -365,11 +365,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 
 static void bcm_tx_start_timer(struct bcm_op *op)
 {
-	if (op->kt_ival1.tv64 && op->count)
+	if (op->kt_ival1 && op->count)
 		hrtimer_start(&op->timer,
 			      ktime_add(ktime_get(), op->kt_ival1),
 			      HRTIMER_MODE_ABS);
-	else if (op->kt_ival2.tv64)
+	else if (op->kt_ival2)
 		hrtimer_start(&op->timer,
 			      ktime_add(ktime_get(), op->kt_ival2),
 			      HRTIMER_MODE_ABS);
@@ -380,7 +380,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 	struct bcm_op *op = (struct bcm_op *)data;
 	struct bcm_msg_head msg_head;
 
-	if (op->kt_ival1.tv64 && (op->count > 0)) {
+	if (op->kt_ival1 && (op->count > 0)) {
 
 		op->count--;
 		if (!op->count && (op->flags & TX_COUNTEVT)) {
@@ -398,7 +398,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 		}
 		bcm_can_tx(op);
 
-	} else if (op->kt_ival2.tv64)
+	} else if (op->kt_ival2)
 		bcm_can_tx(op);
 
 	bcm_tx_start_timer(op);
@@ -459,7 +459,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
 	lastdata->flags |= (RX_RECV|RX_THR);
 
 	/* throttling mode inactive ? */
-	if (!op->kt_ival2.tv64) {
+	if (!op->kt_ival2) {
 		/* send RX_CHANGED to the user immediately */
 		bcm_rx_changed(op, lastdata);
 		return;
@@ -470,7 +470,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
 		return;
 
 	/* first reception with enabled throttling mode */
-	if (!op->kt_lastmsg.tv64)
+	if (!op->kt_lastmsg)
 		goto rx_changed_settime;
 
 	/* got a second frame inside a potential throttle period? */
@@ -537,7 +537,7 @@ static void bcm_rx_starttimer(struct bcm_op *op)
 	if (op->flags & RX_NO_AUTOTIMER)
 		return;
 
-	if (op->kt_ival1.tv64)
+	if (op->kt_ival1)
 		hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
 }
 
@@ -1005,7 +1005,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
 
 		/* disable an active timer due to zero values? */
-		if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64)
+		if (!op->kt_ival1 && !op->kt_ival2)
 			hrtimer_cancel(&op->timer);
 	}
 
@@ -1189,7 +1189,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
 
 			/* disable an active timer due to zero value? */
-			if (!op->kt_ival1.tv64)
+			if (!op->kt_ival1)
 				hrtimer_cancel(&op->timer);
 
 			/*
@@ -1201,7 +1201,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			bcm_rx_thr_flush(op, 1);
 		}
 
-		if ((op->flags & STARTTIMER) && op->kt_ival1.tv64)
+		if ((op->flags & STARTTIMER) && op->kt_ival1)
 			hrtimer_start(&op->timer, op->kt_ival1,
 				      HRTIMER_MODE_REL);
 	}
diff --git a/net/can/gw.c b/net/can/gw.c
index 455168718c2e..a54ab0c82104 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -429,7 +429,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 
 	/* clear the skb timestamp if not configured the other way */
 	if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
-		nskb->tstamp.tv64 = 0;
+		nskb->tstamp = 0;
 
 	/* send to netdevice */
 	if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
diff --git a/net/core/dev.c b/net/core/dev.c
index 037ffd27fcc2..8db5a0b4b520 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1731,14 +1731,14 @@ EXPORT_SYMBOL(net_disable_timestamp);
 
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 	if (static_key_false(&netstamp_needed))
 		__net_timestamp(skb);
 }
 
 #define net_timestamp_check(COND, SKB)			\
 	if (static_key_false(&netstamp_needed)) {		\
-		if ((COND) && !(SKB)->tstamp.tv64)	\
+		if ((COND) && !(SKB)->tstamp)	\
 			__net_timestamp(SKB);		\
 	}						\
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e77f40616fea..5a03730fbc1a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4368,7 +4368,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
  */
 void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 {
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 31a255b555ad..1d5331a1b1dc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1038,7 +1038,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
 	/* Our usage of tstamp should remain private */
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 
 	/* Cleanup our debris for IP stacks */
 	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -3203,7 +3203,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 #endif
 
 	/* Do not fool tcpdump (if any), clean our debris */
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 	return skb;
 }
 EXPORT_SYMBOL(tcp_make_synack);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 926818c331e5..e4198502fd98 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -232,7 +232,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	ipv6h->saddr = hao->addr;
 	hao->addr = tmp_addr;
 
-	if (skb->tstamp.tv64 == 0)
+	if (skb->tstamp == 0)
 		__net_timestamp(skb);
 
 	return true;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index e07f22b0c58a..8a9219ff2e77 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1809,7 +1809,7 @@ static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
 	if (rc)
 		goto out_free;
-	if (skb->tstamp.tv64)
+	if (skb->tstamp)
 		sk->sk_stamp = skb->tstamp;
 
 	if (sipx) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6a0bbfa8e702..3a073cd9fcf4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -783,7 +783,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* set conntrack timestamp, if enabled. */
 	tstamp = nf_conn_tstamp_find(ct);
 	if (tstamp) {
-		if (skb->tstamp.tv64 == 0)
+		if (skb->tstamp == 0)
 			__net_timestamp(skb);
 
 		tstamp->start = ktime_to_ns(skb->tstamp);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 200922bb2036..08247bf7d7b8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -538,7 +538,7 @@ __build_packet_message(struct nfnl_log_net *log,
 			goto nla_put_failure;
 	}
 
-	if (skb->tstamp.tv64) {
+	if (skb->tstamp) {
 		struct nfulnl_msg_packet_timestamp ts;
 		struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
 		ts.sec = cpu_to_be64(kts.tv_sec);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index be7627b80400..3ee0b8a000a4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -384,7 +384,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		+ nla_total_size(sizeof(u_int32_t))	/* skbinfo */
 		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */
 
-	if (entskb->tstamp.tv64)
+	if (entskb->tstamp)
 		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
 	size += nfqnl_get_bridge_size(entry);
@@ -555,7 +555,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	if (nfqnl_put_bridge(entry, skb) < 0)
 		goto nla_put_failure;
 
-	if (entskb->tstamp.tv64) {
+	if (entskb->tstamp) {
 		struct nfqnl_msg_packet_timestamp ts;
 		struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 0ae55a36f492..1b01eec1fbda 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -168,7 +168,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	 * may happen that the same packet matches both rules if
 	 * it arrived at the right moment before 13:00.
 	 */
-	if (skb->tstamp.tv64 == 0)
+	if (skb->tstamp == 0)
 		__net_timestamp((struct sk_buff *)skb);
 
 	stamp = ktime_to_ns(skb->tstamp);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b7e4097bfdab..bcfadfdea8e0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -627,7 +627,7 @@ deliver:
 			 * from the network (tstamp will be updated).
 			 */
 			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
-				skb->tstamp.tv64 = 0;
+				skb->tstamp = 0;
 #endif
 
 			if (q->qdisc) {
diff --git a/net/socket.c b/net/socket.c
index 5ff26c44db33..8487bf136e5c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -668,7 +668,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 	/* Race occurred between timestamp enabling and packet
 	   receiving.  Fill in the current time for now. */
-	if (need_software_tstamp && skb->tstamp.tv64 == 0)
+	if (need_software_tstamp && skb->tstamp == 0)
 		__net_timestamp(skb);
 
 	if (need_software_tstamp) {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a3e85ee28b5a..de066acdb34e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -574,7 +574,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	}
 	len = svc_addr_len(svc_addr(rqstp));
 	rqstp->rq_addrlen = len;
-	if (skb->tstamp.tv64 == 0) {
+	if (skb->tstamp == 0) {
 		skb->tstamp = ktime_get_real();
 		/* Don't enable netstamp, sunrpc doesn't
 		   need that much accuracy */
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
index e2f27022b363..1ac0c423903e 100644
--- a/sound/core/hrtimer.c
+++ b/sound/core/hrtimer.c
@@ -58,7 +58,7 @@ static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt)
 
 	/* calculate the drift */
 	delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt));
-	if (delta.tv64 > 0)
+	if (delta > 0)
 		ticks += ktime_divns(delta, ticks * resolution);
 
 	snd_timer_interrupt(stime->timer, ticks);
-- 
cgit v1.2.3


From 8b0e195314fabd58a331c4f7b6db75a1565535d7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Dec 2016 12:30:41 +0100
Subject: ktime: Cleanup ktime_set() usage

ktime_set(S,N) was required for the timespec storage type and is still
useful for situations where a Seconds and Nanoseconds part of a time value
needs to be converted. For anything where the Seconds argument is 0, this
is pointless and can be replaced with a simple assignment.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/powerpc/kvm/book3s_hv.c                   | 3 +--
 arch/powerpc/oprofile/cell/spu_profiler.c      | 4 ++--
 arch/s390/kvm/interrupt.c                      | 2 +-
 arch/x86/kvm/lapic.c                           | 4 ++--
 block/blk-mq.c                                 | 2 +-
 drivers/base/power/main.c                      | 2 +-
 drivers/base/power/wakeup.c                    | 2 +-
 drivers/block/null_blk.c                       | 2 +-
 drivers/dma/dmatest.c                          | 4 ++--
 drivers/gpu/drm/amd/amdgpu/dce_virtual.c       | 6 +++---
 drivers/gpu/drm/i915/intel_uncore.c            | 2 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c        | 2 +-
 drivers/gpu/drm/tilcdc/tilcdc_crtc.c           | 2 +-
 drivers/iio/trigger/iio-trig-hrtimer.c         | 5 ++---
 drivers/input/joystick/walkera0701.c           | 2 +-
 drivers/mailbox/mailbox.c                      | 3 +--
 drivers/media/dvb-core/dmxdev.c                | 2 +-
 drivers/media/pci/cx88/cx88-input.c            | 6 ++----
 drivers/media/pci/pt3/pt3.c                    | 2 +-
 drivers/net/can/softing/softing_fw.c           | 4 ++--
 drivers/net/can/softing/softing_main.c         | 2 +-
 drivers/net/ethernet/ec_bhf.c                  | 5 ++---
 drivers/net/ethernet/marvell/mvpp2.c           | 2 +-
 drivers/net/ethernet/tile/tilegx.c             | 4 ++--
 drivers/net/ieee802154/at86rf230.c             | 9 ++++-----
 drivers/net/usb/cdc_ncm.c                      | 2 +-
 drivers/net/wireless/ralink/rt2x00/rt2800usb.c | 4 ++--
 drivers/pci/quirks.c                           | 2 +-
 drivers/platform/x86/msi-wmi.c                 | 2 +-
 drivers/power/reset/ltc2952-poweroff.c         | 2 +-
 drivers/rtc/interface.c                        | 8 ++++----
 drivers/s390/crypto/ap_bus.c                   | 4 ++--
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c       | 2 +-
 drivers/scsi/scsi_debug.c                      | 2 +-
 drivers/scsi/ufs/ufshcd.c                      | 4 ++--
 drivers/usb/gadget/function/f_ncm.c            | 3 +--
 drivers/usb/host/ehci-timer.c                  | 3 +--
 drivers/usb/host/fotg210-hcd.c                 | 3 +--
 drivers/usb/musb/musb_cppi41.c                 | 9 ++++-----
 fs/dlm/lock.c                                  | 2 +-
 fs/gfs2/glock.c                                | 2 +-
 fs/timerfd.c                                   | 2 +-
 include/linux/skbuff.h                         | 2 +-
 kernel/sched/core.c                            | 2 +-
 kernel/time/alarmtimer.c                       | 4 ++--
 kernel/time/hrtimer.c                          | 2 +-
 kernel/time/posix-timers.c                     | 2 +-
 kernel/time/tick-common.c                      | 4 ++--
 net/can/bcm.c                                  | 4 ++--
 net/mac802154/util.c                           | 4 ++--
 net/sched/sch_cbq.c                            | 2 +-
 net/sctp/transport.c                           | 2 +-
 net/xfrm/xfrm_state.c                          | 2 +-
 sound/drivers/pcsp/pcsp_lib.c                  | 2 +-
 sound/firewire/lib.c                           | 6 +++---
 sound/sh/sh_dac_audio.c                        | 2 +-
 56 files changed, 84 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 66b2a35be424..ec34e39471a7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1872,8 +1872,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 	}
 	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
 		   / tb_ticks_per_sec;
-	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
-		      HRTIMER_MODE_REL);
+	hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
 	vcpu->arch.timer_running = 1;
 }
 
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index b19265de9178..5182f2936af2 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -180,7 +180,7 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 	smp_wmb();	/* insure spu event buffer updates are written */
 			/* don't want events intermingled... */
 
-	kt = ktime_set(0, profiling_interval);
+	kt = profiling_interval;
 	if (!spu_prof_running)
 		goto stop;
 	hrtimer_forward(timer, timer->base->get_time(), kt);
@@ -204,7 +204,7 @@ int start_spu_profiling_cycles(unsigned int cycles_reset)
 	ktime_t kt;
 
 	pr_debug("timer resolution: %lu\n", TICK_NSEC);
-	kt = ktime_set(0, profiling_interval);
+	kt = profiling_interval;
 	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer_set_expires(&timer, kt);
 	timer.function = profile_spus;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 6843dd5a1cba..0f8f14199734 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1019,7 +1019,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		return 0;
 
 	__set_cpu_idle(vcpu);
-	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+	hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
 	VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
 no_timer:
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34a66b2d47e6..5fe290c1b7d8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1106,7 +1106,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	now = ktime_get();
 	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
 	if (ktime_to_ns(remaining) < 0)
-		remaining = ktime_set(0, 0);
+		remaining = 0;
 
 	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
 	tmcct = div64_u64(ns,
@@ -2057,7 +2057,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 			apic->lapic_timer.tscdeadline = 0;
 		if (apic_lvtt_oneshot(apic)) {
 			apic->lapic_timer.tscdeadline = 0;
-			apic->lapic_timer.target_expiration = ktime_set(0, 0);
+			apic->lapic_timer.target_expiration = 0;
 		}
 		atomic_set(&apic->lapic_timer.pending, 0);
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4bf850e8d6b5..a8e67a155d04 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2569,7 +2569,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	 * This will be replaced with the stats tracking code, using
 	 * 'avg_completion_time / 2' as the pre-sleep target.
 	 */
-	kt = ktime_set(0, nsecs);
+	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 48c6294e9c34..249e0304597f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -194,7 +194,7 @@ void device_pm_move_last(struct device *dev)
 
 static ktime_t initcall_debug_start(struct device *dev)
 {
-	ktime_t calltime = ktime_set(0, 0);
+	ktime_t calltime = 0;
 
 	if (pm_print_times_enabled) {
 		pr_info("calling  %s+ @ %i, parent: %s\n",
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 94332902a1cf..f546f8f107b0 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -1005,7 +1005,7 @@ static int print_wakeup_source_stats(struct seq_file *m,
 			prevent_sleep_time = ktime_add(prevent_sleep_time,
 				ktime_sub(now, ws->start_prevent_time));
 	} else {
-		active_time = ktime_set(0, 0);
+		active_time = 0;
 	}
 
 	seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 4943ee22716e..c0e14e54909b 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -257,7 +257,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 
 static void null_cmd_end_timer(struct nullb_cmd *cmd)
 {
-	ktime_t kt = ktime_set(0, completion_nsec);
+	ktime_t kt = completion_nsec;
 
 	hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
 }
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 451f899f74e4..c9297605058c 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -429,8 +429,8 @@ static int dmatest_func(void *data)
 	int			dst_cnt;
 	int			i;
 	ktime_t			ktime, start, diff;
-	ktime_t			filltime = ktime_set(0, 0);
-	ktime_t			comparetime = ktime_set(0, 0);
+	ktime_t			filltime = 0;
+	ktime_t			comparetime = 0;
 	s64			runtime = 0;
 	unsigned long long	total_len = 0;
 	u8			align = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index e4a5a5ac0ff3..762f8e82ceb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -752,7 +752,7 @@ static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vbla
 
 	drm_handle_vblank(ddev, amdgpu_crtc->crtc_id);
 	dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id);
-	hrtimer_start(vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD),
+	hrtimer_start(vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD,
 		      HRTIMER_MODE_REL);
 
 	return HRTIMER_NORESTART;
@@ -772,11 +772,11 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad
 		hrtimer_init(&adev->mode_info.crtcs[crtc]->vblank_timer,
 			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		hrtimer_set_expires(&adev->mode_info.crtcs[crtc]->vblank_timer,
-				    ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD));
+				    DCE_VIRTUAL_VBLANK_PERIOD);
 		adev->mode_info.crtcs[crtc]->vblank_timer.function =
 			dce_virtual_vblank_timer_handle;
 		hrtimer_start(&adev->mode_info.crtcs[crtc]->vblank_timer,
-			      ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL);
+			      DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL);
 	} else if (!state && adev->mode_info.crtcs[crtc]->vsync_timer_enabled) {
 		DRM_DEBUG("Disable software vsync timer\n");
 		hrtimer_cancel(&adev->mode_info.crtcs[crtc]->vblank_timer);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d7be0d94ba4d..0bffd3f0c15d 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -62,7 +62,7 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 {
 	d->wake_count++;
 	hrtimer_start_range_ns(&d->timer,
-			       ktime_set(0, NSEC_PER_MSEC),
+			       NSEC_PER_MSEC,
 			       NSEC_PER_MSEC,
 			       HRTIMER_MODE_REL);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index f2f348f0160c..a6126c93f215 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -330,7 +330,7 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait)
 		__set_current_state(intr ? TASK_INTERRUPTIBLE :
 					   TASK_UNINTERRUPTIBLE);
 
-		kt = ktime_set(0, sleep_time);
+		kt = sleep_time;
 		schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
 		sleep_time *= 2;
 		if (sleep_time > NSEC_PER_MSEC)
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 9942b0577d6e..725dffad5640 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -539,7 +539,7 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
 	}
 
 	drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-	tilcdc_crtc->last_vblank = ktime_set(0, 0);
+	tilcdc_crtc->last_vblank = 0;
 
 	tilcdc_crtc->enabled = false;
 	mutex_unlock(&tilcdc_crtc->enable_lock);
diff --git a/drivers/iio/trigger/iio-trig-hrtimer.c b/drivers/iio/trigger/iio-trig-hrtimer.c
index 5e6d451febeb..a1cad6cc2e0f 100644
--- a/drivers/iio/trigger/iio-trig-hrtimer.c
+++ b/drivers/iio/trigger/iio-trig-hrtimer.c
@@ -63,7 +63,7 @@ ssize_t iio_hrtimer_store_sampling_frequency(struct device *dev,
 		return -EINVAL;
 
 	info->sampling_frequency = val;
-	info->period = ktime_set(0, NSEC_PER_SEC / val);
+	info->period = NSEC_PER_SEC / val;
 
 	return len;
 }
@@ -141,8 +141,7 @@ static struct iio_sw_trigger *iio_trig_hrtimer_probe(const char *name)
 	trig_info->timer.function = iio_hrtimer_trig_handler;
 
 	trig_info->sampling_frequency = HRTIMER_DEFAULT_SAMPLING_FREQUENCY;
-	trig_info->period = ktime_set(0, NSEC_PER_SEC /
-				      trig_info->sampling_frequency);
+	trig_info->period = NSEC_PER_SEC / trig_info->sampling_frequency;
 
 	ret = iio_trigger_register(trig_info->swt.trigger);
 	if (ret)
diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c
index 70a893a17467..36a5b93156ed 100644
--- a/drivers/input/joystick/walkera0701.c
+++ b/drivers/input/joystick/walkera0701.c
@@ -165,7 +165,7 @@ static void walkera0701_irq_handler(void *handler_data)
 				RESERVE + BIN1_PULSE - BIN0_PULSE)	/* frame sync .. */
 		w->counter = 0;
 
-	hrtimer_start(&w->timer, ktime_set(0, BIN_SAMPLE), HRTIMER_MODE_REL);
+	hrtimer_start(&w->timer, BIN_SAMPLE, HRTIMER_MODE_REL);
 }
 
 static enum hrtimer_restart timer_handler(struct hrtimer
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 4a36632c236f..4671f8a12872 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -87,8 +87,7 @@ exit:
 
 	if (!err && (chan->txdone_method & TXDONE_BY_POLL))
 		/* kick start the timer immediately to avoid delays */
-		hrtimer_start(&chan->mbox->poll_hrt, ktime_set(0, 0),
-			      HRTIMER_MODE_REL);
+		hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
 }
 
 static void tx_tick(struct mbox_chan *chan, int r)
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 0c44479b556e..0c16bb213101 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -562,7 +562,7 @@ static int dvb_dmxdev_start_feed(struct dmxdev *dmxdev,
 				 struct dmxdev_filter *filter,
 				 struct dmxdev_feed *feed)
 {
-	ktime_t timeout = ktime_set(0, 0);
+	ktime_t timeout = 0;
 	struct dmx_pes_filter_params *para = &filter->params.pes;
 	dmx_output_t otype;
 	int ret;
diff --git a/drivers/media/pci/cx88/cx88-input.c b/drivers/media/pci/cx88/cx88-input.c
index dcfea3502e42..c7b3cb406499 100644
--- a/drivers/media/pci/cx88/cx88-input.c
+++ b/drivers/media/pci/cx88/cx88-input.c
@@ -178,8 +178,7 @@ static enum hrtimer_restart cx88_ir_work(struct hrtimer *timer)
 	struct cx88_IR *ir = container_of(timer, struct cx88_IR, timer);
 
 	cx88_ir_handle_key(ir);
-	missed = hrtimer_forward_now(&ir->timer,
-				     ktime_set(0, ir->polling * 1000000));
+	missed = hrtimer_forward_now(&ir->timer, ir->polling * 1000000);
 	if (missed > 1)
 		ir_dprintk("Missed ticks %ld\n", missed - 1);
 
@@ -199,8 +198,7 @@ static int __cx88_ir_start(void *priv)
 	if (ir->polling) {
 		hrtimer_init(&ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		ir->timer.function = cx88_ir_work;
-		hrtimer_start(&ir->timer,
-			      ktime_set(0, ir->polling * 1000000),
+		hrtimer_start(&ir->timer, ir->polling * 1000000,
 			      HRTIMER_MODE_REL);
 	}
 	if (ir->sampling) {
diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c
index 7fb649e523f4..77f4d15f322b 100644
--- a/drivers/media/pci/pt3/pt3.c
+++ b/drivers/media/pci/pt3/pt3.c
@@ -463,7 +463,7 @@ static int pt3_fetch_thread(void *data)
 
 		pt3_proc_dma(adap);
 
-		delay = ktime_set(0, PT3_FETCH_DELAY * NSEC_PER_MSEC);
+		delay = PT3_FETCH_DELAY * NSEC_PER_MSEC;
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		freezable_schedule_hrtimeout_range(&delay,
 					PT3_FETCH_DELAY_DELTA * NSEC_PER_MSEC,
diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c
index 52fe50725d74..4063215c9b54 100644
--- a/drivers/net/can/softing/softing_fw.c
+++ b/drivers/net/can/softing/softing_fw.c
@@ -390,7 +390,7 @@ static void softing_initialize_timestamp(struct softing *card)
 	ovf = 0x100000000ULL * 16;
 	do_div(ovf, card->pdat->freq ?: 16);
 
-	card->ts_overflow = ktime_add_us(ktime_set(0, 0), ovf);
+	card->ts_overflow = ktime_add_us(0, ovf);
 }
 
 ktime_t softing_raw2ktime(struct softing *card, u32 raw)
@@ -647,7 +647,7 @@ int softing_startstop(struct net_device *dev, int up)
 		open_candev(netdev);
 		if (dev != netdev) {
 			/* notify other busses on the restart */
-			softing_netdev_rx(netdev, &msg, ktime_set(0, 0));
+			softing_netdev_rx(netdev, &msg, 0);
 			++priv->can.can_stats.restarts;
 		}
 		netif_wake_queue(netdev);
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 7621f91a8a20..5f64deec9f6c 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -192,7 +192,7 @@ static int softing_handle_1(struct softing *card)
 				/* a dead bus has no overflows */
 				continue;
 			++netdev->stats.rx_over_errors;
-			softing_netdev_rx(netdev, &msg, ktime_set(0, 0));
+			softing_netdev_rx(netdev, &msg, 0);
 		}
 		/* prepare for other use */
 		memset(&msg, 0, sizeof(msg));
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 57650953ff83..7bf78a0d322c 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -253,7 +253,7 @@ static enum hrtimer_restart ec_bhf_timer_fun(struct hrtimer *timer)
 	if (!netif_running(priv->net_dev))
 		return HRTIMER_NORESTART;
 
-	hrtimer_forward_now(timer, ktime_set(0, polling_frequency));
+	hrtimer_forward_now(timer, polling_frequency);
 	return HRTIMER_RESTART;
 }
 
@@ -427,8 +427,7 @@ static int ec_bhf_open(struct net_device *net_dev)
 
 	hrtimer_init(&priv->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	priv->hrtimer.function = ec_bhf_timer_fun;
-	hrtimer_start(&priv->hrtimer, ktime_set(0, polling_frequency),
-		      HRTIMER_MODE_REL);
+	hrtimer_start(&priv->hrtimer, polling_frequency, HRTIMER_MODE_REL);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index cda04b3126bc..4fe430ceb194 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -4913,7 +4913,7 @@ static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu)
 
 	if (!port_pcpu->timer_scheduled) {
 		port_pcpu->timer_scheduled = true;
-		interval = ktime_set(0, MVPP2_TXDONE_HRTIMER_PERIOD_NS);
+		interval = MVPP2_TXDONE_HRTIMER_PERIOD_NS;
 		hrtimer_start(&port_pcpu->tx_done_timer, interval,
 			      HRTIMER_MODE_REL_PINNED);
 	}
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 0aaf975bb347..2255f9a6f3bc 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -751,7 +751,7 @@ static void tile_net_schedule_tx_wake_timer(struct net_device *dev,
 		&info->mpipe[instance].tx_wake[priv->echannel];
 
 	hrtimer_start(&tx_wake->timer,
-		      ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL),
+		      TX_TIMER_DELAY_USEC * 1000UL,
 		      HRTIMER_MODE_REL_PINNED);
 }
 
@@ -770,7 +770,7 @@ static void tile_net_schedule_egress_timer(void)
 
 	if (!info->egress_timer_scheduled) {
 		hrtimer_start(&info->egress_timer,
-			      ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL),
+			      EGRESS_TIMER_DELAY_USEC * 1000UL,
 			      HRTIMER_MODE_REL_PINNED);
 		info->egress_timer_scheduled = true;
 	}
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 057025722e3d..46d53a6c8cf8 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -510,7 +510,7 @@ at86rf230_async_state_delay(void *context)
 	case STATE_TRX_OFF:
 		switch (ctx->to_state) {
 		case STATE_RX_AACK_ON:
-			tim = ktime_set(0, c->t_off_to_aack * NSEC_PER_USEC);
+			tim = c->t_off_to_aack * NSEC_PER_USEC;
 			/* state change from TRX_OFF to RX_AACK_ON to do a
 			 * calibration, we need to reset the timeout for the
 			 * next one.
@@ -519,7 +519,7 @@ at86rf230_async_state_delay(void *context)
 			goto change;
 		case STATE_TX_ARET_ON:
 		case STATE_TX_ON:
-			tim = ktime_set(0, c->t_off_to_tx_on * NSEC_PER_USEC);
+			tim = c->t_off_to_tx_on * NSEC_PER_USEC;
 			/* state change from TRX_OFF to TX_ON or ARET_ON to do
 			 * a calibration, we need to reset the timeout for the
 			 * next one.
@@ -539,8 +539,7 @@ at86rf230_async_state_delay(void *context)
 			 * to TX_ON or TRX_OFF.
 			 */
 			if (!force) {
-				tim = ktime_set(0, (c->t_frame + c->t_p_ack) *
-						   NSEC_PER_USEC);
+				tim = (c->t_frame + c->t_p_ack) * NSEC_PER_USEC;
 				goto change;
 			}
 			break;
@@ -552,7 +551,7 @@ at86rf230_async_state_delay(void *context)
 	case STATE_P_ON:
 		switch (ctx->to_state) {
 		case STATE_TRX_OFF:
-			tim = ktime_set(0, c->t_reset_to_off * NSEC_PER_USEC);
+			tim = c->t_reset_to_off * NSEC_PER_USEC;
 			goto change;
 		default:
 			break;
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2d1a6f2e16ab..f317984f7536 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1282,7 +1282,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx)
 	/* start timer, if not already started */
 	if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop)))
 		hrtimer_start(&ctx->tx_timer,
-				ktime_set(0, ctx->timer_interval),
+				ctx->timer_interval,
 				HRTIMER_MODE_REL);
 }
 
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
index 9f61293f1a56..f38c44061b5b 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
@@ -177,7 +177,7 @@ static bool rt2800usb_tx_sta_fifo_read_completed(struct rt2x00_dev *rt2x00dev,
 	if (rt2800usb_txstatus_pending(rt2x00dev)) {
 		/* Read register after 1 ms */
 		hrtimer_start(&rt2x00dev->txstatus_timer,
-			      ktime_set(0, TXSTATUS_READ_INTERVAL),
+			      TXSTATUS_READ_INTERVAL,
 			      HRTIMER_MODE_REL);
 		return false;
 	}
@@ -204,7 +204,7 @@ static void rt2800usb_async_read_tx_status(struct rt2x00_dev *rt2x00dev)
 
 	/* Read TX_STA_FIFO register after 2 ms */
 	hrtimer_start(&rt2x00dev->txstatus_timer,
-		      ktime_set(0, 2*TXSTATUS_READ_INTERVAL),
+		      2 * TXSTATUS_READ_INTERVAL,
 		      HRTIMER_MODE_REL);
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 9236e40ac055..1800befa8b8b 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3044,7 +3044,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
 static ktime_t fixup_debug_start(struct pci_dev *dev,
 				 void (*fn)(struct pci_dev *dev))
 {
-	ktime_t calltime = ktime_set(0, 0);
+	ktime_t calltime = 0;
 
 	dev_dbg(&dev->dev, "calling %pF\n", fn);
 	if (initcall_debug) {
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index 978e6d640572..9a32f8627ecc 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -283,7 +283,7 @@ static int __init msi_wmi_input_setup(void)
 	if (err)
 		goto err_free_keymap;
 
-	last_pressed = ktime_set(0, 0);
+	last_pressed = 0;
 
 	return 0;
 
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index 15fed9d8f871..bfcd6fba6363 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -169,7 +169,7 @@ static void ltc2952_poweroff_kill(void)
 
 static void ltc2952_poweroff_default(struct ltc2952_poweroff *data)
 {
-	data->wde_interval = ktime_set(0, 300L*1E6L);
+	data->wde_interval = 300L * 1E6L;
 	data->trigger_delay = ktime_set(2, 500L*1E6L);
 
 	hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 5cf196dfc193..fc0fa7577636 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -363,7 +363,7 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		rtc_timer_remove(rtc, &rtc->aie_timer);
 
 	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
-	rtc->aie_timer.period = ktime_set(0, 0);
+	rtc->aie_timer.period = 0;
 	if (alarm->enabled)
 		err = rtc_timer_enqueue(rtc, &rtc->aie_timer);
 
@@ -391,7 +391,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		return err;
 
 	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
-	rtc->aie_timer.period = ktime_set(0, 0);
+	rtc->aie_timer.period = 0;
 
 	/* Alarm has to be enabled & in the future for us to enqueue it */
 	if (alarm->enabled && (rtc_tm_to_ktime(now) <
@@ -554,7 +554,7 @@ enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer)
 	int count;
 	rtc = container_of(timer, struct rtc_device, pie_timer);
 
-	period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+	period = NSEC_PER_SEC / rtc->irq_freq;
 	count = hrtimer_forward_now(timer, period);
 
 	rtc_handle_legacy_irq(rtc, count, RTC_PF);
@@ -665,7 +665,7 @@ static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
 		return -1;
 
 	if (enabled) {
-		ktime_t period = ktime_set(0, NSEC_PER_SEC / rtc->irq_freq);
+		ktime_t period = NSEC_PER_SEC / rtc->irq_freq;
 
 		hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL);
 	}
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 6d75984a3d85..5fa699192864 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -333,7 +333,7 @@ void ap_wait(enum ap_wait wait)
 	case AP_WAIT_TIMEOUT:
 		spin_lock_bh(&ap_poll_timer_lock);
 		if (!hrtimer_is_queued(&ap_poll_timer)) {
-			hr_time = ktime_set(0, poll_timeout);
+			hr_time = poll_timeout;
 			hrtimer_forward_now(&ap_poll_timer, hr_time);
 			hrtimer_restart(&ap_poll_timer);
 		}
@@ -860,7 +860,7 @@ static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf,
 	    time > 120000000000ULL)
 		return -EINVAL;
 	poll_timeout = time;
-	hr_time = ktime_set(0, poll_timeout);
+	hr_time = poll_timeout;
 
 	spin_lock_bh(&ap_poll_timer_lock);
 	hrtimer_cancel(&ap_poll_timer);
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 2583e8b50b21..3d3768aaab4f 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -1694,7 +1694,7 @@ static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc)
 		if (!vscsi->rsp_q_timer.started) {
 			if (vscsi->rsp_q_timer.timer_pops <
 			    MAX_TIMER_POPS) {
-				kt = ktime_set(0, WAIT_NANO_SECONDS);
+				kt = WAIT_NANO_SECONDS;
 			} else {
 				/*
 				 * slide the timeslice if the maximum
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index cf04a364fd8b..03051e12a072 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -4085,7 +4085,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 			jiffies_to_timespec(delta_jiff, &ts);
 			kt = ktime_set(ts.tv_sec, ts.tv_nsec);
 		} else
-			kt = ktime_set(0, sdebug_ndelay);
+			kt = sdebug_ndelay;
 		if (NULL == sd_dp) {
 			sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
 			if (NULL == sd_dp)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a2c2817fc566..20e5e5fb048c 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -930,7 +930,7 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba)
 	if (!hba->outstanding_reqs && scaling->is_busy_started) {
 		scaling->tot_busy_t += ktime_to_us(ktime_sub(ktime_get(),
 					scaling->busy_start_t));
-		scaling->busy_start_t = ktime_set(0, 0);
+		scaling->busy_start_t = 0;
 		scaling->is_busy_started = false;
 	}
 }
@@ -6661,7 +6661,7 @@ start_window:
 		scaling->busy_start_t = ktime_get();
 		scaling->is_busy_started = true;
 	} else {
-		scaling->busy_start_t = ktime_set(0, 0);
+		scaling->busy_start_t = 0;
 		scaling->is_busy_started = false;
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index e8008fa35e1e..224717e63a53 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1113,8 +1113,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port,
 		}
 
 		/* Delay the timer. */
-		hrtimer_start(&ncm->task_timer,
-			      ktime_set(0, TX_TIMEOUT_NSECS),
+		hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS,
 			      HRTIMER_MODE_REL);
 
 		/* Add the datagram position entries */
diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c
index 262e10cacc8c..3893b5bafd87 100644
--- a/drivers/usb/host/ehci-timer.c
+++ b/drivers/usb/host/ehci-timer.c
@@ -88,8 +88,7 @@ static void ehci_enable_event(struct ehci_hcd *ehci, unsigned event,
 	ktime_t		*timeout = &ehci->hr_timeouts[event];
 
 	if (resched)
-		*timeout = ktime_add(ktime_get(),
-				ktime_set(0, event_delays_ns[event]));
+		*timeout = ktime_add(ktime_get(), event_delays_ns[event]);
 	ehci->enabled_hrtimer_events |= (1 << event);
 
 	/* Track only the lowest-numbered pending event */
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 4dda56ef06cd..9d0b0518290a 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -1080,8 +1080,7 @@ static void fotg210_enable_event(struct fotg210_hcd *fotg210, unsigned event,
 	ktime_t *timeout = &fotg210->hr_timeouts[event];
 
 	if (resched)
-		*timeout = ktime_add(ktime_get(),
-				ktime_set(0, event_delays_ns[event]));
+		*timeout = ktime_add(ktime_get(), event_delays_ns[event]);
 	fotg210->enabled_hrtimer_events |= (1 << event);
 
 	/* Track only the lowest-numbered pending event */
diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c
index d4d7c56b48c7..16363852c034 100644
--- a/drivers/usb/musb/musb_cppi41.c
+++ b/drivers/usb/musb/musb_cppi41.c
@@ -197,8 +197,7 @@ static enum hrtimer_restart cppi41_recheck_tx_req(struct hrtimer *timer)
 	if (!list_empty(&controller->early_tx_list) &&
 	    !hrtimer_is_queued(&controller->early_tx)) {
 		ret = HRTIMER_RESTART;
-		hrtimer_forward_now(&controller->early_tx,
-				ktime_set(0, 20 * NSEC_PER_USEC));
+		hrtimer_forward_now(&controller->early_tx, 20 * NSEC_PER_USEC);
 	}
 
 	spin_unlock_irqrestore(&musb->lock, flags);
@@ -280,9 +279,9 @@ static void cppi41_dma_callback(void *private_data)
 		unsigned long usecs = cppi41_channel->total_len / 10;
 
 		hrtimer_start_range_ns(&controller->early_tx,
-				ktime_set(0, usecs * NSEC_PER_USEC),
-				20 * NSEC_PER_USEC,
-				HRTIMER_MODE_REL);
+				       usecs * NSEC_PER_USEC,
+				       20 * NSEC_PER_USEC,
+				       HRTIMER_MODE_REL);
 	}
 
 out:
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 35502d4046f5..f631a70407f6 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1395,7 +1395,7 @@ static int nodeid_warned(int nodeid, int num_nodes, int *warned)
 void dlm_scan_waiters(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;
-	ktime_t zero = ktime_set(0, 0);
+	ktime_t zero = 0;
 	s64 us;
 	s64 debug_maxus = 0;
 	u32 debug_scanned = 0;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f7b3ba61add5..94f50cac91c6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -695,7 +695,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_target = LM_ST_UNLOCKED;
 	gl->gl_demote_state = LM_ST_EXCLUSIVE;
 	gl->gl_ops = glops;
-	gl->gl_dstamp = ktime_set(0, 0);
+	gl->gl_dstamp = 0;
 	preempt_disable();
 	/* We use the global stats to estimate the initial per-glock stats */
 	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
diff --git a/fs/timerfd.c b/fs/timerfd.c
index fb4407a7cf9e..c173cc196175 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -155,7 +155,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 	else
 		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
-	return remaining < 0 ? ktime_set(0, 0): remaining;
+	return remaining < 0 ? 0: remaining;
 }
 
 static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac7fa34db8a7..b53c0cfd417e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3227,7 +3227,7 @@ static inline ktime_t net_timedelta(ktime_t t)
 
 static inline ktime_t net_invalid_timestamp(void)
 {
-	return ktime_set(0, 0);
+	return 0;
 }
 
 struct sk_buff *skb_clone_sk(struct sk_buff *skb);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 966556ebdbb3..c56fb57f2991 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1456,7 +1456,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		 * yield - it could be a while.
 		 */
 		if (unlikely(queued)) {
-			ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
+			ktime_t to = NSEC_PER_SEC / HZ;
 
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			schedule_hrtimeout(&to, HRTIMER_MODE_REL);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index ab6ac077bdb7..e6dc9a538efa 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -234,7 +234,7 @@ static int alarmtimer_suspend(struct device *dev)
 	min = freezer_delta;
 	expires = freezer_expires;
 	type = freezer_alarmtype;
-	freezer_delta = ktime_set(0, 0);
+	freezer_delta = 0;
 	spin_unlock_irqrestore(&freezer_delta_lock, flags);
 
 	rtc = alarmtimer_get_rtcdev();
@@ -277,7 +277,7 @@ static int alarmtimer_suspend(struct device *dev)
 	now = ktime_add(now, min);
 
 	/* Set alarm, if in the past reject suspend briefly to handle */
-	ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
+	ret = rtc_timer_start(rtc, &rtctimer, now, 0);
 	if (ret < 0)
 		__pm_wakeup_event(ws, MSEC_PER_SEC);
 	return ret;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c7f780113884..c6ecedd3b839 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -955,7 +955,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
 	 */
 	timer->is_rel = mode & HRTIMER_MODE_REL;
 	if (timer->is_rel)
-		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+		tim = ktime_add_safe(tim, hrtimer_resolution);
 #endif
 	return tim;
 }
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 9fe98b3777a2..1e6623d76750 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -485,7 +485,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 			 */
 #ifdef CONFIG_HIGH_RES_TIMERS
 			{
-				ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ);
+				ktime_t kj = NSEC_PER_SEC / HZ;
 
 				if (timr->it.real.interval < kj)
 					now = ktime_add(now, kj);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4fcd99e12aa0..49edc1c4f3e6 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -178,8 +178,8 @@ static void tick_setup_device(struct tick_device *td,
 			      struct clock_event_device *newdev, int cpu,
 			      const struct cpumask *cpumask)
 {
-	ktime_t next_event;
 	void (*handler)(struct clock_event_device *) = NULL;
+	ktime_t next_event = 0;
 
 	/*
 	 * First device setup ?
@@ -195,7 +195,7 @@ static void tick_setup_device(struct tick_device *td,
 			else
 				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 			tick_next_period = ktime_get();
-			tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
+			tick_period = NSEC_PER_SEC / HZ;
 		}
 
 		/*
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ab8ba1e16473..21ac75390e3d 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -643,7 +643,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
 		return HRTIMER_RESTART;
 	} else {
 		/* rearm throttle handling */
-		op->kt_lastmsg = ktime_set(0, 0);
+		op->kt_lastmsg = 0;
 		return HRTIMER_NORESTART;
 	}
 }
@@ -1196,7 +1196,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			 * In any case cancel the throttle timer, flush
 			 * potentially blocked msgs and reset throttle handling
 			 */
-			op->kt_lastmsg = ktime_set(0, 0);
+			op->kt_lastmsg = 0;
 			hrtimer_cancel(&op->thrtimer);
 			bcm_rx_thr_flush(op, 1);
 		}
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index f9fd0957ab67..7c03fb0ea34c 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -80,11 +80,11 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
 
 		if (skb->len > max_sifs_size)
 			hrtimer_start(&local->ifs_timer,
-				      ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC),
+				      hw->phy->lifs_period * NSEC_PER_USEC,
 				      HRTIMER_MODE_REL);
 		else
 			hrtimer_start(&local->ifs_timer,
-				      ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC),
+				      hw->phy->sifs_period * NSEC_PER_USEC,
 				      HRTIMER_MODE_REL);
 	} else {
 		ieee802154_wake_queue(hw);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9ffe1c220b02..f1207582cbf3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -509,7 +509,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	if (delay) {
 		ktime_t time;
 
-		time = ktime_set(0, 0);
+		time = 0;
 		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
 	}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ce54dce13ddb..a1652ab63918 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 	 */
 	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
 
-	peer->last_time_heard = ktime_set(0, 0);
+	peer->last_time_heard = 0;
 	peer->last_time_ecne_reduced = jiffies;
 
 	peer->param_flags = SPP_HB_DISABLE |
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d4ab9a7f3d94..64e3c82eedf6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1404,7 +1404,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL);
+		tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
 		return -EINVAL;
 	}
 
diff --git a/sound/drivers/pcsp/pcsp_lib.c b/sound/drivers/pcsp/pcsp_lib.c
index 3689f5f6be64..aca2d7d5f059 100644
--- a/sound/drivers/pcsp/pcsp_lib.c
+++ b/sound/drivers/pcsp/pcsp_lib.c
@@ -166,7 +166,7 @@ static int pcsp_start_playing(struct snd_pcsp *chip)
 	atomic_set(&chip->timer_active, 1);
 	chip->thalf = 0;
 
-	hrtimer_start(&pcsp_chip.timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+	hrtimer_start(&pcsp_chip.timer, 0, HRTIMER_MODE_REL);
 	return 0;
 }
 
diff --git a/sound/firewire/lib.c b/sound/firewire/lib.c
index ca4dfcf43175..7683238283b6 100644
--- a/sound/firewire/lib.c
+++ b/sound/firewire/lib.c
@@ -114,7 +114,7 @@ static void async_midi_port_callback(struct fw_card *card, int rcode,
 		snd_rawmidi_transmit_ack(substream, port->consume_bytes);
 	else if (!rcode_is_permanent_error(rcode))
 		/* To start next transaction immediately for recovery. */
-		port->next_ktime = ktime_set(0, 0);
+		port->next_ktime = 0;
 	else
 		/* Don't continue processing. */
 		port->error = true;
@@ -156,7 +156,7 @@ static void midi_port_work(struct work_struct *work)
 	if (port->consume_bytes <= 0) {
 		/* Do it in next chance, immediately. */
 		if (port->consume_bytes == 0) {
-			port->next_ktime = ktime_set(0, 0);
+			port->next_ktime = 0;
 			schedule_work(&port->work);
 		} else {
 			/* Fatal error. */
@@ -219,7 +219,7 @@ int snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port,
 	port->addr = addr;
 	port->fill = fill;
 	port->idling = true;
-	port->next_ktime = ktime_set(0, 0);
+	port->next_ktime = 0;
 	port->error = false;
 
 	INIT_WORK(&port->work, midi_port_work);
diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c
index abf9c0cab1e2..461b310c7872 100644
--- a/sound/sh/sh_dac_audio.c
+++ b/sound/sh/sh_dac_audio.c
@@ -87,7 +87,7 @@ static void dac_audio_reset(struct snd_sh_dac *chip)
 
 static void dac_audio_set_rate(struct snd_sh_dac *chip)
 {
-	chip->wakeups_per_second = ktime_set(0, 1000000000 / chip->rate);
+	chip->wakeups_per_second = 1000000000 / chip->rate;
 }
 
 
-- 
cgit v1.2.3


From 1f3a8e49d8f28f498b8694464623ac20aebfe62a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Dec 2016 12:43:07 +0100
Subject: ktime: Get rid of ktime_equal()

No point in going through loops and hoops instead of just comparing the
values.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 fs/dlm/lock.c                          |  5 ++---
 fs/nfs/flexfilelayout/flexfilelayout.c |  2 +-
 include/linux/ktime.h                  | 15 ---------------
 net/ipv6/mip6.c                        |  2 +-
 4 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f631a70407f6..6df332296c66 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1395,7 +1395,6 @@ static int nodeid_warned(int nodeid, int num_nodes, int *warned)
 void dlm_scan_waiters(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;
-	ktime_t zero = 0;
 	s64 us;
 	s64 debug_maxus = 0;
 	u32 debug_scanned = 0;
@@ -1409,7 +1408,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
 	mutex_lock(&ls->ls_waiters_mutex);
 
 	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
-		if (ktime_equal(lkb->lkb_wait_time, zero))
+		if (!lkb->lkb_wait_time)
 			continue;
 
 		debug_scanned++;
@@ -1419,7 +1418,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
 		if (us < dlm_config.ci_waitwarn_us)
 			continue;
 
-		lkb->lkb_wait_time = zero;
+		lkb->lkb_wait_time = 0;
 
 		debug_expired++;
 		if (us > debug_maxus)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index c98f6db9aa6b..0ca4af8cca5d 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -623,7 +623,7 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
 	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
 
 	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
-	if (ktime_equal(mirror->start_time, 0))
+	if (!mirror->start_time)
 		mirror->start_time = now;
 	if (mirror->report_interval != 0)
 		report_interval = (s64)mirror->report_interval * 1000LL;
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 8e573deda55e..0c8bd45c8206 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,21 +96,6 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
 /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
 #define ktime_to_ns(kt)			(kt)
 
-
-/**
- * ktime_equal - Compares two ktime_t variables to see if they are equal
- * @cmp1:	comparable1
- * @cmp2:	comparable2
- *
- * Compare two ktime_t variables.
- *
- * Return: 1 if equal.
- */
-static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
-{
-	return cmp1 == cmp2;
-}
-
 /**
  * ktime_compare - Compares two ktime_t variables for less, greater or equal
  * @cmp1:	comparable1
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 60c79a08e14a..64f0f7be9e5e 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -191,7 +191,7 @@ static inline int mip6_report_rl_allow(ktime_t stamp,
 	int allow = 0;
 
 	spin_lock_bh(&mip6_report_rl.lock);
-	if (!ktime_equal(mip6_report_rl.stamp, stamp) ||
+	if (mip6_report_rl.stamp != stamp ||
 	    mip6_report_rl.iif != iif ||
 	    !ipv6_addr_equal(&mip6_report_rl.src, src) ||
 	    !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
-- 
cgit v1.2.3


From 6326fec1122cde256bd2a8c63f2606e08e44ce1d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 25 Dec 2016 13:00:29 +1000
Subject: mm: Use owner_priv bit for PageSwapCache, valid when PageSwapBacked

A page is not added to the swap cache without being swap backed,
so PageSwapBacked mappings can use PG_owner_priv_1 for PageSwapCache.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Bob Peterson <rpeterso@redhat.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h     | 24 ++++++++++++++++--------
 include/trace/events/mmflags.h |  1 -
 mm/memory-failure.c            |  4 +---
 mm/migrate.c                   | 14 ++++++++------
 4 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 74e4dda91238..a57c909a15e4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -87,7 +87,6 @@ enum pageflags {
 	PG_private_2,		/* If pagecache, has fs aux data */
 	PG_writeback,		/* Page is under writeback */
 	PG_head,		/* A head page */
-	PG_swapcache,		/* Swap page: swp_entry_t in private */
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
@@ -110,6 +109,9 @@ enum pageflags {
 	/* Filesystems */
 	PG_checked = PG_owner_priv_1,
 
+	/* SwapBacked */
+	PG_swapcache = PG_owner_priv_1,	/* Swap page: swp_entry_t in private */
+
 	/* Two page bits are conscripted by FS-Cache to maintain local caching
 	 * state.  These bits are set on pages belonging to the netfs's inodes
 	 * when those inodes are being locally cached.
@@ -314,7 +316,13 @@ PAGEFLAG_FALSE(HighMem)
 #endif
 
 #ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+static __always_inline int PageSwapCache(struct page *page)
+{
+	return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+
+}
+SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
 #else
 PAGEFLAG_FALSE(SwapCache)
 #endif
@@ -701,12 +709,12 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_FREE \
-	(1UL << PG_lru	 | 1UL << PG_locked    | \
-	 1UL << PG_private | 1UL << PG_private_2 | \
-	 1UL << PG_writeback | 1UL << PG_reserved | \
-	 1UL << PG_slab	 | 1UL << PG_swapcache | 1UL << PG_active | \
-	 1UL << PG_unevictable | __PG_MLOCKED)
+#define PAGE_FLAGS_CHECK_AT_FREE				\
+	(1UL << PG_lru		| 1UL << PG_locked	|	\
+	 1UL << PG_private	| 1UL << PG_private_2	|	\
+	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
+	 1UL << PG_slab		| 1UL << PG_active 	|	\
+	 1UL << PG_unevictable	| __PG_MLOCKED)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5a81ab48a2fb..30c2adbdebe8 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -95,7 +95,6 @@
 	{1UL << PG_private_2,		"private_2"	},		\
 	{1UL << PG_writeback,		"writeback"	},		\
 	{1UL << PG_head,		"head"		},		\
-	{1UL << PG_swapcache,		"swapcache"	},		\
 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},		\
 	{1UL << PG_reclaim,		"reclaim"	},		\
 	{1UL << PG_swapbacked,		"swapbacked"	},		\
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 19e796d36a62..f283c7e0a2a3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -764,12 +764,11 @@ static int me_huge_page(struct page *p, unsigned long pfn)
  */
 
 #define dirty		(1UL << PG_dirty)
-#define sc		(1UL << PG_swapcache)
+#define sc		((1UL << PG_swapcache) | (1UL << PG_swapbacked))
 #define unevict		(1UL << PG_unevictable)
 #define mlock		(1UL << PG_mlocked)
 #define writeback	(1UL << PG_writeback)
 #define lru		(1UL << PG_lru)
-#define swapbacked	(1UL << PG_swapbacked)
 #define head		(1UL << PG_head)
 #define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)
@@ -819,7 +818,6 @@ static struct page_state {
 #undef mlock
 #undef writeback
 #undef lru
-#undef swapbacked
 #undef head
 #undef slab
 #undef reserved
diff --git a/mm/migrate.c b/mm/migrate.c
index 0ed24b1fa77b..87f4d0f81819 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -466,13 +466,15 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
-	if (PageSwapBacked(page))
-		__SetPageSwapBacked(newpage);
-
 	get_page(newpage);	/* add cache reference */
-	if (PageSwapCache(page)) {
-		SetPageSwapCache(newpage);
-		set_page_private(newpage, page_private(page));
+	if (PageSwapBacked(page)) {
+		__SetPageSwapBacked(newpage);
+		if (PageSwapCache(page)) {
+			SetPageSwapCache(newpage);
+			set_page_private(newpage, page_private(page));
+		}
+	} else {
+		VM_BUG_ON_PAGE(PageSwapCache(page), page);
 	}
 
 	/* Move dirty while page refs frozen and newpage not yet exposed */
-- 
cgit v1.2.3


From 62906027091f1d02de44041524f0769f60bb9cf3 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 25 Dec 2016 13:00:30 +1000
Subject: mm: add PageWaiters indicating tasks are waiting for a page bit

Add a new page flag, PageWaiters, to indicate the page waitqueue has
tasks waiting. This can be tested rather than testing waitqueue_active
which requires another cacheline load.

This bit is always set when the page has tasks on page_waitqueue(page),
and is set and cleared under the waitqueue lock. It may be set when
there are no tasks on the waitqueue, which will cause a harmless extra
wakeup check that will clears the bit.

The generic bit-waitqueue infrastructure is no longer used for pages.
Instead, waitqueues are used directly with a custom key type. The
generic code was not flexible enough to have PageWaiters manipulation
under the waitqueue lock (which simplifies concurrency).

This improves the performance of page lock intensive microbenchmarks by
2-3%.

Putting two bits in the same word opens the opportunity to remove the
memory barrier between clearing the lock bit and testing the waiters
bit, after some work on the arch primitives (e.g., ensuring memory
operand widths match and cover both bits).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Bob Peterson <rpeterso@redhat.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h             |   2 +
 include/linux/page-flags.h     |   9 ++
 include/linux/pagemap.h        |  23 +++---
 include/linux/writeback.h      |   1 -
 include/trace/events/mmflags.h |   1 +
 init/main.c                    |   3 +-
 mm/filemap.c                   | 181 +++++++++++++++++++++++++++++++++--------
 mm/internal.h                  |   2 +
 mm/swap.c                      |   2 +
 9 files changed, 174 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4424784ac374..fe6b4036664a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
+extern void __init pagecache_init(void);
+
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a57c909a15e4..c56b39890a41 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,6 +73,7 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
+	PG_waiters,		/* Page has waiters, check its waitqueue */
 	PG_error,
 	PG_referenced,
 	PG_uptodate,
@@ -169,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
  *     for compound page all operations related to the page flag applied to
  *     head page.
  *
+ * PF_ONLY_HEAD:
+ *     for compound page, callers only ever operate on the head page.
+ *
  * PF_NO_TAIL:
  *     modifications of the page flag must be done on small or head pages,
  *     checks can be done on tail pages too.
@@ -178,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
  */
 #define PF_ANY(page, enforce)	page
 #define PF_HEAD(page, enforce)	compound_head(page)
+#define PF_ONLY_HEAD(page, enforce) ({					\
+		VM_BUG_ON_PGFLAGS(PageTail(page), page);		\
+		page;})
 #define PF_NO_TAIL(page, enforce) ({					\
 		VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);	\
 		compound_head(page);})
@@ -255,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
 	TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
+PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
 PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
 PAGEFLAG(Referenced, referenced, PF_HEAD)
 	TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -743,6 +751,7 @@ static inline int page_has_private(struct page *page)
 
 #undef PF_ANY
 #undef PF_HEAD
+#undef PF_ONLY_HEAD
 #undef PF_NO_TAIL
 #undef PF_NO_COMPOUND
 #endif /* !__GENERATING_BOUNDS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f29f80f81dbf..324c8dbad1e1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
  * and for filesystems which need to wait on PG_private.
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
-
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable_timeout(struct page *page,
-					     int bit_nr, unsigned long timeout);
-
-static inline int wait_on_page_locked_killable(struct page *page)
-{
-	if (!PageLocked(page))
-		return 0;
-	return wait_on_page_bit_killable(compound_head(page), PG_locked);
-}
+extern void wake_up_page_bit(struct page *page, int bit_nr);
 
-extern wait_queue_head_t *page_waitqueue(struct page *page);
 static inline void wake_up_page(struct page *page, int bit)
 {
-	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
+	if (!PageWaiters(page))
+		return;
+	wake_up_page_bit(page, bit);
 }
 
 /* 
@@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
 		wait_on_page_bit(compound_head(page), PG_locked);
 }
 
+static inline int wait_on_page_locked_killable(struct page *page)
+{
+	if (!PageLocked(page))
+		return 0;
+	return wait_on_page_bit_killable(compound_head(page), PG_locked);
+}
+
 /* 
  * Wait for a page to complete writeback
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c78f9f0920b5..5527d910ba3d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
 void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
-void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 30c2adbdebe8..9e687ca9a307 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -81,6 +81,7 @@
 
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
+	{1UL << PG_waiters,		"waiters"	},		\
 	{1UL << PG_error,		"error"		},		\
 	{1UL << PG_referenced,		"referenced"	},		\
 	{1UL << PG_uptodate,		"uptodate"	},		\
diff --git a/init/main.c b/init/main.c
index c81c9fa21bc7..b0c9d6facef9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
 	security_init();
 	dbg_late_init();
 	vfs_caches_init();
+	pagecache_init();
 	signals_init();
-	/* rootfs populating might need page-writeback */
-	page_writeback_init();
 	proc_root_init();
 	nsfs_init();
 	cpuset_init();
diff --git a/mm/filemap.c b/mm/filemap.c
index 32be3c8f3a11..82f26cde830c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-wait_queue_head_t *page_waitqueue(struct page *page)
+#define PAGE_WAIT_TABLE_BITS 8
+#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
+static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+static wait_queue_head_t *page_waitqueue(struct page *page)
 {
-	return bit_waitqueue(page, 0);
+	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
 }
-EXPORT_SYMBOL(page_waitqueue);
 
-void wait_on_page_bit(struct page *page, int bit_nr)
+void __init pagecache_init(void)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	int i;
 
-	if (test_bit(bit_nr, &page->flags))
-		__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
-							TASK_UNINTERRUPTIBLE);
+	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(&page_wait_table[i]);
+
+	page_writeback_init();
 }
-EXPORT_SYMBOL(wait_on_page_bit);
 
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+struct wait_page_key {
+	struct page *page;
+	int bit_nr;
+	int page_match;
+};
+
+struct wait_page_queue {
+	struct page *page;
+	int bit_nr;
+	wait_queue_t wait;
+};
+
+static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	struct wait_page_key *key = arg;
+	struct wait_page_queue *wait_page
+		= container_of(wait, struct wait_page_queue, wait);
+
+	if (wait_page->page != key->page)
+	       return 0;
+	key->page_match = 1;
 
-	if (!test_bit(bit_nr, &page->flags))
+	if (wait_page->bit_nr != key->bit_nr)
+		return 0;
+	if (test_bit(key->bit_nr, &key->page->flags))
 		return 0;
 
-	return __wait_on_bit(page_waitqueue(page), &wait,
-			     bit_wait_io, TASK_KILLABLE);
+	return autoremove_wake_function(wait, mode, sync, key);
 }
 
-int wait_on_page_bit_killable_timeout(struct page *page,
-				       int bit_nr, unsigned long timeout)
+void wake_up_page_bit(struct page *page, int bit_nr)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	wait_queue_head_t *q = page_waitqueue(page);
+	struct wait_page_key key;
+	unsigned long flags;
 
-	wait.key.timeout = jiffies + timeout;
-	if (!test_bit(bit_nr, &page->flags))
-		return 0;
-	return __wait_on_bit(page_waitqueue(page), &wait,
-			     bit_wait_io_timeout, TASK_KILLABLE);
+	key.page = page;
+	key.bit_nr = bit_nr;
+	key.page_match = 0;
+
+	spin_lock_irqsave(&q->lock, flags);
+	__wake_up_locked_key(q, TASK_NORMAL, &key);
+	/*
+	 * It is possible for other pages to have collided on the waitqueue
+	 * hash, so in that case check for a page match. That prevents a long-
+	 * term waiter
+	 *
+	 * It is still possible to miss a case here, when we woke page waiters
+	 * and removed them from the waitqueue, but there are still other
+	 * page waiters.
+	 */
+	if (!waitqueue_active(q) || !key.page_match) {
+		ClearPageWaiters(page);
+		/*
+		 * It's possible to miss clearing Waiters here, when we woke
+		 * our page waiters, but the hashed waitqueue has waiters for
+		 * other pages on it.
+		 *
+		 * That's okay, it's a rare case. The next waker will clear it.
+		 */
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(wake_up_page_bit);
+
+static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+		struct page *page, int bit_nr, int state, bool lock)
+{
+	struct wait_page_queue wait_page;
+	wait_queue_t *wait = &wait_page.wait;
+	int ret = 0;
+
+	init_wait(wait);
+	wait->func = wake_page_function;
+	wait_page.page = page;
+	wait_page.bit_nr = bit_nr;
+
+	for (;;) {
+		spin_lock_irq(&q->lock);
+
+		if (likely(list_empty(&wait->task_list))) {
+			if (lock)
+				__add_wait_queue_tail_exclusive(q, wait);
+			else
+				__add_wait_queue(q, wait);
+			SetPageWaiters(page);
+		}
+
+		set_current_state(state);
+
+		spin_unlock_irq(&q->lock);
+
+		if (likely(test_bit(bit_nr, &page->flags))) {
+			io_schedule();
+			if (unlikely(signal_pending_state(state, current))) {
+				ret = -EINTR;
+				break;
+			}
+		}
+
+		if (lock) {
+			if (!test_and_set_bit_lock(bit_nr, &page->flags))
+				break;
+		} else {
+			if (!test_bit(bit_nr, &page->flags))
+				break;
+		}
+	}
+
+	finish_wait(q, wait);
+
+	/*
+	 * A signal could leave PageWaiters set. Clearing it here if
+	 * !waitqueue_active would be possible (by open-coding finish_wait),
+	 * but still fail to catch it in the case of wait hash collision. We
+	 * already can fail to clear wait hash collision cases, so don't
+	 * bother with signals either.
+	 */
+
+	return ret;
+}
+
+void wait_on_page_bit(struct page *page, int bit_nr)
+{
+	wait_queue_head_t *q = page_waitqueue(page);
+	wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
+}
+EXPORT_SYMBOL(wait_on_page_bit);
+
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+	wait_queue_head_t *q = page_waitqueue(page);
+	return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
 }
-EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
@@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
 
 	spin_lock_irqsave(&q->lock, flags);
 	__add_wait_queue(q, waiter);
+	SetPageWaiters(page);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
  */
-void __lock_page(struct page *page)
+void __lock_page(struct page *__page)
 {
-	struct page *page_head = compound_head(page);
-	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-	__wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
-							TASK_UNINTERRUPTIBLE);
+	struct page *page = compound_head(__page);
+	wait_queue_head_t *q = page_waitqueue(page);
+	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
 }
 EXPORT_SYMBOL(__lock_page);
 
-int __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *__page)
 {
-	struct page *page_head = compound_head(page);
-	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-	return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
-					bit_wait_io, TASK_KILLABLE);
+	struct page *page = compound_head(__page);
+	wait_queue_head_t *q = page_waitqueue(page);
+	return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
diff --git a/mm/internal.h b/mm/internal.h
index 44d68895a9b9..7aa2ea0a8623 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,6 +36,8 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
+void page_writeback_init(void);
+
 int do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/swap.c b/mm/swap.c
index 4dcf852e1e6d..844baedd2429 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 	}
+	__ClearPageWaiters(page);
 	mem_cgroup_uncharge(page);
 }
 
@@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
+		__ClearPageWaiters(page);
 
 		list_add(&page->lru, &pages_to_free);
 	}
-- 
cgit v1.2.3